Squashed 'librocksdb-sys/rocksdb/' content from commit 217f76421

git-subtree-dir: librocksdb-sys/rocksdb git-subtree-split: 217f7642119aa60a9fa64bc64e02d7f78813d911
2 years ago · 081ffa92ee
commit 081ffa92ee
3472 changed files with 1097423 additions and 0 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -0,0 +1,892 @@
+version: 2.1
+
+orbs:
+  win: circleci/windows@5.0.0
+
+commands:
+  install-cmake-on-macos:
+    steps:
+      - run:
+          name: Install cmake on macos
+          command: |
+            HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake
+
+  install-jdk8-on-macos:
+    steps:
+      - run:
+          name: Install JDK 8 on macos
+          command: |
+            brew install --cask adoptopenjdk/openjdk/adoptopenjdk8
+
+  increase-max-open-files-on-macos:
+    steps:
+      - run:
+          name: Increase max open files
+          command: |
+            sudo sysctl -w kern.maxfiles=1048576
+            sudo sysctl -w kern.maxfilesperproc=1048576
+            sudo launchctl limit maxfiles 1048576
+
+  pre-steps:
+    steps:
+      - checkout
+      - run:
+          name: Setup Environment Variables
+          command: |
+            echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV
+            echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV
+            echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV
+            echo "export GTEST_COLOR=1" >> $BASH_ENV
+            echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV
+            echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV
+            echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV
+            echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV
+            echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV
+            echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV
+            echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV
+
+  windows-build-steps:
+    steps:
+      - checkout
+      - run:
+          name: "Install thirdparty dependencies"
+          command: |
+            echo "Installing CMake..."
+            choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' -y
+            mkdir $Env:THIRDPARTY_HOME
+            cd $Env:THIRDPARTY_HOME
+            echo "Building Snappy dependency..."
+            curl https://github.com/google/snappy/archive/refs/tags/1.1.8.zip -O snappy-1.1.8.zip
+            unzip -q snappy-1.1.8.zip
+            cd snappy-1.1.8
+            mkdir build
+            cd build
+            & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" ..
+            msbuild.exe Snappy.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
+      - run:
+          name: "Build RocksDB"
+          command: |
+            mkdir build
+            cd build
+            & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 ..
+            cd ..
+            echo "Building with VS version: $Env:CMAKE_GENERATOR"
+            msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
+      - run:
+          name: "Test RocksDB"
+          shell: powershell.exe
+          command: |
+            build_tools\run_ci_db_test.ps1 -SuiteRun arena_test,db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
+  pre-steps-macos:
+      steps:
+        - pre-steps
+
+  post-steps:
+    steps:
+      - store_test_results: # store test results, if there are any
+          path: /tmp/test-results
+      - store_artifacts: # store LOG for debugging, if there is one
+          path: LOG
+      - run: # on fail, compress Test Logs for diagnosing the issue
+          name: Compress Test Logs
+          command: tar -cvzf t.tar.gz t
+          when: on_fail
+      - store_artifacts: # on fail, store Test Logs for diagnosing the issue
+          path: t.tar.gz
+          destination: test_logs
+          when: on_fail
+      - run: # store core dumps, if there are any
+          command: |
+            mkdir -p /tmp/core_dumps
+            cp core.* /tmp/core_dumps
+          when: on_fail
+      - store_artifacts:
+          path: /tmp/core_dumps
+          when: on_fail
+
+  upgrade-cmake:
+    steps:
+      - run:
+          name: Upgrade cmake
+          command: |
+            sudo apt remove --purge cmake
+            sudo snap install cmake --classic
+
+  install-gflags:
+    steps:
+      - run:
+          name: Install gflags
+          command: |
+            sudo apt-get update -y && sudo apt-get install -y libgflags-dev
+
+  install-gflags-on-macos:
+    steps:
+      - run:
+          name: Install gflags on macos
+          command: |
+            HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags
+
+  setup-folly:
+    steps:
+      - run:
+          name: Checkout folly sources
+          command: |
+            make checkout_folly
+
+  build-folly:
+    steps:
+      - run:
+          name: Build folly and dependencies
+          command: |
+            make build_folly
+
+  build-for-benchmarks:
+    steps:
+      - pre-steps
+      - run:
+          name: "Linux build for benchmarks"
+          command: #sized for the resource-class rocksdb-benchmark-sys1
+            make V=1 J=8 -j8 release
+
+  perform-benchmarks:
+    steps:
+      - run:
+          name: "Test low-variance benchmarks"
+          command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 20000000
+          environment:
+            LD_LIBRARY_PATH: /usr/local/lib
+            # How long to run parts of the test(s)
+            DURATION_RO: 300
+            DURATION_RW: 500
+            # Keep threads within physical capacity of server (much lower than default)
+            NUM_THREADS: 1
+            MAX_BACKGROUND_JOBS: 4
+            # Don't run a couple of "optional" initial tests
+            CI_TESTS_ONLY: "true"
+            # Reduce configured size of levels to ensure more levels in the leveled compaction LSM tree
+            WRITE_BUFFER_SIZE_MB: 16
+            TARGET_FILE_SIZE_BASE_MB: 16
+            MAX_BYTES_FOR_LEVEL_BASE_MB: 64
+            # The benchmark host has 32GB memory
+            # The following values are tailored to work with that
+            # Note, tests may not exercise the targeted issues if the memory is increased on new test hosts.
+            COMPRESSION_TYPE: "none"
+            CACHE_INDEX_AND_FILTER_BLOCKS: 1
+            MIN_LEVEL_TO_COMPRESS: 3
+            CACHE_SIZE_MB: 10240
+            MB_WRITE_PER_SEC: 2
+
+  post-benchmarks:
+    steps:
+      - store_artifacts: # store the benchmark output
+          path: /tmp/benchmark-results
+          destination: test_logs
+      - run:
+          name: Send benchmark report to visualisation
+          command: |
+            set +e
+            set +o pipefail
+            ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3_rix/_doc
+            true
+
+executors:
+  linux-docker:
+    docker:
+      # The image configuration is build_tools/ubuntu20_image/Dockerfile
+      # To update and build the image:
+      #  $ cd build_tools/ubuntu20_image
+      #  $ docker build -t zjay437/rocksdb:0.5 .
+      #  $ docker push zjay437/rocksdb:0.5
+      # `zjay437` is the account name for zjay@meta.com, whose read-write token is shared internally. To log in:
+      #  $ docker login --username zjay437
+      # Or feel free to change it to your own Docker Hub account for hosting the image; Meta employees should already have an account and be able to log in with SSO.
+      # To avoid impacting the existing CI runs, please bump the version every time you create a new image.
+      # To run the CI image environment locally:
+      #  $ docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -it zjay437/rocksdb:0.5 bash
+      # The option `--cap-add=SYS_PTRACE --security-opt seccomp=unconfined` is used to enable gdb to attach to an existing process
+      - image: zjay437/rocksdb:0.6
+
+jobs:
+  build-macos:
+    macos:
+      xcode: 12.5.1
+    resource_class: large
+    environment:
+      ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes env_test to hang; disable it for now
+    steps:
+      - increase-max-open-files-on-macos
+      - install-gflags-on-macos
+      - pre-steps-macos
+      - run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all
+      - post-steps
+
+  build-macos-cmake:
+    macos:
+      xcode: 12.5.1
+    resource_class: large
+    parameters:
+      run_even_tests:
+        description: run even or odd tests, used to split tests to 2 groups
+        type: boolean
+        default: true
+    steps:
+      - increase-max-open-files-on-macos
+      - install-cmake-on-macos
+      - install-gflags-on-macos
+      - pre-steps-macos
+      - run:
+          name: "cmake generate project file"
+          command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
+      - run:
+          name: "Build tests"
+          command: cd build && make V=1 -j32
+      - when:
+          condition: << parameters.run_even_tests >>
+          steps:
+            - run:
+                name: "Run even tests"
+                command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2
+      - when:
+          condition:
+            not: << parameters.run_even_tests >>
+          steps:
+            - run:
+                name: "Run odd tests"
+                command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2
+      - post-steps
+
+  build-linux:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: make V=1 J=32 -j32 check
+      - post-steps
+
+  build-linux-encrypted_env-no_compression:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
+      - run: |
+          ./sst_dump --help | grep -E -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
+      - post-steps
+
+  build-linux-static_lib-alt_namespace-status_checked:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 check
+      - post-steps
+
+  build-linux-release:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - checkout # check out the code in the project directory
+      - run: make V=1 -j32 LIB_MODE=shared release
+      - run: ls librocksdb.so # ensure shared lib built
+      - run: ./db_stress --version # ensure with gflags
+      - run: make clean
+      - run: make V=1 -j32 release
+      - run: ls librocksdb.a # ensure static lib built
+      - run: ./db_stress --version # ensure with gflags
+      - run: make clean
+      - run: apt-get remove -y libgflags-dev
+      - run: make V=1 -j32 LIB_MODE=shared release
+      - run: ls librocksdb.so # ensure shared lib built
+      - run: if ./db_stress --version; then false; else true; fi # ensure without gflags
+      - run: make clean
+      - run: make V=1 -j32 release
+      - run: ls librocksdb.a # ensure static lib built
+      - run: if ./db_stress --version; then false; else true; fi # ensure without gflags
+      - post-steps
+
+  build-linux-release-rtti:
+    executor: linux-docker
+    resource_class: xlarge
+    steps:
+      - checkout # check out the code in the project directory
+      - run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
+      - run: ./db_stress --version # ensure with gflags
+      - run: make clean
+      - run: apt-get remove -y libgflags-dev
+      - run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
+      - run: if ./db_stress --version; then false; else true; fi # ensure without gflags
+
+  build-linux-clang-no_test_run:
+    executor: linux-docker
+    resource_class: xlarge
+    steps:
+      - checkout # check out the code in the project directory
+      - run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all
+      - post-steps
+
+  build-linux-clang10-asan:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reasons we haven't figured out
+      - post-steps
+
+  build-linux-clang10-mini-tsan:
+    executor: linux-docker
+    resource_class: 2xlarge+
+    steps:
+      - pre-steps
+      - run: COMPILE_WITH_TSAN=1 CC=clang-13 CXX=clang++-13 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check
+      - post-steps
+
+  build-linux-clang10-ubsan:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reasons we haven't figured out
+      - post-steps
+
+  build-linux-valgrind:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: PORTABLE=1 make V=1 -j32 valgrind_test
+      - post-steps
+
+  build-linux-clang10-clang-analyze:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reasons we haven't figured out. For an unknown reason, passing "clang++-10" as CLANG_ANALYZER doesn't work, so a full path is needed.
+      - post-steps
+      - run:
+          name: "compress test report"
+          command: tar -cvzf scan_build_report.tar.gz scan_build_report
+          when: on_fail
+      - store_artifacts:
+          path: scan_build_report.tar.gz
+          destination: scan_build_report
+          when: on_fail
+
+  build-linux-runner:
+    machine: true
+    resource_class: facebook/rocksdb-benchmark-sys1
+    steps:
+      - pre-steps
+      - run:
+          name: "Checked Linux build (Runner)"
+          command: make V=1 J=8 -j8 check
+          environment:
+            LD_LIBRARY_PATH: /usr/local/lib
+      - post-steps
+
+  build-linux-cmake-with-folly:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - setup-folly
+      - build-folly
+      - run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20)
+      - post-steps
+
+  build-linux-cmake-with-folly-lite-no-test:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - setup-folly
+      - run: (mkdir build && cd build && cmake -DUSE_FOLLY_LITE=1 -DWITH_GFLAGS=1 .. && make V=1 -j20)
+      - post-steps
+
+  build-linux-cmake-with-benchmark:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20
+      - post-steps
+
+  build-linux-unity-and-headers:
+    docker: # executor type
+      - image: gcc:latest
+    environment:
+      EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware
+    resource_class: large
+    steps:
+      - checkout # check out the code in the project directory
+      - run: apt-get update -y && apt-get install -y libgflags-dev
+      - run:
+          name: "Unity build"
+          command: make V=1 -j8 unity_test
+          no_output_timeout: 20m
+      - run: make V=1 -j8 -k check-headers # could be moved to a different build
+      - post-steps
+
+  build-linux-gcc-7-with-folly:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - setup-folly
+      - build-folly
+      - run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures
+      - post-steps
+
+  build-linux-gcc-7-with-folly-lite-no-test:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - setup-folly
+      - run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 all
+      - post-steps
+
+  build-linux-gcc-8-no_test_run:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: CC=gcc-8 CXX=g++-8 V=1 make -j32 all
+      - post-steps
+
+  build-linux-cmake-with-folly-coroutines:
+    executor: linux-docker
+    resource_class: 2xlarge
+    environment:
+      CC: gcc-10
+      CXX: g++-10
+    steps:
+      - pre-steps
+      - setup-folly
+      - build-folly
+      - run: (mkdir build && cd build && cmake -DUSE_COROUTINES=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20)
+      - post-steps
+
+  build-linux-gcc-10-cxx20-no_test_run:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j32 all
+      - post-steps
+
+  build-linux-gcc-11-no_test_run:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: LIB_MODE=static CC=gcc-11 CXX=g++-11 V=1 make -j32 all microbench # TODO: LIB_MODE only to work around unresolved linker failures
+      - post-steps
+
+  build-linux-clang-13-no_test_run:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j32 all microbench
+      - post-steps
+
+  # Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
+  build-linux-clang-13-asan-ubsan-with-folly:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - setup-folly
+      - build-folly
+      - run: CC=clang-13 CXX=clang++-13 LIB_MODE=static USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures
+      - post-steps
+
+  # This job only makes sure the microbench tests are able to run; the benchmark results are not meaningful because the CI host varies between runs.
+  build-linux-run-microbench:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run: DEBUG_LEVEL=0 make -j32 run_microbench
+      - post-steps
+
+  build-linux-mini-crashtest:
+    executor: linux-docker
+    resource_class: large
+    steps:
+      - pre-steps
+      - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=960 --max_key=2500000 --use_io_uring=0' blackbox_crash_test_with_atomic_flush
+      - post-steps
+
+  build-linux-crashtest-tiered-storage-bb:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run:
+          name: "run crashtest"
+          command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' blackbox_crash_test_with_tiered_storage
+          no_output_timeout: 100m
+      - post-steps
+
+  build-linux-crashtest-tiered-storage-wb:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run:
+          name: "run crashtest"
+          command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' whitebox_crash_test_with_tiered_storage
+          no_output_timeout: 100m
+      - post-steps
+
+  build-windows-vs2022:
+    executor:
+      name: win/server-2022
+      size: 2xlarge
+    environment:
+      THIRDPARTY_HOME: C:/Users/circleci/thirdparty
+      CMAKE_HOME: C:/Program Files/CMake
+      CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe
+      SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8
+      SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build
+      SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib
+      CMAKE_GENERATOR: Visual Studio 17 2022
+    steps:
+      - windows-build-steps
+
+  build-windows-vs2019:
+    executor:
+      name: win/server-2019
+      size: 2xlarge
+    environment:
+      THIRDPARTY_HOME: C:/Users/circleci/thirdparty
+      CMAKE_HOME: C:/Program Files/CMake
+      CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe
+      SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8
+      SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build
+      SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib
+      CMAKE_GENERATOR: Visual Studio 16 2019
+    steps:
+      - windows-build-steps
+
+  build-linux-java:
+    executor: linux-docker
+    resource_class: large
+    steps:
+      - pre-steps
+      - run:
+          name: "Set Java Environment"
+          command: |
+            echo "JAVA_HOME=${JAVA_HOME}"
+            echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
+            which java && java -version
+            which javac && javac -version
+      - run:
+          name: "Test RocksDBJava"
+          command: make V=1 J=8 -j8 jtest
+      - post-steps
+
+  build-linux-java-static:
+    executor: linux-docker
+    resource_class: large
+    steps:
+      - pre-steps
+      - run:
+          name: "Set Java Environment"
+          command: |
+            echo "JAVA_HOME=${JAVA_HOME}"
+            echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
+            which java && java -version
+            which javac && javac -version
+      - run:
+          name: "Build RocksDBJava Static Library"
+          command: make V=1 J=8 -j8 rocksdbjavastatic
+      - post-steps
+
+  build-macos-java:
+    macos:
+      xcode: 12.5.1
+    resource_class: large
+    environment:
+      JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
+      ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash
+    steps:
+      - increase-max-open-files-on-macos
+      - install-gflags-on-macos
+      - install-jdk8-on-macos
+      - pre-steps-macos
+      - run:
+          name: "Set Java Environment"
+          command: |
+            echo "JAVA_HOME=${JAVA_HOME}"
+            echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
+            which java && java -version
+            which javac && javac -version
+      - run:
+          name: "Test RocksDBJava"
+          command: make V=1 J=16 -j16 jtest
+          no_output_timeout: 20m
+      - post-steps
+
+  build-macos-java-static:
+    macos:
+      xcode: 12.5.1
+    resource_class: large
+    environment:
+      JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
+    steps:
+      - increase-max-open-files-on-macos
+      - install-gflags-on-macos
+      - install-cmake-on-macos
+      - install-jdk8-on-macos
+      - pre-steps-macos
+      - run:
+          name: "Set Java Environment"
+          command: |
+            echo "JAVA_HOME=${JAVA_HOME}"
+            echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
+            which java && java -version
+            which javac && javac -version
+      - run:
+          name: "Build RocksDBJava x86 and ARM Static Libraries"
+          command: make V=1 J=16 -j16 rocksdbjavastaticosx
+          no_output_timeout: 20m
+      - post-steps
+
+  build-macos-java-static-universal:
+    macos:
+      xcode: 12.5.1
+    resource_class: large
+    environment:
+      JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
+    steps:
+      - increase-max-open-files-on-macos
+      - install-gflags-on-macos
+      - install-cmake-on-macos
+      - install-jdk8-on-macos
+      - pre-steps-macos
+      - run:
+          name: "Set Java Environment"
+          command: |
+            echo "JAVA_HOME=${JAVA_HOME}"
+            echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
+            which java && java -version
+            which javac && javac -version
+      - run:
+          name: "Build RocksDBJava Universal Binary Static Library"
+          command: make V=1 J=16 -j16 rocksdbjavastaticosx_ub
+          no_output_timeout: 20m
+      - post-steps
+
+  build-examples:
+    executor: linux-docker
+    resource_class: large
+    steps:
+      - pre-steps
+      - run:
+          name: "Build examples"
+          command: |
+            make V=1 -j4 static_lib && cd examples && make V=1 -j4
+      - post-steps
+
+  build-cmake-mingw:
+    executor: linux-docker
+    resource_class: large
+    steps:
+      - pre-steps
+      - run: update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix
+      - run:
+          name: "Build cmake-mingw"
+          command: |
+            export PATH=$JAVA_HOME/bin:$PATH
+            echo "JAVA_HOME=${JAVA_HOME}"
+            which java && java -version
+            which javac && javac -version
+            mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni
+      - post-steps
+
+  build-linux-non-shm:
+    executor: linux-docker
+    resource_class: 2xlarge
+    environment:
+      TEST_TMPDIR: /tmp/rocksdb_test_tmp
+    steps:
+      - pre-steps
+      - run: make V=1 -j32 check
+      - post-steps
+
+  build-linux-arm-test-full:
+    machine:
+      image: ubuntu-2004:202111-02
+    resource_class: arm.large
+    steps:
+      - pre-steps
+      - install-gflags
+      - run: make V=1 J=4 -j4 check
+      - post-steps
+
+  build-linux-arm:
+    machine:
+      image: ubuntu-2004:202111-02
+    resource_class: arm.large
+    steps:
+      - pre-steps
+      - install-gflags
+      - run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some
+      - post-steps
+
+  build-linux-arm-cmake-no_test_run:
+    machine:
+      image: ubuntu-2004:202111-02
+    resource_class: arm.large
+    environment:
+      JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64
+    steps:
+      - pre-steps
+      - install-gflags
+      - run:
+          name: "Set Java Environment"
+          command: |
+            echo "JAVA_HOME=${JAVA_HOME}"
+            echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
+            which java && java -version
+            which javac && javac -version
+      - run:
+          name: "Build with cmake"
+          command: |
+            mkdir build
+            cd build
+            cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=1 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 ..
+            make -j4
+      - run:
+          name: "Build Java with cmake"
+          command: |
+            rm -rf build
+            mkdir build
+            cd build
+            cmake -DJNI=1 -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 ..
+            make -j4 rocksdb rocksdbjni
+      - post-steps
+
+  build-format-compatible:
+    executor: linux-docker
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - run:
+          name: "test"
+          command: |
+            export TEST_TMPDIR=/dev/shm/rocksdb
+            rm -rf /dev/shm/rocksdb
+            mkdir /dev/shm/rocksdb
+            tools/check_format_compatible.sh
+      - post-steps
+
+  build-fuzzers:
+    executor: linux-docker
+    resource_class: large
+    steps:
+      - pre-steps
+      - run:
+          name: "Build rocksdb lib"
+          command: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j4 static_lib
+      - run:
+          name: "Build fuzzers"
+          command: cd fuzz && make sst_file_writer_fuzzer db_fuzzer db_map_fuzzer
+      - post-steps
+
+  benchmark-linux: #use a private Circle CI runner (resource_class) to run the job
+    machine: true
+    resource_class: facebook/rocksdb-benchmark-sys1
+    steps:
+      - build-for-benchmarks
+      - perform-benchmarks
+      - post-benchmarks
+
+workflows:
+  version: 2
+  jobs-linux-run-tests:
+    jobs:
+      - build-linux
+      - build-linux-cmake-with-folly
+      - build-linux-cmake-with-folly-lite-no-test
+      - build-linux-gcc-7-with-folly
+      - build-linux-gcc-7-with-folly-lite-no-test
+      - build-linux-cmake-with-folly-coroutines
+      - build-linux-cmake-with-benchmark
+      - build-linux-encrypted_env-no_compression
+  jobs-linux-run-tests-san:
+    jobs:
+      - build-linux-clang10-asan
+      - build-linux-clang10-ubsan
+      - build-linux-clang10-mini-tsan
+      - build-linux-static_lib-alt_namespace-status_checked
+  jobs-linux-no-test-run:
+    jobs:
+      - build-linux-release
+      - build-linux-release-rtti
+      - build-examples
+      - build-fuzzers
+      - build-linux-clang-no_test_run
+      - build-linux-clang-13-no_test_run
+      - build-linux-gcc-8-no_test_run
+      - build-linux-gcc-10-cxx20-no_test_run
+      - build-linux-gcc-11-no_test_run
+      - build-linux-arm-cmake-no_test_run
+  jobs-linux-other-checks:
+    jobs:
+      - build-linux-clang10-clang-analyze
+      - build-linux-unity-and-headers
+      - build-linux-mini-crashtest
+  jobs-windows:
+    jobs:
+      - build-windows-vs2022
+      - build-windows-vs2019
+      - build-cmake-mingw
+  jobs-java:
+    jobs:
+      - build-linux-java
+      - build-linux-java-static
+      - build-macos-java
+      - build-macos-java-static
+      - build-macos-java-static-universal
+  jobs-macos:
+    jobs:
+      - build-macos
+      - build-macos-cmake:
+          run_even_tests: true
+      - build-macos-cmake:
+          run_even_tests: false
+  jobs-linux-arm:
+    jobs:
+      - build-linux-arm
+  build-fuzzers:
+    jobs:
+      - build-fuzzers
+  benchmark-linux:
+    triggers:
+      - schedule:
+          cron: "0 * * * *"
+          filters:
+            branches:
+              only:
+                - main
+    jobs:
+      - benchmark-linux
+  nightly:
+    triggers:
+      - schedule:
+          cron: "0 9 * * *"
+          filters:
+            branches:
+              only:
+                - main
+    jobs:
+      - build-format-compatible
+      - build-linux-arm-test-full
+      - build-linux-run-microbench
+      - build-linux-non-shm
+      - build-linux-clang-13-asan-ubsan-with-folly
+      - build-linux-valgrind
--- a/.circleci/ubsan_suppression_list.txt
+++ b/.circleci/ubsan_suppression_list.txt
@ -0,0 +1,6 @@
+# Suppress UBSAN warnings related to stl_tree.h, e.g.
+# UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43 in 
+# /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43:
+# runtime error: upcast of address 0x000001fa8820 with insufficient space for an object of type
+# 'std::_Rb_tree_node<std::pair<const std::__cxx11::basic_string<char>, rocksdb::(anonymous namespace)::LockHoldingInfo> >'
+src:*bits/stl_tree.h
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,5 @@
+# Complete list of style options can be found at: 
+# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
+---
+BasedOnStyle: Google
+...
--- a/.github/workflows/sanity_check.yml
+++ b/.github/workflows/sanity_check.yml
@ -0,0 +1,45 @@
+name: Check buck targets and code format
+on: [push, pull_request]
+permissions:
+  contents: read
+
+jobs:
+  check:
+    name: Check TARGETS file and code format
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout feature branch
+      uses: actions/checkout@v2
+      with:
+        fetch-depth: 0
+
+    - name: Fetch from upstream
+      run: |
+        git remote add upstream https://github.com/facebook/rocksdb.git && git fetch upstream
+
+    - name: Where am I
+      run: |
+        echo git status && git status
+        echo "git remote -v" && git remote -v
+        echo git branch && git branch
+
+    - name: Setup Python
+      uses: actions/setup-python@v1
+
+    - name: Install Dependencies
+      run: python -m pip install --upgrade pip
+
+    - name: Install argparse
+      run: pip install argparse
+
+    - name: Download clang-format-diff.py
+      run: wget https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py
+
+    - name: Check format
+      run: VERBOSE_CHECK=1 make check-format
+
+    - name: Compare buckify output
+      run: make check-buck-targets
+
+    - name: Simple source code checks
+      run: make check-sources
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,99 @@
+make_config.mk
+rocksdb.pc
+
+*.a
+*.arc
+*.d
+*.dylib*
+*.gcda
+*.gcno
+*.o
+*.o.tmp
+*.so
+*.so.*
+*_test
+*_bench
+*_stress
+*.out
+*.class
+*.jar
+*.*jnilib*
+*.d-e
+*.o-*
+*.swp
+*~
+*.vcxproj
+*.vcxproj.filters
+*.sln
+*.cmake
+.watchmanconfig
+CMakeCache.txt
+CMakeFiles/
+build/
+
+ldb
+manifest_dump
+sst_dump
+blob_dump
+block_cache_trace_analyzer
+tools/block_cache_analyzer/*.pyc
+column_aware_encoding_exp
+util/build_version.cc
+build_tools/VALGRIND_LOGS/
+coverage/COVERAGE_REPORT
+.gdbhistory
+.gdb_history
+package/
+unity.a
+tags
+etags
+rocksdb_dump
+rocksdb_undump
+db_test2
+trace_analyzer
+block_cache_trace_analyzer
+io_tracer_parser
+.DS_Store
+.vs
+.vscode
+.clangd
+
+java/out
+java/target
+java/test-libs
+java/*.log
+java/include/org_rocksdb_*.h
+
+.idea/
+*.iml
+
+rocksdb.cc
+rocksdb.h
+unity.cc
+java/crossbuild/.vagrant
+.vagrant/
+java/**/*.asc
+java/javadoc
+
+scan_build_report/
+t
+LOG
+
+db_logs/
+tp2/
+fbcode/
+fbcode
+buckifier/*.pyc
+buckifier/__pycache__
+
+compile_commands.json
+clang-format-diff.py
+.py3/
+
+fuzz/proto/gen/
+fuzz/crash-*
+
+cmake-build-*
+third-party/folly/
+.cache
+*.sublime-*
--- a/.lgtm.yml
+++ b/.lgtm.yml
@ -0,0 +1,4 @@
+extraction:
+  cpp:
+    index:
+      build_command: make static_lib
--- a/.watchmanconfig
+++ b/.watchmanconfig
@ -0,0 +1,6 @@
+{
+  "content_hash_warming": true,
+  "content_hash_max_items": 333333,
+  "hint_num_files_per_dir": 8,
+  "fsevents_latency": 0.05
+}
--- a/12
+++ b/12
@ -0,0 +1,12 @@
+Facebook Inc.
+Facebook Engineering Team
+
+Google Inc.
+# Initial version authors:
+Jeffrey Dean <jeff@google.com>
+Sanjay Ghemawat <sanjay@google.com>
+
+# Partial list of contributors:
+Kevin Regan <kevin.d.regan@gmail.com>
+Johan Bilien <jobi@litl.com>
+Matthew Von-Maszewski <https://github.com/matthewvon> (Basho Technologies)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@ -0,0 +1,77 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at <opensource-conduct@fb.com>. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
+
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1,17 @@
+# Contributing to RocksDB
+
+## Code of Conduct
+The code of conduct is described in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md)
+
+## Contributor License Agreement ("CLA")
+
+In order to accept your pull request, we need you to submit a CLA. You
+only need to do this once, so if you've done this for another Facebook
+open source project, you're good to go. If you are submitting a pull
+request for the first time, just let us know that you have completed
+the CLA and we can cross-check with your GitHub username.
+
+Complete your CLA here: <https://code.facebook.com/cla>
+
+If you prefer to sign a paper copy, we can send you a PDF.  Send us an
+e-mail or create a new GitHub issue to request the CLA in PDF format.
--- a/339
+++ b/339
@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--- a/DEFAULT_OPTIONS_HISTORY.md
+++ b/DEFAULT_OPTIONS_HISTORY.md
@ -0,0 +1,24 @@
+# RocksDB default options change log (NO LONGER MAINTAINED)
+## Unreleased
+* delayed_write_rate takes the rate given by rate_limiter if not specified.
+
+## 5.2
+* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
+
+## 5.0 (11/17/2016)
+* Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default
+* Options.level0_stop_writes_trigger default value changes from 24 to 32.
+
+## 4.8.0 (5/2/2016)
+* options.max_open_files changes from 5000 to -1. It improves performance, but users need to set the file descriptor limit high enough and watch memory usage for index and bloom filters.
+* options.base_background_compactions changes from max_background_compactions to 1. When users set higher max_background_compactions but the write throughput is not high, the writes are less spiky to disks.
+* options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. This avoids some false positives when the file system or hardware reorders the writes for file data and metadata.
+
+## 4.7.0 (4/8/2016)
+* options.write_buffer_size changes from 4MB to 64MB.
+* options.target_file_size_base changes from 2MB to 64MB.
+* options.max_bytes_for_level_base changes from 10MB to 256MB.
+* options.soft_pending_compaction_bytes_limit changes from 0 (disabled) to 64GB.
+* options.hard_pending_compaction_bytes_limit changes from 0 (disabled) to 256GB.
+* table_cache_numshardbits changes from 4 to 6.
+* max_file_opening_threads changes from 1 to 16.
--- a/DUMP_FORMAT.md
+++ b/DUMP_FORMAT.md
@ -0,0 +1,16 @@
+## RocksDB dump format
+
+The version 1 RocksDB dump format is fairly simple:
+
+1) The dump starts with the magic 8 byte identifier "ROCKDUMP"
+
+2) The magic is followed by an 8 byte big-endian version which is 0x00000001.
+
+3) Next are arbitrarily sized chunks of bytes, each prepended by a 4 byte little-endian number indicating how large the chunk is.
+
+4) The first chunk is special and is a json string indicating some things about the creation of this dump.  It contains the following keys:
+* database-path: The path of the database this dump was created from.
+* hostname: The hostname of the machine where the dump was created.
+* creation-time: Unix seconds since epoch when this dump was created.
+
+5) Following the info dump, the slices are paired into key/value pairs.
--- a/HISTORY.md
+++ b/HISTORY.md
--- a/INSTALL.md
+++ b/INSTALL.md
@ -0,0 +1,220 @@
+## Compilation
+
+**Important**: If you plan to run RocksDB in production, don't compile using default
+`make` or `make all`. That will compile RocksDB in debug mode, which is much slower
+than release mode.
+
+RocksDB's library should be able to compile without any dependency installed,
+although we recommend installing some compression libraries (see below).
+We do depend on newer gcc/clang with C++17 support (GCC >= 7, Clang >= 5).
+
+There are a few options when compiling RocksDB:
+
+* [recommended] `make static_lib` will compile librocksdb.a, RocksDB static library. Compiles static library in release mode.
+
+* `make shared_lib` will compile librocksdb.so, RocksDB shared library. Compiles shared library in release mode.
+
+* `make check` will compile and run all the unit tests. `make check` will compile RocksDB in debug mode.
+
+* `make all` will compile our static library, and all our tools and unit tests. Our tools
+depend on gflags. You will need to have gflags installed to run `make all`. This will compile RocksDB in debug mode. Don't
+use binaries compiled by `make all` in production.
+
+* By default the binary we produce is optimized for the CPU you're compiling on
+(`-march=native` or the equivalent). To build a binary compatible with the most
+general architecture supported by your CPU and compiler, set `PORTABLE=1` for
+the build, but performance will suffer as many operations benefit from newer
+and wider instructions. In addition to `PORTABLE=0` (default) and `PORTABLE=1`,
+it can be set to an architecture name recognized by your compiler. For example,
+on 64-bit x86, a reasonable compromise is `PORTABLE=haswell` which supports
+many or most of the available optimizations while still being compatible with
+most processors made since roughly 2013.
+
+## Dependencies
+
+* You can link RocksDB with following compression libraries:
+  - [zlib](http://www.zlib.net/) - a library for data compression.
+  - [bzip2](http://www.bzip.org/) - a library for data compression.
+  - [lz4](https://github.com/lz4/lz4) - a library for extremely fast data compression.
+  - [snappy](http://google.github.io/snappy/) - a library for fast
+      data compression.
+  - [zstandard](http://www.zstd.net) - Fast real-time compression
+      algorithm.
+
+* All our tools depend on:
+  - [gflags](https://gflags.github.io/gflags/) - a library that handles
+      command line flags processing. You can compile rocksdb library even
+      if you don't have gflags installed.
+
+* `make check` will also check code formatting, which requires [clang-format](https://clang.llvm.org/docs/ClangFormat.html)
+
+* If you wish to build the RocksJava static target, then cmake is required for building Snappy.
+
+* If you wish to run microbench (e.g, `make microbench`, `make ribbon_bench` or `cmake -DWITH_BENCHMARK=1`), Google benchmark >= 1.6.0 is needed.
+* You can do the following to install Google benchmark. These commands are copied from `./build_tools/ubuntu20_image/Dockerfile`:
+
+`$ git clone --depth 1 --branch v1.7.0 https://github.com/google/benchmark.git ~/benchmark`
+
+`$ cd ~/benchmark && mkdir build && cd build && cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0 && ninja && ninja install`
+
+## Supported platforms
+
+* **Linux - Ubuntu**
+    * Upgrade your gcc to version at least 7 to get C++17 support.
+    * Install gflags. First, try: `sudo apt-get install libgflags-dev`
+      If this doesn't work and you're using Ubuntu, here's a nice tutorial:
+      (http://askubuntu.com/questions/312173/installing-gflags-12-04)
+    * Install snappy. This is usually as easy as:
+      `sudo apt-get install libsnappy-dev`.
+    * Install zlib. Try: `sudo apt-get install zlib1g-dev`.
+    * Install bzip2: `sudo apt-get install libbz2-dev`.
+    * Install lz4: `sudo apt-get install liblz4-dev`.
+    * Install zstandard: `sudo apt-get install libzstd-dev`.
+
+* **Linux - CentOS / RHEL**
+    * Upgrade your gcc to version at least 7 to get C++17 support
+    * Install gflags:
+
+              git clone https://github.com/gflags/gflags.git
+              cd gflags
+              git checkout v2.0
+              ./configure && make && sudo make install
+
+      **Notice**: Once installed, please add the include path for gflags to your `CPATH` environment variable and the
+      lib path to `LIBRARY_PATH`. If installed with default settings, the include path will be `/usr/local/include`
+      and the lib path will be `/usr/local/lib`.
+
+    * Install snappy:
+
+              sudo yum install snappy snappy-devel
+
+    * Install zlib:
+
+              sudo yum install zlib zlib-devel
+
+    * Install bzip2:
+
+              sudo yum install bzip2 bzip2-devel
+
+    * Install lz4:
+
+              sudo yum install lz4-devel
+
+    * Install ASAN (optional for debugging):
+
+              sudo yum install libasan
+
+    * Install zstandard:
+        * With [EPEL](https://fedoraproject.org/wiki/EPEL):
+
+              sudo yum install libzstd-devel
+
+        * With CentOS 8:
+
+              sudo dnf install libzstd-devel
+
+        * From source:
+
+              wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz
+              mv v1.1.3.tar.gz zstd-1.1.3.tar.gz
+              tar zxvf zstd-1.1.3.tar.gz
+              cd zstd-1.1.3
+              make && sudo make install
+
+* **OS X**:
+    * Install latest C++ compiler that supports C++ 17:
+        * Update XCode:  run `xcode-select --install` (or install it from XCode App's setting).
+        * Install via [homebrew](http://brew.sh/).
+            * If you're a first-time developer on macOS, you still need to run: `xcode-select --install` in your command line.
+            * run `brew tap homebrew/versions; brew install gcc7 --use-llvm` to install gcc 7 (or higher).
+    * run `brew install rocksdb`
+
+* **FreeBSD** (11.01):
+
+    * You can either install RocksDB from the Ports system using `cd /usr/ports/databases/rocksdb && make install`, or you can follow the details below to install dependencies and compile from source code:
+
+    * Install the dependencies for RocksDB:
+
+        export BATCH=YES
+        cd /usr/ports/devel/gmake && make install
+        cd /usr/ports/devel/gflags && make install
+
+        cd /usr/ports/archivers/snappy && make install
+        cd /usr/ports/archivers/bzip2 && make install
+        cd /usr/ports/archivers/liblz4 && make install
+        cd /usr/ports/archivers/zstd && make install
+
+        cd /usr/ports/devel/git && make install
+
+
+    * Install the dependencies for RocksJava (optional):
+
+        export BATCH=yes
+        cd /usr/ports/java/openjdk7 && make install
+
+    * Build RocksDB from source:
+        cd ~
+        git clone https://github.com/facebook/rocksdb.git
+        cd rocksdb
+        gmake static_lib
+
+    * Build RocksJava from source (optional):
+        cd rocksdb
+        export JAVA_HOME=/usr/local/openjdk7
+        gmake rocksdbjava
+
+* **OpenBSD** (6.3/-current):
+
+    * As RocksDB is not available in the ports yet you have to build it on your own:
+
+    * Install the dependencies for RocksDB:
+
+        pkg_add gmake gflags snappy bzip2 lz4 zstd git jdk bash findutils gnuwatch
+
+    * Build RocksDB from source:
+
+        cd ~
+        git clone https://github.com/facebook/rocksdb.git
+        cd rocksdb
+        gmake static_lib
+
+    * Build RocksJava from source (optional):
+
+        cd rocksdb
+        export JAVA_HOME=/usr/local/jdk-1.8.0
+        export PATH=$PATH:/usr/local/jdk-1.8.0/bin
+        gmake rocksdbjava
+
+* **iOS**:
+  * Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define an important pre-processing macro: `IOS_CROSS_COMPILE`.
+
+* **Windows** (Visual Studio 2017 and up):
+  * Read and follow the instructions at CMakeLists.txt
+  * Or install via [vcpkg](https://github.com/microsoft/vcpkg)
+       * run `vcpkg install rocksdb:x64-windows`
+
+* **AIX 6.1**
+    * Install AIX Toolbox rpms with gcc
+    * Use these environment variables:
+
+             export PORTABLE=1
+             export CC=gcc
+             export AR="ar -X64"
+             export EXTRA_ARFLAGS=-X64
+             export EXTRA_CFLAGS=-maix64
+             export EXTRA_CXXFLAGS=-maix64
+             export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc"
+             export LIBPATH=/opt/freeware/lib
+             export JAVA_HOME=/usr/java8_64
+             export PATH=/opt/freeware/bin:$PATH
+
+* **Solaris Sparc**
+    * Install GCC 7 or higher.
+    * Use these environment variables:
+
+             export CC=gcc
+             export EXTRA_CFLAGS=-m64
+             export EXTRA_CXXFLAGS=-m64
+             export EXTRA_LDFLAGS=-m64
+             export PORTABLE=1
+             export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc"
--- a/LANGUAGE-BINDINGS.md
+++ b/LANGUAGE-BINDINGS.md
@ -0,0 +1,26 @@
+This is the list of all known third-party language bindings for RocksDB. If something is missing, please open a pull request to add it.
+
+* Java - https://github.com/facebook/rocksdb/tree/main/java
+* Python
+    * http://python-rocksdb.readthedocs.io/en/latest/
+    * http://pyrocksdb.readthedocs.org/en/latest/ (unmaintained)
+* Perl - https://metacpan.org/pod/RocksDB
+* Node.js - https://npmjs.org/package/rocksdb
+* Go 
+  * https://github.com/linxGnu/grocksdb
+  * https://github.com/tecbot/gorocksdb (unmaintained)
+* Ruby - http://rubygems.org/gems/rocksdb-ruby
+* Haskell - https://hackage.haskell.org/package/rocksdb-haskell
+* PHP - https://github.com/Photonios/rocksdb-php
+* C#
+    * https://github.com/warrenfalk/rocksdb-sharp
+    * https://github.com/curiosity-ai/rocksdb-sharp
+* Rust
+    * https://github.com/pingcap/rust-rocksdb (used in production fork of https://github.com/spacejam/rust-rocksdb)
+    * https://github.com/spacejam/rust-rocksdb
+    * https://github.com/bh1xuw/rust-rocks
+* D programming language - https://github.com/b1naryth1ef/rocksdb
+* Erlang - https://gitlab.com/barrel-db/erlang-rocksdb
+* Elixir - https://github.com/urbint/rox
+* Nim - https://github.com/status-im/nim-rocksdb
+* Swift and Objective-C (iOS/OSX) - https://github.com/iabudiab/ObjectiveRocks 
--- a/LICENSE.Apache
+++ b/LICENSE.Apache
@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/LICENSE.leveldb
+++ b/LICENSE.leveldb
@ -0,0 +1,29 @@
+This contains code that is from LevelDB, and that code is under the following license:
+
+Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/2582
+++ b/2582
--- a/PLUGINS.md
+++ b/PLUGINS.md
@ -0,0 +1,8 @@
+This is the list of all known third-party plugins for RocksDB. If something is missing, please open a pull request to add it.
+
+* [Dedupfs](https://github.com/ajkr/dedupfs): an example for plugin developers to reference
+* [HDFS](https://github.com/riversand963/rocksdb-hdfs-env): an Env used for interacting with HDFS. Migrated from main RocksDB repo
+* [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices
+* [RADOS](https://github.com/riversand963/rocksdb-rados-env): an Env used for interacting with RADOS. Migrated from RocksDB main repo.
+* [PMEM](https://github.com/pmem/pmem-rocksdb-plugin): a collection of plugins to enable Persistent Memory on RocksDB.
+* [IPPCP](https://github.com/intel/ippcp-plugin-rocksdb): a plugin to enable encryption on RocksDB based on Intel optimized open source IPP-Crypto library.
--- a/README.md
+++ b/README.md
@ -0,0 +1,29 @@
+## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage
+
+[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb)
+
+RocksDB is developed and maintained by Facebook Database Engineering Team.
+It is built on earlier work on [LevelDB](https://github.com/google/leveldb) by Sanjay Ghemawat (sanjay@google.com)
+and Jeff Dean (jeff@google.com)
+
+This code is a library that forms the core building block for a fast
+key-value server, especially suited for storing data on flash drives.
+It has a Log-Structured-Merge-Database (LSM) design with flexible tradeoffs
+between Write-Amplification-Factor (WAF), Read-Amplification-Factor (RAF)
+and Space-Amplification-Factor (SAF). It has multi-threaded compactions,
+making it especially suitable for storing multiple terabytes of data in a
+single database.
+
+Start with example usage here: https://github.com/facebook/rocksdb/tree/main/examples
+
+See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation.
+
+The public interface is in `include/`.  Callers should not include or
+rely on the details of any other header files in this package.  Those
+internal APIs may be changed without warning.
+
+Questions and discussions are welcome on the [RocksDB Developers Public](https://www.facebook.com/groups/rocksdb.dev/) Facebook group and [email list](https://groups.google.com/g/rocksdb) on Google Groups.
+
+## License
+
+RocksDB is dual-licensed under both the GPLv2 (found in the COPYING file in the root directory) and Apache 2.0 License (found in the LICENSE.Apache file in the root directory).  You may select, at your option, one of the above-listed licenses.
--- a/5607
+++ b/5607
--- a/USERS.md
+++ b/USERS.md
@ -0,0 +1,165 @@
+This document lists users of RocksDB and their use cases. If you are using RocksDB, please open a pull request and add yourself to the list.
+
+## Facebook
+At Facebook, we use RocksDB as storage engines in multiple data management services and a backend for many different stateful services, including:
+
+1. MyRocks -- https://github.com/MySQLOnRocksDB/mysql-5.6
+2. MongoRocks -- https://github.com/mongodb-partners/mongo-rocks
+3. ZippyDB --  Facebook's distributed key-value store with Paxos-style replication, built on top of RocksDB.[1] https://www.youtube.com/watch?v=DfiN7pG0D0k
+4. Laser -- Laser is a high query throughput, low (millisecond) latency, key-value storage service built on top of RocksDB.[1]
+5. Dragon -- a distributed graph query engine. https://code.facebook.com/posts/1737605303120405/dragon-a-distributed-graph-query-engine/
+6. Stylus -- a low-level stream processing framework written in C++.[1]
+7. LogDevice -- a distributed data store for logs [2]
+
+[1] https://research.facebook.com/publications/realtime-data-processing-at-facebook/
+
+[2] https://code.facebook.com/posts/357056558062811/logdevice-a-distributed-data-store-for-logs/
+
+## Bilibili
+[Bilibili](https://www.bilibili.com) [uses](https://www.alluxio.io/blog/when-ai-meets-alluxio-at-bilibili-building-an-efficient-ai-platform-for-data-preprocessing-and-model-training/) Alluxio to speed up its ML training workloads, and Alluxio uses RocksDB to store its filesystem metadata, so Bilibili uses RocksDB.
+
+Bilibili's [real-time platform](https://www.alibabacloud.com/blog/architecture-and-practices-of-bilibilis-real-time-platform_596676) uses Flink, and uses RocksDB as Flink's state store.
+
+## TikTok
+TikTok, or its parent company ByteDance, uses RocksDB as the storage engine for some storage systems, such as its distributed graph database [ByteGraph](https://vldb.org/pvldb/vol15/p3306-li.pdf). 
+
+Also, TikTok uses [Alluxio](https://www.alluxio.io) to [speed up Presto queries](https://www.alluxio.io/resources/videos/improving-presto-performance-with-alluxio-at-tiktok/), and Alluxio stores the files' metadata in RocksDB.
+
+## FoundationDB
+[FoundationDB](https://www.foundationdb.org/) [uses](https://github.com/apple/foundationdb/blob/377f1f692da6ab2fe5bdac57035651db3e5fb66d/fdbserver/KeyValueStoreRocksDB.actor.cpp) RocksDB to implement a [key-value store interface](https://github.com/apple/foundationdb/blob/377f1f692da6ab2fe5bdac57035651db3e5fb66d/fdbserver/KeyValueStoreRocksDB.actor.cpp#L1127) in its server backend.
+
+## Apple
+Apple [uses](https://opensource.apple.com/projects/foundationdb/) FoundationDB, so it also uses RocksDB.
+
+## Snowflake
+Snowflake [uses](https://www.snowflake.com/blog/how-foundationdb-powers-snowflake-metadata-forward/) FoundationDB, so it also uses RocksDB.
+
+## Microsoft
+The Bing search engine from Microsoft uses RocksDB as the storage engine for its web data platform: https://blogs.bing.com/Engineering-Blog/october-2021/RocksDB-in-Microsoft-Bing
+
+## LinkedIn
+Two different use cases at LinkedIn are using RocksDB as a storage engine:
+
+1. LinkedIn's follow feed for storing user's activities. Check out the blog post: https://engineering.linkedin.com/blog/2016/03/followfeed--linkedin-s-feed-made-faster-and-smarter
+2. Apache Samza, open source framework for stream processing
+
+Learn more about those use cases in a Tech Talk by Ankit Gupta and Naveen Somasundaram: http://www.youtube.com/watch?v=plqVp_OnSzg
+
+## Yahoo
+Yahoo is using RocksDB as a storage engine for their biggest distributed data store Sherpa. Learn more about it here: http://yahooeng.tumblr.com/post/120730204806/sherpa-scales-new-heights
+
+## Tencent
+[PaxosStore](https://github.com/Tencent/paxosstore) is a distributed database supporting WeChat. It uses RocksDB as its storage engine.
+
+## Baidu
+[Apache Doris](http://doris.apache.org/master/en/) is a MPP analytical database engine released by Baidu. It [uses RocksDB](http://doris.apache.org/master/en/administrator-guide/operation/tablet-meta-tool.html) to manage its tablet's metadata.
+
+## CockroachDB
+CockroachDB is an open-source geo-replicated transactional database. They are using RocksDB as their storage engine. Check out their github: https://github.com/cockroachdb/cockroach
+
+## DNANexus
+DNANexus is using RocksDB to speed up processing of genomics data.
+You can learn more from this great blog post by Mike Lin: http://devblog.dnanexus.com/faster-bam-sorting-with-samtools-and-rocksdb/
+
+## Iron.io
+Iron.io is using RocksDB as a storage engine for their distributed queueing system.
+Learn more from Tech Talk by Reed Allman: http://www.youtube.com/watch?v=HTjt6oj-RL4
+
+## Tango Me
+Tango is using RocksDB as a graph storage to store all users' connection data and other social activity data.
+
+## Turn
+Turn is using RocksDB as a storage layer for their key/value store, serving at peak 2.4MM QPS out of different datacenters.
+Check out our RocksDB Protobuf merge operator at: https://github.com/vladb38/rocksdb_protobuf
+
+## Santander UK/Cloudera Professional Services
+Check out their blog post: http://blog.cloudera.com/blog/2015/08/inside-santanders-near-real-time-data-ingest-architecture/
+
+## Airbnb
+Airbnb is using RocksDB as a storage engine for their personalized search service. You can learn more about it here: https://www.youtube.com/watch?v=ASQ6XMtogMs
+
+## Alluxio
+[Alluxio](https://www.alluxio.io) uses RocksDB to serve and scale file system metadata to beyond 1 Billion files. The detailed design and implementation is described in this engineering blog:
+https://www.alluxio.io/blog/scalable-metadata-service-in-alluxio-storing-billions-of-files/
+
+## Pinterest
+Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtube.com/watch?v=MtFEVEs_2Vo
+
+## Smyte
+[Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services.
+
+## Rakuten Marketing
+[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP.
+
+## VWO, Wingify
+[VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed.
+
+## quasardb
+[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark.
+quasardb uses a heavily tuned RocksDB as its persistence layer.
+
+## Netflix
+[Netflix](http://techblog.netflix.com/2016/05/application-data-caching-using-ssds.html) uses RocksDB on AWS EC2 instances with local SSD drives to cache application data.
+
+## TiKV
+[TiKV](https://github.com/pingcap/tikv) is a GEO-replicated, high-performance, distributed, transactional key-value database. TiKV is powered by Rust and Raft. TiKV uses RocksDB as its persistence layer.
+
+## TiDB
+[TiDB](https://github.com/pingcap/tidb) uses the TiKV distributed key-value database, so it uses RocksDB.
+
+## PingCAP
+[PingCAP](https://www.pingcap.com/) is the company behind TiDB; its cloud database service uses RocksDB.
+
+## Apache Spark
+[Spark Structured Streaming](https://docs.databricks.com/structured-streaming/rocksdb-state-store.html) uses RocksDB as the local state store.
+
+## Databricks
+[Databricks](https://www.databricks.com/) [replaces AWS RDS with TiDB](https://www.pingcap.com/case-study/how-databricks-tackles-the-scalability-limit-with-a-mysql-alternative/) for scalability, so it uses RocksDB.
+
+## Apache Flink
+[Apache Flink](https://flink.apache.org/news/2016/03/08/release-1.0.0.html) uses RocksDB to store state locally on a machine.
+
+## Dgraph
+[Dgraph](https://github.com/dgraph-io/dgraph) is an open-source, scalable, distributed, low latency, high throughput graph database. They use RocksDB to store state locally on a machine.
+
+## Uber
+[Uber](http://eng.uber.com/cherami/) uses RocksDB as a durable and scalable task queue.
+
+## 360 Pika
+[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a NoSQL database compatible with Redis. With huge amounts of data stored, Redis may suffer from a capacity bottleneck, and Pika was created to solve it. It has been widely used in many companies.
+
+## LzLabs
+LzLabs is using RocksDB as a storage engine in their multi-database distributed framework to store application configuration and user data.
+
+## ProfaneDB
+[ProfaneDB](https://profanedb.gitlab.io/) is a database for Protocol Buffers, and uses RocksDB for storage. It is accessible via gRPC, and the schema is defined using directly `.proto` files.
+
+## IOTA Foundation
+ [IOTA Foundation](https://www.iota.org/) is using RocksDB in the [IOTA Reference Implementation (IRI)](https://github.com/iotaledger/iri) to store the local state of the Tangle. The Tangle is the first open-source distributed ledger powering the future of the Internet of Things.
+
+## Avrio Project
+[Avrio Project](http://avrio-project.github.io/avrio.network/) is using RocksDB in [Avrio](https://github.com/avrio-project/avrio) to store blocks, account balances and data, and other blockchain-related data. Avrio is a multiblockchain decentralized cryptocurrency empowering monetary transactions.
+
+## Crux
+[Crux](https://github.com/juxt/crux) is a document database that uses RocksDB for local [EAV](https://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model) index storage to enable point-in-time bitemporal Datalog queries. The "unbundled" architecture uses Kafka to provide horizontal scalability.
+
+## Nebula Graph
+[Nebula Graph](https://github.com/vesoft-inc/nebula) is a distributed, scalable, lightning-fast, open source graph database capable of hosting super large scale graphs with dozens of billions of vertices (nodes) and trillions of edges, with milliseconds of latency.
+
+## YugabyteDB
+[YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/.
+
+## ArangoDB
+[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine.
+
+## Milvus
+[Milvus](https://milvus.io/) is an open source vector database for unstructured data. It uses RocksDB not only as one of the supported kv storage engines, but also as a message queue.
+
+## Kafka
+[Kafka](https://kafka.apache.org/) is an open-source distributed event streaming platform. It uses RocksDB to store state in Kafka Streams: https://www.confluent.io/blog/how-to-tune-rocksdb-kafka-streams-state-stores-performance/.
+
+## Solana Labs
+[Solana](https://github.com/solana-labs/solana) is a fast, secure, scalable, and decentralized blockchain.  It uses RocksDB as the underlying storage for its ledger store.
+
+## Others
+More databases using RocksDB can be found at [dbdb.io](https://dbdb.io/browse?embeds=rocksdb).
--- a/39
+++ b/39
@ -0,0 +1,39 @@
+# Vagrant file
+# Declares several named guest machines (ubuntu14, centos65, centos7,
+# FreeBSD10), each backed by a different base box.
+Vagrant.configure("2") do |config|
+
+  # Resource settings applied when the VirtualBox provider is used.
+  config.vm.provider "virtualbox" do |v|
+    v.memory = 4096
+    v.cpus = 2
+  end
+
+  # Ubuntu guest; no provisioning step is declared for it here.
+  config.vm.define "ubuntu14" do |box|
+    box.vm.box = "ubuntu/trusty64"
+  end
+
+  # CentOS 6.5 guest; no provisioning step is declared for it here.
+  config.vm.define "centos65" do |box|
+    box.vm.box = "chef/centos-6.5"
+  end
+
+  # CentOS 7 guest, provisioned by running build_tools/setup_centos7.sh.
+  config.vm.define "centos7" do |box|
+    box.vm.box = "centos/7"
+    box.vm.provision "shell", path: "build_tools/setup_centos7.sh"
+  end
+
+  # FreeBSD guest: shares the project directory at /vagrant over NFS, gets a
+  # private-network address, and builds the project during provisioning.
+  config.vm.define "FreeBSD10" do |box|
+    box.vm.guest = :freebsd
+    box.vm.box = "robin/freebsd-10"
+    # FreeBSD does not support 'mount_virtualbox_shared_folder', use NFS
+    box.vm.synced_folder ".", "/vagrant", :nfs => true, id: "vagrant-root"
+    box.vm.network "private_network", ip: "10.0.1.10"
+
+    # build everything after creating VM, skip using --no-provision
+    # The inline script installs gmake and clang35, selects clang++35 as CXX,
+    # then runs a clean build with OPT=-g from /vagrant.
+    box.vm.provision "shell", inline: <<-SCRIPT
+      pkg install -y gmake clang35
+      export CXX=/usr/local/bin/clang++35
+      cd /vagrant
+      gmake clean
+      gmake all OPT=-g
+    SCRIPT
+  end
+
+end
--- a/WINDOWS_PORT.md
+++ b/WINDOWS_PORT.md
@ -0,0 +1,228 @@
+# Microsoft Contribution Notes
+
+## Contributors
+* Alexander Zinoviev https://github.com/zinoale
+* Dmitri Smirnov https://github.com/yuslepukhin
+* Praveen Rao  https://github.com/PraveenSinghRao
+* Sherlock Huang  https://github.com/SherlockNoMad
+
+## Introduction
+RocksDB is a well proven open source key-value persistent store, optimized for fast storage. It provides scalability with number of CPUs and storage IOPS, to support IO-bound, in-memory and write-once workloads, most importantly, to be flexible to allow for innovation.
+
+As Microsoft Bing team we have been continuously pushing hard to improve the scalability, efficiency of platform and eventually benefit Bing end-user satisfaction.  We would like to explore the opportunity to embrace open source, RocksDB here, to use, enhance and customize for our usage, and also contribute back to the RocksDB community. Herein, we are pleased to offer this RocksDB port for Windows platform.
+
+These notes describe some decisions and changes we had to make with regards to porting RocksDB on Windows. We hope this will help both reviewers and users of the Windows port.
+We are open for comments and improvements.
+
+## OS specifics
+All of the porting, testing and benchmarking was done on Windows Server 2012 R2 Datacenter 64-bit but to the best of our knowledge there is not a specific API we used during porting that is unsupported on other Windows OS after Vista.
+
+## Porting goals
+We strive to achieve the following goals:
+* make use of the existing porting interface of RocksDB
+* make minimum modifications within platform independent code.
+* make all unit tests pass both in debug and release builds.
+  * Note: latest introduction of SyncPoint seems to disable running db_test in Release.
+* make performance on par with published benchmarks accounting for HW differences
+* we would like to keep the port code in line with the main branch with no forking
+
+## Build system
+We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient. 
+
+At the same time it generates Visual Studio projects that are both usable from a command line and IDE.
+
+The top-level CMakeLists.txt file contains description of all targets and build rules. It also provides brief instructions on how to build the software for Windows. One more build related file is thirdparty.inc that also resides on the top level. This file must be edited to point to actual third party libraries location.
+We think that it would be beneficial to merge the existing make-based build system and the new cmake-based build system into a single one to use on all platforms.
+
+All building and testing was done for 64-bit. We have not conducted any testing for 32-bit and early reports indicate that it will not run on 32-bit.
+
+## C++ and STL notes
+We had to make some minimum changes within the portable files that either account for OS differences or the shortcomings of C++11 support in the current version of the MS compiler. Most or all of them are expected to be fixed in the upcoming compiler releases.
+
+We plan to use this port for our business purposes here at Bing and this provided business justification for this port. This also means that, at present, we are not free to choose the compiler version at will.
+
+* Certain headers that are not present and not necessary on Windows were simply excluded with `#ifndef OS_WIN` in a few places (`unistd.h`)
+* All posix specific headers were replaced with `port/port.h`, which worked well
+* Replaced `dirent.h` with `port/port_dirent.h` (very few places), which implements the relevant interfaces within the `rocksdb::port` namespace
+* Replaced `sys/time.h` with `port/sys_time.h` (few places), which implements equivalents within `rocksdb::port`
+* `printf %z` specification is not supported on Windows. To imitate existing standards we came up with a string macro `ROCKSDB_PRIszt` which expands to `zu` on posix systems and to `Iu` on windows.
+* In-class member initializers were moved to constructors in some cases
+* `constexpr` is not supported. We had to replace `std::numeric_limits<>::max/min()` with the corresponding C macros for constants. Sometimes we had to make class members `static const` and place a definition within a .cc file.
+* `constexpr` for functions was replaced with a template specialization (1 place)
+* Union members that have non-trivial constructors were replaced with `char[]` in one place along with bug fixes (spatial experimental feature)
+* Zero-sized arrays are deemed a non-standard extension which we converted to 1 size array and that should work well for the purposes of these classes.
+* `std::chrono` lacks nanoseconds support (fixed in the upcoming release of the STL) and we had to use `QueryPerformanceCounter()` within env_win.cc
+* Function local statics initialization is still not safe. Used `std::call_once` to mitigate within WinEnv.
+
+## Windows Environments notes
+We endeavored to make it functionally on par with posix_env. This means we replicated the functionality of the thread pool and other things as precisely as possible, including:
+* Replicate posix logic using `std::thread` primitives.
+* Implement all posix_env disk access functionality.
+* Set `use_os_buffer=false` to disable OS disk buffering for WinWritableFile and WinRandomAccessFile.
+* Replace `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure.
+* Use `SetFileInformationByHandle` to compensate absence of `fallocate`.
+
+### In detail
+Even though Windows provides its own efficient thread-pool implementation we chose to replicate posix logic using `std::thread` primitives. This allows anyone to quickly detect any changes within the posix source code and replicate them within windows env. This has proven to work very well. At the same time, anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments.
+
+For disk access we implemented all of the functionality present within the posix_env which includes memory mapped files, random access, rate-limiter support etc.
+The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It's not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen  `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST.
+
+We have replaced `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure so we can atomically seek to the position of the disk operation but still perform the operation synchronously. Thus we are able to emulate the functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position but that hardly matters given the random nature of access.
+
+We used `SetFileInformationByHandle` both to truncate files after writing a full final page to disk and to pre-allocate disk space for faster I/O thus compensating for the absence of `fallocate` although some differences remain. For example, the pre-allocated space is not filled with zeros like on Linux, however, on a positive note, the end of file position is also not modified after pre-allocation.
+
+RocksDB renames, copies and deletes files at will even though they may be opened with another handle at the same time. We had to relax and allow nearly all the concurrent access permissions possible.
+
+## Thread-Local Storage
+Thread-Local storage plays a significant role for RocksDB performance. Rather than creating a separate implementation we chose to create inline wrappers that forward `pthread_specific` calls to Windows `Tls` interfaces within `rocksdb::port` namespace. This leaves the existing meat of the logic intact and unchanged and just as maintainable.
+
+To mitigate the lack of thread local storage cleanup on thread-exit we added a limited amount of windows specific code within the same thread_local.cc file that injects a cleanup callback into a `"__tls"` structure within `".CRT$XLB"` data segment. This approach guarantees that the callback is invoked regardless of whether RocksDB is used within an executable, standalone DLL or within another DLL.
+
+## Jemalloc usage
+
+When RocksDB is used with Jemalloc the latter needs to be initialized before any of the C++ globals or statics. To accomplish that we injected an initialization routine into `".CRT$XCT"` that is automatically invoked by the runtime before initializing static objects. je-uninit is queued to `atexit()`. 
+
+The jemalloc redirecting `new/delete` global operators are used by the linker provided certain conditions are met. See build section in these notes.
+
+## Stack Trace and Unhandled Exception Handler
+
+We decided not to implement these two features because the hosting program as a rule has these two things in it.
+We experienced no inconveniences debugging issues in the debugger or analyzing process dumps if need be and thus we did not
+see this as a priority.
+
+## Performance results
+### Setup
+All of the benchmarks are run on the same set of machines. Here are the details of the test setup:
+* 2 Intel(R) Xeon(R) E5 2450 0 @ 2.10 GHz (total 16 cores)
+* 2 XK0480GDQPH SSD Device, total 894GB free disk
+* Machine has 128 GB of RAM
+* Operating System: Windows Server 2012 R2 Datacenter
+* 100 Million keys; each key is of size 10 bytes, each value is of size 800 bytes
+* total database size is ~76GB
+* The performance result is based on RocksDB 3.11.
+* The parameters used, unless specified, were exactly the same as published in the GitHub Wiki page. 
+
+### RocksDB on flash storage
+
+#### Test 1. Bulk Load of keys in Random Order
+
+Version 3.11 
+
+* Total Run Time: 17.6 min
+* Fillrandom: 5.480 micros/op 182465 ops/sec;  142.0 MB/s
+* Compact: 486056544.000 micros/op 0 ops/sec
+
+Version 3.10 
+
+* Total Run Time: 16.2 min 
+* Fillrandom: 5.018 micros/op 199269 ops/sec;  155.1 MB/s 
+* Compact: 441313173.000 micros/op 0 ops/sec; 
+
+
+#### Test 2. Bulk Load of keys in Sequential Order
+
+Version 3.11 
+
+* Fillseq: 4.944 micros/op 202k ops/sec;  157.4 MB/s
+
+Version 3.10
+
+* Fillseq: 4.105 micros/op 243.6k ops/sec;  189.6 MB/s 
+
+
+#### Test 3. Random Write
+
+Version 3.11 
+
+* Unbuffered I/O enabled
+* Overwrite: 52.661 micros/op 18.9k ops/sec;   14.8 MB/s
+
+Version 3.10
+
+* Unbuffered I/O enabled 
+* Overwrite: 52.661 micros/op 18.9k ops/sec; 
+
+
+#### Test 4. Random Read
+
+Version 3.11 
+
+* Unbuffered I/O enabled
+* Readrandom: 15.716 micros/op 63.6k ops/sec; 49.5 MB/s 
+
+Version 3.10
+
+* Unbuffered I/O enabled 
+* Readrandom: 15.548 micros/op 64.3k ops/sec; 
+
+
+#### Test 5. Multi-threaded read and single-threaded write
+
+Version 3.11
+
+* Unbuffered I/O enabled
+* Readwhilewriting: 25.128 micros/op 39.7k ops/sec; 
+
+Version 3.10
+
+* Unbuffered I/O enabled 
+* Readwhilewriting: 24.854 micros/op 40.2k ops/sec; 
+
+
+### RocksDB In Memory 
+
+#### Test 1. Point Lookup
+
+Version 3.11
+
+80K writes/sec
+* Write Rate Achieved: 40.5k write/sec;
+* Readwhilewriting: 0.314 micros/op 3187455 ops/sec;  364.8 MB/s (715454999 of 715454999 found)
+
+Version 3.10
+
+* Write Rate Achieved:  50.6k write/sec 
+* Readwhilewriting: 0.316 micros/op 3162028 ops/sec; (719576999 of 719576999 found) 
+
+
+*10K writes/sec*
+
+Version 3.11
+
+* Write Rate Achieved: 5.8k/s write/sec
+* Readwhilewriting: 0.246 micros/op 4062669 ops/sec;  464.9 MB/s (915481999 of 915481999 found)
+
+Version 3.10
+
+* Write Rate Achieved: 5.8k/s write/sec 
+* Readwhilewriting: 0.244 micros/op 4106253 ops/sec; (927986999 of 927986999 found) 
+
+
+#### Test 2. Prefix Range Query
+
+Version 3.11
+
+80K writes/sec
+* Write Rate Achieved:  46.3k/s write/sec
+* Readwhilewriting: 0.362 micros/op 2765052 ops/sec;  316.4 MB/s (611549999 of 611549999 found)
+
+Version 3.10
+
+* Write Rate Achieved: 45.8k/s write/sec 
+* Readwhilewriting: 0.317 micros/op 3154941 ops/sec; (708158999 of 708158999 found) 
+
+Version 3.11
+
+10K writes/sec
+* Write Rate Achieved: 5.78k write/sec
+* Readwhilewriting: 0.269 micros/op 3716692 ops/sec;  425.3 MB/s (837401999 of 837401999 found)
+
+Version 3.10
+
+* Write Rate Achieved: 5.7k write/sec 
+* Readwhilewriting: 0.261 micros/op 3830152 ops/sec; (863482999 of 863482999 found) 
+
+
+We think that there is still big room to improve the performance, which will be an ongoing effort for us.
+
--- a/buckifier/bench-slow.json
+++ b/buckifier/bench-slow.json
--- a/buckifier/bench.json
+++ b/buckifier/bench.json
--- a/buckifier/buckify_rocksdb.py
+++ b/buckifier/buckify_rocksdb.py
@ -0,0 +1,333 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+try:
+    from builtins import str
+except ImportError:
+    from __builtin__ import str
+import fnmatch
+import json
+import os
+import sys
+
+from targets_builder import TARGETSBuilder
+
+from util import ColorString
+
+# This script generates TARGETS file for Buck.
+# Buck is a build tool specifying dependencies among different build targets.
+# User can pass extra dependencies as a JSON object via command line, and this
+# script can include these dependencies in the generate TARGETS file.
+# Usage:
+# $python3 buckifier/buckify_rocksdb.py
+# (This generates a TARGET file without user-specified dependency for unit
+# tests.)
+# $python3 buckifier/buckify_rocksdb.py \
+#        '{"fake": {
+#                      "extra_deps": [":test_dep", "//fakes/module:mock1"],
+#                      "extra_compiler_flags": ["-DFOO_BAR", "-Os"]
+#                  }
+#         }'
+# (Generated TARGETS file has test_dep and mock1 as dependencies for RocksDB
+# unit tests, and will use the extra_compiler_flags to compile the unit test
+# source.)
+
+# tests to export as libraries for inclusion in other projects
+_EXPORTED_TEST_LIBS = ["env_basic_test"]
+
+# Parse src.mk files as a Dictionary of
+# VAR_NAME => list of files
+def parse_src_mk(repo_path):
+    # Reads <repo_path>/src.mk and returns a dict mapping each variable name
+    # (the text before the first "=") to the list of paths listed under it.
+    src_mk = repo_path + "/src.mk"
+    src_files = {}
+    # NOTE(review): the file object returned by open() is never explicitly
+    # closed here.
+    for line in open(src_mk):
+        line = line.strip()
+        # Ignore blank lines and lines whose first character is "#".
+        if len(line) == 0 or line[0] == "#":
+            continue
+        if "=" in line:
+            # Start a new variable with an empty list. Anything after the "="
+            # on this same line is not recorded.
+            current_src = line.split("=")[0].strip()
+            src_files[current_src] = []
+        elif ".c" in line:
+            # Any line containing ".c" (which also matches ".cc") is taken as
+            # a path; the text from the first backslash onward is dropped.
+            # NOTE(review): current_src is unbound if such a line appears
+            # before any line containing "=".
+            src_path = line.split("\\")[0].strip()
+            src_files[current_src].append(src_path)
+    return src_files
+
+
+# get all .cc / .c files
+def get_cc_files(repo_path):
+    # Walks the tree under repo_path and returns a list of paths for every
+    # file matching "*.cc" or "*.c", skipping directories whose path contains
+    # "java".
+    cc_files = []
+    for root, _dirnames, filenames in os.walk(
+        repo_path
+    ):  # noqa: B007 T25377293 Grandfathered in
+        # Drop the first len(repo_path) + 1 characters so the directory is
+        # expressed relative to repo_path (assumes repo_path has no trailing
+        # separator -- TODO confirm against callers).
+        root = root[(len(repo_path) + 1) :]
+        # Substring match: any directory path containing "java" is skipped.
+        if "java" in root:
+            # Skip java
+            continue
+        # Within one directory, all "*.cc" matches are appended before the
+        # "*.c" matches.
+        for filename in fnmatch.filter(filenames, "*.cc"):
+            cc_files.append(os.path.join(root, filename))
+        for filename in fnmatch.filter(filenames, "*.c"):
+            cc_files.append(os.path.join(root, filename))
+    return cc_files
+
+
+# Get non_parallel tests from Makefile
+def get_non_parallel_tests(repo_path):
+    # Reads <repo_path>/Makefile and returns a set of the entries listed on
+    # the backslash-continued lines that follow "NON_PARALLEL_TEST =".
+    Makefile = repo_path + "/Makefile"
+
+    s = set({})
+
+    found_non_parallel_tests = False
+    for line in open(Makefile):
+        line = line.strip()
+        if line.startswith("NON_PARALLEL_TEST ="):
+            # Only marks the start of the list; nothing on this line itself
+            # is added to the result.
+            found_non_parallel_tests = True
+        elif found_non_parallel_tests:
+            if line.endswith("\\"):
+                # remove the trailing \
+                line = line[:-1]
+                line = line.strip()
+                s.add(line)
+            else:
+                # we consumed all the non_parallel tests
+                # NOTE(review): this first line without a trailing backslash
+                # is not added, so a final list entry written without "\" is
+                # left out of the result.
+                break
+
+    return s
+
+
+# Parse extra dependencies passed by user from command line
+def get_dependencies():
+    # Builds the map of target alias -> extra dependency settings. The ""
+    # alias is always present with empty "extra_deps" and
+    # "extra_compiler_flags" lists; further aliases come from the JSON object
+    # in sys.argv[1], when one is given.
+    deps_map = {"": {"extra_deps": [], "extra_compiler_flags": []}}
+    if len(sys.argv) < 2:
+        # No command-line argument: return only the default entry.
+        return deps_map
+
+    def encode_dict(data):
+        # Returns a new dict with the same keys and values as data, rebuilding
+        # nested dict values recursively.
+        rv = {}
+        for k, v in data.items():
+            if isinstance(v, dict):
+                v = encode_dict(v)
+            rv[k] = v
+        return rv
+
+    # object_hook runs encode_dict on every JSON object that is decoded.
+    extra_deps = json.loads(sys.argv[1], object_hook=encode_dict)
+    # A user-supplied alias replaces any existing entry with the same key,
+    # including the default "" entry.
+    for target_alias, deps in extra_deps.items():
+        deps_map[target_alias] = deps
+    return deps_map
+
+
+# Prepare TARGETS file for buck
+def generate_targets(repo_path, deps_map):
+    print(ColorString.info("Generating TARGETS"))
+    # parsed src.mk file
+    src_mk = parse_src_mk(repo_path)
+    # get all .cc files
+    cc_files = get_cc_files(repo_path)
+    # get non_parallel tests from Makefile
+    non_parallel_tests = get_non_parallel_tests(repo_path)
+
+    if src_mk is None or cc_files is None or non_parallel_tests is None:
+        return False
+
+    extra_argv = ""
+    if len(sys.argv) >= 2:
+        # Heuristically quote and canonicalize whitespace for inclusion
+        # in how the file was generated.
+        extra_argv = " '{0}'".format(" ".join(sys.argv[1].split()))
+
+    TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv)
+
+    # rocksdb_lib
+    TARGETS.add_library(
+        "rocksdb_lib",
+        src_mk["LIB_SOURCES"] +
+        # always add range_tree, it's only excluded on ppc64, which we don't use internally
+        src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"],
+        deps=[
+            "//folly/container:f14_hash",
+            "//folly/experimental/coro:blocking_wait",
+            "//folly/experimental/coro:collect",
+            "//folly/experimental/coro:coroutine",
+            "//folly/experimental/coro:task",
+            "//folly/synchronization:distributed_mutex",
+        ],
+    )
+    # rocksdb_whole_archive_lib
+    TARGETS.add_library(
+        "rocksdb_whole_archive_lib",
+        [],
+        deps=[
+            ":rocksdb_lib",
+        ],
+        headers=None,
+        extra_external_deps="",
+        link_whole=True,
+    )
+    # rocksdb_test_lib
+    TARGETS.add_library(
+        "rocksdb_test_lib",
+        src_mk.get("MOCK_LIB_SOURCES", [])
+        + src_mk.get("TEST_LIB_SOURCES", [])
+        + src_mk.get("EXP_LIB_SOURCES", [])
+        + src_mk.get("ANALYZER_LIB_SOURCES", []),
+        [":rocksdb_lib"],
+        extra_test_libs=True,
+    )
+    # rocksdb_tools_lib
+    TARGETS.add_library(
+        "rocksdb_tools_lib",
+        src_mk.get("BENCH_LIB_SOURCES", [])
+        + src_mk.get("ANALYZER_LIB_SOURCES", [])
+        + ["test_util/testutil.cc"],
+        [":rocksdb_lib"],
+    )
+    # rocksdb_cache_bench_tools_lib
+    TARGETS.add_library(
+        "rocksdb_cache_bench_tools_lib",
+        src_mk.get("CACHE_BENCH_LIB_SOURCES", []),
+        [":rocksdb_lib"],
+    )
+    # rocksdb_stress_lib
+    TARGETS.add_rocksdb_library(
+        "rocksdb_stress_lib",
+        src_mk.get("ANALYZER_LIB_SOURCES", [])
+        + src_mk.get("STRESS_LIB_SOURCES", [])
+        + ["test_util/testutil.cc"],
+    )
+    # db_stress binary
+    TARGETS.add_binary(
+        "db_stress", ["db_stress_tool/db_stress.cc"], [":rocksdb_stress_lib"]
+    )
+    # bench binaries
+    for src in src_mk.get("MICROBENCH_SOURCES", []):
+        name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0]
+        TARGETS.add_binary(name, [src], [], extra_bench_libs=True)
+    print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
+
+    # Dictionary test executable name -> relative source file path
+    test_source_map = {}
+
+    # c_test.c is added through TARGETS.add_c_test(). If there
+    # are more than one .c test file, we need to extend
+    # TARGETS.add_c_test() to include other C tests too.
+    for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []):
+        if test_src != "db/c_test.c":
+            print("Don't know how to deal with " + test_src)
+            return False
+    TARGETS.add_c_test()
+
+    try:
+        with open(f"{repo_path}/buckifier/bench.json") as json_file:
+            fast_fancy_bench_config_list = json.load(json_file)
+            for config_dict in fast_fancy_bench_config_list:
+                clean_benchmarks = {}
+                benchmarks = config_dict["benchmarks"]
+                for binary, benchmark_dict in benchmarks.items():
+                    clean_benchmarks[binary] = {}
+                    for benchmark, overloaded_metric_list in benchmark_dict.items():
+                        clean_benchmarks[binary][benchmark] = []
+                        for metric in overloaded_metric_list:
+                            if not isinstance(metric, dict):
+                                clean_benchmarks[binary][benchmark].append(metric)
+                TARGETS.add_fancy_bench_config(
+                    config_dict["name"],
+                    clean_benchmarks,
+                    False,
+                    config_dict["expected_runtime_one_iter"],
+                    config_dict["sl_iterations"],
+                    config_dict["regression_threshold"],
+                )
+
+        with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
+            slow_fancy_bench_config_list = json.load(json_file)
+            for config_dict in slow_fancy_bench_config_list:
+                clean_benchmarks = {}
+                benchmarks = config_dict["benchmarks"]
+                for binary, benchmark_dict in benchmarks.items():
+                    clean_benchmarks[binary] = {}
+                    for benchmark, overloaded_metric_list in benchmark_dict.items():
+                        clean_benchmarks[binary][benchmark] = []
+                        for metric in overloaded_metric_list:
+                            if not isinstance(metric, dict):
+                                clean_benchmarks[binary][benchmark].append(metric)
+            for config_dict in slow_fancy_bench_config_list:
+                TARGETS.add_fancy_bench_config(
+                    config_dict["name"] + "_slow",
+                    clean_benchmarks,
+                    True,
+                    config_dict["expected_runtime_one_iter"],
+                    config_dict["sl_iterations"],
+                    config_dict["regression_threshold"],
+                )
+    # it is better servicelab experiments break
+    # than rocksdb github ci
+    except Exception:
+        pass
+
+    TARGETS.add_test_header()
+
+    for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
+        test = test_src.split(".c")[0].strip().split("/")[-1].strip()
+        test_source_map[test] = test_src
+        print("" + test + " " + test_src)
+
+    for target_alias, deps in deps_map.items():
+        for test, test_src in sorted(test_source_map.items()):
+            if len(test) == 0:
+                print(ColorString.warning("Failed to get test name for %s" % test_src))
+                continue
+
+            test_target_name = test if not target_alias else test + "_" + target_alias
+
+            if test in _EXPORTED_TEST_LIBS:
+                test_library = "%s_lib" % test_target_name
+                TARGETS.add_library(
+                    test_library,
+                    [test_src],
+                    deps=[":rocksdb_test_lib"],
+                    extra_test_libs=True,
+                )
+                TARGETS.register_test(
+                    test_target_name,
+                    test_src,
+                    deps=json.dumps(deps["extra_deps"] + [":" + test_library]),
+                    extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]),
+                )
+            else:
+                TARGETS.register_test(
+                    test_target_name,
+                    test_src,
+                    deps=json.dumps(deps["extra_deps"] + [":rocksdb_test_lib"]),
+                    extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]),
+                )
+
+    print(ColorString.info("Generated TARGETS Summary:"))
+    print(ColorString.info("- %d libs" % TARGETS.total_lib))
+    print(ColorString.info("- %d binarys" % TARGETS.total_bin))
+    print(ColorString.info("- %d tests" % TARGETS.total_test))
+    return True
+
+
+def get_rocksdb_path():
+    # rocksdb = {script_dir}/..
+    script_dir = os.path.dirname(sys.argv[0])
+    script_dir = os.path.abspath(script_dir)
+    rocksdb_path = os.path.abspath(os.path.join(script_dir, "../"))
+
+    return rocksdb_path
+
+
+def exit_with_error(msg):
+    print(ColorString.error(msg))
+    sys.exit(1)
+
+
+def main():
+    deps_map = get_dependencies()
+    # Generate TARGETS file for buck
+    ok = generate_targets(get_rocksdb_path(), deps_map)
+    if not ok:
+        exit_with_error("Failed to generate TARGETS files")
+
+
+if __name__ == "__main__":
+    main()
--- a/buckifier/check_buck_targets.sh
+++ b/buckifier/check_buck_targets.sh
@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# If clang_format_diff.py command is not specified, we assume we are able to
+# access directly without any path.
+
+TGT_DIFF=`git diff TARGETS | head -n 1`
+
+if [ ! -z "$TGT_DIFF" ]
+then
+  echo "TARGETS file has uncommitted changes. Skip this check."
+  exit 0
+fi
+
+echo Backup original TARGETS file.
+
+cp TARGETS TARGETS.bkp
+
+${PYTHON:-python3} buckifier/buckify_rocksdb.py
+
+TGT_DIFF=`git diff TARGETS | head -n 1`
+
+if [ -z "$TGT_DIFF" ]
+then
+  mv TARGETS.bkp TARGETS
+  exit 0
+else
+  echo "Please run '${PYTHON:-python3} buckifier/buckify_rocksdb.py' to update TARGETS file."
+  echo "Do not manually update TARGETS file."
+  ${PYTHON:-python3} --version
+  mv TARGETS.bkp TARGETS
+  exit 1
+fi
--- a/buckifier/rocks_test_runner.sh
+++ b/buckifier/rocks_test_runner.sh
@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# Create a tmp directory for the test to use
+TEST_DIR=$(mktemp -d /dev/shm/fbcode_rocksdb_XXXXXXX)
+# shellcheck disable=SC2068
+TEST_TMPDIR="$TEST_DIR" $@ && rm -rf "$TEST_DIR"
--- a/buckifier/targets_builder.py
+++ b/buckifier/targets_builder.py
@ -0,0 +1,150 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+try:
+    from builtins import object, str
+except ImportError:
+    from __builtin__ import object, str
+import pprint
+
+import targets_cfg
+
+
+def pretty_list(lst, indent=8):
+    if lst is None or len(lst) == 0:
+        return ""
+
+    if len(lst) == 1:
+        return '"%s"' % lst[0]
+
+    separator = '",\n%s"' % (" " * indent)
+    res = separator.join(sorted(lst))
+    res = "\n" + (" " * indent) + '"' + res + '",\n' + (" " * (indent - 4))
+    return res
+
+
+class TARGETSBuilder(object):
+    def __init__(self, path, extra_argv):
+        self.path = path
+        header = targets_cfg.rocksdb_target_header_template.format(
+            extra_argv=extra_argv
+        )
+        with open(path, "wb") as targets_file:
+            targets_file.write(header.encode("utf-8"))
+        self.total_lib = 0
+        self.total_bin = 0
+        self.total_test = 0
+        self.tests_cfg = ""
+
+    def add_library(
+        self,
+        name,
+        srcs,
+        deps=None,
+        headers=None,
+        extra_external_deps="",
+        link_whole=False,
+        external_dependencies=None,
+        extra_test_libs=False,
+    ):
+        if headers is not None:
+            headers = "[" + pretty_list(headers) + "]"
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                targets_cfg.library_template.format(
+                    name=name,
+                    srcs=pretty_list(srcs),
+                    headers=headers,
+                    deps=pretty_list(deps),
+                    extra_external_deps=extra_external_deps,
+                    link_whole=link_whole,
+                    external_dependencies=pretty_list(external_dependencies),
+                    extra_test_libs=extra_test_libs,
+                ).encode("utf-8")
+            )
+        self.total_lib = self.total_lib + 1
+
+    def add_rocksdb_library(self, name, srcs, headers=None, external_dependencies=None):
+        if headers is not None:
+            headers = "[" + pretty_list(headers) + "]"
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                targets_cfg.rocksdb_library_template.format(
+                    name=name,
+                    srcs=pretty_list(srcs),
+                    headers=headers,
+                    external_dependencies=pretty_list(external_dependencies),
+                ).encode("utf-8")
+            )
+        self.total_lib = self.total_lib + 1
+
+    def add_binary(
+        self,
+        name,
+        srcs,
+        deps=None,
+        extra_preprocessor_flags=None,
+        extra_bench_libs=False,
+    ):
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                targets_cfg.binary_template.format(
+                    name=name,
+                    srcs=pretty_list(srcs),
+                    deps=pretty_list(deps),
+                    extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
+                    extra_bench_libs=extra_bench_libs,
+                ).encode("utf-8")
+            )
+        self.total_bin = self.total_bin + 1
+
+    def add_c_test(self):
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                b"""
+add_c_test_wrapper()
+"""
+            )
+
+    def add_test_header(self):
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                b"""
+        # Generate a test rule for each entry in ROCKS_TESTS
+        # Do not build the tests in opt mode, since SyncPoint and other test code
+        # will not be included.
+"""
+            )
+
+    def add_fancy_bench_config(
+        self,
+        name,
+        bench_config,
+        slow,
+        expected_runtime,
+        sl_iterations,
+        regression_threshold,
+    ):
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                targets_cfg.fancy_bench_template.format(
+                    name=name,
+                    bench_config=pprint.pformat(bench_config),
+                    slow=slow,
+                    expected_runtime=expected_runtime,
+                    sl_iterations=sl_iterations,
+                    regression_threshold=regression_threshold,
+                ).encode("utf-8")
+            )
+
+    def register_test(self, test_name, src, deps, extra_compiler_flags):
+        with open(self.path, "ab") as targets_file:
+            targets_file.write(
+                targets_cfg.unittests_template.format(
+                    test_name=test_name,
+                    test_cc=str(src),
+                    deps=deps,
+                    extra_compiler_flags=extra_compiler_flags,
+                ).encode("utf-8")
+            )
+        self.total_test = self.total_test + 1
--- a/buckifier/targets_cfg.py
+++ b/buckifier/targets_cfg.py
@ -0,0 +1,41 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+# Header written at the top of the generated file; {extra_argv} is filled in
+# by the caller. "\100" is the octal escape for "@", so the rendered text
+# reads "@generated" while this source file does not contain that literal
+# marker (presumably to keep this file from being treated as generated --
+# confirm).
+rocksdb_target_header_template = """# This file \100generated by:
+#$ python3 buckifier/buckify_rocksdb.py{extra_argv}
+# --> DO NOT EDIT MANUALLY <--
+# This file is a Facebook-specific integration for buck builds, so can
+# only be validated by Facebook employees.
+#
+# @noautodeps @nocodemods
+load("//rocks/buckifier:defs.bzl", "cpp_library_wrapper","rocks_cpp_library_wrapper","cpp_binary_wrapper","cpp_unittest_wrapper","fancy_bench_wrapper","add_c_test_wrapper")
+
+"""
+
+
+# One cpp_library_wrapper(...) call. {srcs} and {deps} are placed inside
+# literal brackets, while {headers}, {link_whole} and {extra_test_libs} are
+# inserted as-is.
+library_template = """
+cpp_library_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], headers={headers}, link_whole={link_whole}, extra_test_libs={extra_test_libs})
+"""
+
+# One rocks_cpp_library_wrapper(...) call; takes only name, srcs and headers.
+rocksdb_library_template = """
+rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers})
+
+"""
+
+
+# One cpp_binary_wrapper(...) call.
+binary_template = """
+cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs})
+"""
+
+# One cpp_unittest_wrapper(...) call for a single test source. {deps} and
+# {extra_compiler_flags} are inserted as-is, without surrounding brackets.
+unittests_template = """
+cpp_unittest_wrapper(name="{test_name}",
+            srcs=["{test_cc}"],
+            deps={deps},
+            extra_compiler_flags={extra_compiler_flags})
+
+"""
+
+# One fancy_bench_wrapper(...) call describing a benchmark suite.
+fancy_bench_template = """
+fancy_bench_wrapper(suite_name="{name}", binary_to_bench_to_metric_list_map={bench_config}, slow={slow}, expected_runtime={expected_runtime}, sl_iterations={sl_iterations}, regression_threshold={regression_threshold})
+
+"""
--- a/buckifier/util.py
+++ b/buckifier/util.py
@ -0,0 +1,118 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+"""
+This module keeps commonly used components.
+"""
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+try:
+    from builtins import object
+except ImportError:
+    from __builtin__ import object
+import os
+import subprocess
+import sys
+import time
+
+
+class ColorString(object):
+    """Generate colorful strings on terminal"""
+
+    HEADER = "\033[95m"
+    BLUE = "\033[94m"
+    GREEN = "\033[92m"
+    WARNING = "\033[93m"
+    FAIL = "\033[91m"
+    ENDC = "\033[0m"
+
+    @staticmethod
+    def _make_color_str(text, color):
+        # In Python2, default encoding for unicode string is ASCII
+        if sys.version_info.major <= 2:
+            return "".join([color, text.encode("utf-8"), ColorString.ENDC])
+        # From Python3, default encoding for unicode string is UTF-8
+        return "".join([color, text, ColorString.ENDC])
+
+    @staticmethod
+    def ok(text):
+        if ColorString.is_disabled:
+            return text
+        return ColorString._make_color_str(text, ColorString.GREEN)
+
+    @staticmethod
+    def info(text):
+        if ColorString.is_disabled:
+            return text
+        return ColorString._make_color_str(text, ColorString.BLUE)
+
+    @staticmethod
+    def header(text):
+        if ColorString.is_disabled:
+            return text
+        return ColorString._make_color_str(text, ColorString.HEADER)
+
+    @staticmethod
+    def error(text):
+        if ColorString.is_disabled:
+            return text
+        return ColorString._make_color_str(text, ColorString.FAIL)
+
+    @staticmethod
+    def warning(text):
+        if ColorString.is_disabled:
+            return text
+        return ColorString._make_color_str(text, ColorString.WARNING)
+
+    is_disabled = False
+
+
+def run_shell_command(shell_cmd, cmd_dir=None):
+    """Run a single shell command.
+    @returns a tuple of shell command return code, stdout, stderr"""
+
+    if cmd_dir is not None and not os.path.exists(cmd_dir):
+        run_shell_command("mkdir -p %s" % cmd_dir)
+
+    start = time.time()
+    print("\t>>> Running: " + shell_cmd)
+    p = subprocess.Popen(  # noqa
+        shell_cmd,
+        shell=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=cmd_dir,
+    )
+    stdout, stderr = p.communicate()
+    end = time.time()
+
+    # Report time if we spent more than 5 minutes executing a command
+    execution_time = end - start
+    if execution_time > (60 * 5):
+        mins = execution_time / 60
+        secs = execution_time % 60
+        print("\t>time spent: %d minutes %d seconds" % (mins, secs))
+
+    return p.returncode, stdout, stderr
+
+
+def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False):
+    """Execute a sequence of shell commands, which is equivalent to
+    running `cmd1 && cmd2 && cmd3`
+    @returns boolean indication if all commands succeeds.
+    """
+
+    if cmd_dir:
+        print("\t=== Set current working directory => %s" % cmd_dir)
+
+    for shell_cmd in shell_cmds:
+        ret_code, stdout, stderr = run_shell_command(shell_cmd, cmd_dir)
+        if stdout:
+            if verbose or ret_code != 0:
+                print(ColorString.info("stdout: \n"), stdout)
+        if stderr:
+            # contents in stderr is not necessarily to be error messages.
+            if verbose or ret_code != 0:
+                print(ColorString.error("stderr: \n"), stderr)
+        if ret_code != 0:
+            return False
+
+    return True
--- a/build_tools/amalgamate.py
+++ b/build_tools/amalgamate.py
@ -0,0 +1,168 @@
+#!/usr/bin/python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+# amalgamate.py creates an amalgamation from a unity build.
+# It can be run with either Python 2 or 3.
+# An amalgamation consists of a header that includes the contents of all public
+# headers and a source file that includes the contents of all source files and
+# private headers.
+#
+# This script works by starting with the unity build file and recursively expanding
+# #include directives. If the #include is found in a public include directory,
+# that header is expanded into the amalgamation header.
+#
+# A particular header is only expanded once, so this script will
+# break if there are multiple inclusions of the same header that are expected to
+# expand differently. Similarly, this type of code causes issues:
+#
+# #ifdef FOO
+#   #include "bar.h"
+#   // code here
+# #else
+#   #include "bar.h"            // oops, doesn't get expanded
+#   // different code here
+# #endif
+#
+# The solution is to move the include out of the #ifdef.
+
+from __future__ import print_function
+
+import argparse
+import re
+import sys
+from os import path
+
+include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$')
+included = set()
+excluded = set()
+
+
+def find_header(name, abs_path, include_paths):
+    samedir = path.join(path.dirname(abs_path), name)
+    if path.exists(samedir):
+        return samedir
+    for include_path in include_paths:
+        include_path = path.join(include_path, name)
+        if path.exists(include_path):
+            return include_path
+    return None
+
+
+def expand_include(
+    include_path,
+    f,
+    abs_path,
+    source_out,
+    header_out,
+    include_paths,
+    public_include_paths,
+):
+    if include_path in included:
+        return False
+
+    included.add(include_path)
+    with open(include_path) as f:
+        print('#line 1 "{}"'.format(include_path), file=source_out)
+        process_file(
+            f, include_path, source_out, header_out, include_paths, public_include_paths
+        )
+    return True
+
+
+def process_file(
+    f, abs_path, source_out, header_out, include_paths, public_include_paths
+):
+    for (line, text) in enumerate(f):
+        m = include_re.match(text)
+        if m:
+            filename = m.groups()[0]
+            # first check private headers
+            include_path = find_header(filename, abs_path, include_paths)
+            if include_path:
+                if include_path in excluded:
+                    source_out.write(text)
+                    expanded = False
+                else:
+                    expanded = expand_include(
+                        include_path,
+                        f,
+                        abs_path,
+                        source_out,
+                        header_out,
+                        include_paths,
+                        public_include_paths,
+                    )
+            else:
+                # now try public headers
+                include_path = find_header(filename, abs_path, public_include_paths)
+                if include_path:
+                    # found public header
+                    expanded = False
+                    if include_path in excluded:
+                        source_out.write(text)
+                    else:
+                        expand_include(
+                            include_path,
+                            f,
+                            abs_path,
+                            header_out,
+                            None,
+                            public_include_paths,
+                            [],
+                        )
+                else:
+                    sys.exit(
+                        "unable to find {}, included in {} on line {}".format(
+                            filename, abs_path, line
+                        )
+                    )
+
+            if expanded:
+                print('#line {} "{}"'.format(line + 1, abs_path), file=source_out)
+        elif text != "#pragma once\n":
+            source_out.write(text)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Transform a unity build into an amalgamation"
+    )
+    parser.add_argument("source", help="source file")
+    parser.add_argument(
+        "-I",
+        action="append",
+        dest="include_paths",
+        help="include paths for private headers",
+    )
+    parser.add_argument(
+        "-i",
+        action="append",
+        dest="public_include_paths",
+        help="include paths for public headers",
+    )
+    parser.add_argument(
+        "-x", action="append", dest="excluded", help="excluded header files"
+    )
+    parser.add_argument("-o", dest="source_out", help="output C++ file", required=True)
+    parser.add_argument(
+        "-H", dest="header_out", help="output C++ header file", required=True
+    )
+    args = parser.parse_args()
+
+    include_paths = list(map(path.abspath, args.include_paths or []))
+    public_include_paths = list(map(path.abspath, args.public_include_paths or []))
+    excluded.update(map(path.abspath, args.excluded or []))
+    filename = args.source
+    abs_path = path.abspath(filename)
+    with open(filename) as f, open(args.source_out, "w") as source_out, open(
+        args.header_out, "w"
+    ) as header_out:
+        print('#line 1 "{}"'.format(filename), file=source_out)
+        print('#include "{}"'.format(header_out.name), file=source_out)
+        process_file(
+            f, abs_path, source_out, header_out, include_paths, public_include_paths
+        )
+
+
+if __name__ == "__main__":
+    main()
--- a/build_tools/benchmark_log_tool.py
+++ b/build_tools/benchmark_log_tool.py
@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+#  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+#  This source code is licensed under both the GPLv2 (found in the
+#  COPYING file in the root directory) and Apache 2.0 License
+#  (found in the LICENSE.Apache file in the root directory).
+
+"""Access the results of benchmark runs
+Send these results on to OpenSearch graphing service
+"""
+
+import argparse
+import itertools
+import logging
+import os
+import re
+import sys
+
+import requests
+from dateutil import parser
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+class Configuration:
+    opensearch_user = os.environ["ES_USER"]
+    opensearch_pass = os.environ["ES_PASS"]
+
+
+class BenchmarkResultException(Exception):
+    def __init__(self, message, content):
+        super().__init__(self, message)
+        self.content = content
+
+
+class BenchmarkUtils:
+
+    expected_keys = [
+        "ops_sec",
+        "mb_sec",
+        "lsm_sz",
+        "blob_sz",
+        "c_wgb",
+        "w_amp",
+        "c_mbps",
+        "c_wsecs",
+        "c_csecs",
+        "b_rgb",
+        "b_wgb",
+        "usec_op",
+        "p50",
+        "p99",
+        "p99.9",
+        "p99.99",
+        "pmax",
+        "uptime",
+        "stall%",
+        "Nstall",
+        "u_cpu",
+        "s_cpu",
+        "rss",
+        "test",
+        "date",
+        "version",
+        "job_id",
+    ]
+
+    def sanity_check(row):
+        if "test" not in row:
+            logging.debug(f"not 'test' in row: {row}")
+            return False
+        if row["test"] == "":
+            logging.debug(f"row['test'] == '': {row}")
+            return False
+        if "date" not in row:
+            logging.debug(f"not 'date' in row: {row}")
+            return False
+        if "ops_sec" not in row:
+            logging.debug(f"not 'ops_sec' in row: {row}")
+            return False
+        try:
+            _ = int(row["ops_sec"])
+        except (ValueError, TypeError):
+            logging.debug(f"int(row['ops_sec']): {row}")
+            return False
+        try:
+            (_, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
+        except (parser.ParserError):
+            logging.error(
+                f"parser.parse((row['date']): not a valid format for date in row: {row}"
+            )
+            return False
+        return True
+
+    def conform_opensearch(row):
+        (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
+        # create a test_date field, which was previously what was expected
+        # repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month)
+        # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
+        row["test_date"] = dt.isoformat()
+        row["date"] = dt.isoformat()
+        return {key.replace(".", "_"): value for key, value in row.items()}
+
+
+class ResultParser:
+    def __init__(self, field="(\w|[+-:.%])+", intrafield="(\s)+", separator="\t"):
+        self.field = re.compile(field)
+        self.intra = re.compile(intrafield)
+        self.sep = re.compile(separator)
+
+    def ignore(self, l_in: str):
+        if len(l_in) == 0:
+            return True
+        if l_in[0:1] == "#":
+            return True
+        return False
+
+    def line(self, line_in: str):
+        """Parse a line into items
+        Being clever about separators
+        """
+        line = line_in
+        row = []
+        while line != "":
+            match_item = self.field.match(line)
+            if match_item:
+                item = match_item.group(0)
+                row.append(item)
+                line = line[len(item) :]
+            else:
+                match_intra = self.intra.match(line)
+                if match_intra:
+                    intra = match_intra.group(0)
+                    # Count the separators
+                    # If there are >1 then generate extra blank fields
+                    # White space with no true separators fakes up a single separator
+                    tabbed = self.sep.split(intra)
+                    sep_count = len(tabbed) - 1
+                    if sep_count == 0:
+                        sep_count = 1
+                    for _ in range(sep_count - 1):
+                        row.append("")
+                    line = line[len(intra) :]
+                else:
+                    raise BenchmarkResultException(
+                        "Invalid TSV line", f"{line_in} at {line}"
+                    )
+        return row
+
+    def parse(self, lines):
+        """Parse something that iterates lines"""
+        rows = [self.line(line) for line in lines if not self.ignore(line)]
+        header = rows[0]
+        width = len(header)
+        records = [
+            {k: v for (k, v) in itertools.zip_longest(header, row[:width])}
+            for row in rows[1:]
+        ]
+        return records
+
+
+def load_report_from_tsv(filename: str):
+    file = open(filename, "r")
+    contents = file.readlines()
+    file.close()
+    parser = ResultParser()
+    report = parser.parse(contents)
+    logging.debug(f"Loaded TSV Report: {report}")
+    return report
+
+
+def push_report_to_opensearch(report, esdocument):
+    sanitized = [
+        BenchmarkUtils.conform_opensearch(row)
+        for row in report
+        if BenchmarkUtils.sanity_check(row)
+    ]
+    logging.debug(
+        f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch"
+    )
+    for single_benchmark in sanitized:
+        logging.debug(f"upload benchmark: {single_benchmark}")
+        response = requests.post(
+            esdocument,
+            json=single_benchmark,
+            auth=(os.environ["ES_USER"], os.environ["ES_PASS"]),
+        )
+        logging.debug(
+            f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}"
+        )
+        response.raise_for_status()
+
+
+def push_report_to_null(report):
+
+    for row in report:
+        if BenchmarkUtils.sanity_check(row):
+            logging.debug(f"row {row}")
+            conformed = BenchmarkUtils.conform_opensearch(row)
+            logging.debug(f"conformed row {conformed}")
+
+
+def main():
+    """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
+    This tool will
+
+    (1) Open a local tsv benchmark report file
+    (2) Upload to OpenSearch document, via https/JSON
+    """
+
+    parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.")
+
+    # --tsvfile is the name of the file to read results from
+    # --esdocument is the ElasticSearch document to push these results into
+    #
+    parser.add_argument(
+        "--tsvfile",
+        default="build_tools/circle_api_scraper_input.txt",
+        help="File from which to read tsv report",
+    )
+    parser.add_argument(
+        "--esdocument",
+        help="ElasticSearch/OpenSearch document URL to upload report into",
+    )
+    parser.add_argument(
+        "--upload", choices=["opensearch", "none"], default="opensearch"
+    )
+
+    args = parser.parse_args()
+    logging.debug(f"Arguments: {args}")
+    reports = load_report_from_tsv(args.tsvfile)
+    if args.upload == "opensearch":
+        push_report_to_opensearch(reports, args.esdocument)
+    else:
+        push_report_to_null(reports)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/build_tools/build_detect_platform
+++ b/build_tools/build_detect_platform
@ -0,0 +1,811 @@
+#!/usr/bin/env bash
+#
+# Detects OS we're compiling on and outputs a file specified by the first
+# argument, which in turn gets read while processing Makefile.
+#
+# The output will set the following variables:
+#   CC                          C Compiler path
+#   CXX                         C++ Compiler path
+#   PLATFORM_LDFLAGS            Linker flags
+#   JAVA_LDFLAGS                Linker flags for RocksDBJava
+#   JAVA_STATIC_LDFLAGS         Linker flags for RocksDBJava static build
+#   JAVAC_ARGS                  Arguments for javac
+#   PLATFORM_SHARED_EXT         Extension for shared libraries
+#   PLATFORM_SHARED_LDFLAGS     Flags for building shared library
+#   PLATFORM_SHARED_CFLAGS      Flags for compiling objects for shared library
+#   PLATFORM_CCFLAGS            C compiler flags
+#   PLATFORM_CXXFLAGS           C++ compiler flags (possible contents are listed below)
+#   PLATFORM_SHARED_VERSIONED   Set to 'true' if platform supports versioned
+#                               shared libraries, empty otherwise.
+#   FIND			Command for the find utility
+#   WATCH			Command for the watch utility
+#
+# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following:
+#
+#       -DROCKSDB_PLATFORM_POSIX    if posix-platform based
+#       -DSNAPPY                    if the Snappy library is present
+#       -DLZ4                       if the LZ4 library is present
+#       -DZSTD                      if the ZSTD library is present
+#       -DNUMA                      if the NUMA library is present
+#       -DTBB                       if the TBB library is present
+#       -DMEMKIND                   if the memkind library is present
+#
+# Using gflags in rocksdb:
+# Our project depends on gflags, which requires users to take some extra steps
+# before they can compile the whole repository:
+#   1. Install gflags. You may download it from here:
+#      https://gflags.github.io/gflags/ (Mac users can `brew install gflags`)
+#   2. Once installed, add the include path for gflags to your CPATH env var and
+#      the lib path to LIBRARY_PATH. If installed with default settings, the lib
+#      will be /usr/local/lib and the include path will be /usr/local/include
+
+# The single required argument names the file the detected settings go to.
+OUTPUT=$1
+if [ -z "$OUTPUT" ]; then
+  echo "usage: $0 <output-filename>" >&2
+  exit 1
+fi
+
+# C++17 is the baseline; a non-empty ROCKSDB_CXX_STANDARD selects another
+# standard instead.
+PLATFORM_CXXFLAGS="-std=${ROCKSDB_CXX_STANDARD:-c++17}"
+
+# we currently depend on POSIX platform
+COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX"
+
+# Default to fbcode gcc on internal fb machines
+if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
+    FBCODE_BUILD="true"
+    # If we're compiling with TSAN or shared lib, we need pic build
+    PIC_BUILD=$COMPILE_WITH_TSAN
+    if [ "$LIB_MODE" == "shared" ]; then
+      PIC_BUILD=1
+    fi
+    source "$PWD/build_tools/fbcode_config_platform010.sh"
+fi
+
+# Delete existing output, if it exists
+rm -f "$OUTPUT"
+touch "$OUTPUT"
+
+if test -z "$CC"; then
+    if [ -x "$(command -v cc)" ]; then
+        CC=cc
+    elif [ -x "$(command -v clang)" ]; then
+        CC=clang
+    else
+        CC=cc
+    fi
+fi
+
+if test -z "$CXX"; then
+    if [ -x "$(command -v g++)" ]; then
+        CXX=g++
+    elif [ -x "$(command -v clang++)" ]; then
+        CXX=clang++
+    else
+        CXX=g++
+    fi
+fi
+
+if test -z "$AR"; then
+    if [ -x "$(command -v gcc-ar)" ]; then
+        AR=gcc-ar
+    elif [ -x "$(command -v llvm-ar)" ]; then
+        AR=llvm-ar
+    else
+        AR=ar
+    fi
+fi
+
+# Detect OS
+if test -z "$TARGET_OS"; then
+    TARGET_OS=`uname -s`
+fi
+
+if test -z "$TARGET_ARCHITECTURE"; then
+    TARGET_ARCHITECTURE=`uname -m`
+fi
+
+if test -z "$CLANG_SCAN_BUILD"; then
+    CLANG_SCAN_BUILD=scan-build
+fi
+
+if test -z "$CLANG_ANALYZER"; then
+    CLANG_ANALYZER=$(command -v clang++ 2> /dev/null)
+fi
+
+if test -z "$FIND"; then
+    FIND=find
+fi
+
+if test -z "$WATCH"; then
+    WATCH=watch
+fi
+
+# CFLAGS, if set, is appended to the common flags
+COMMON_FLAGS+=" ${CFLAGS}"
+CROSS_COMPILE=
+PLATFORM_CCFLAGS=
+# Self-assignment: any existing PLATFORM_LDFLAGS value is carried over
+PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS"
+# Shared library defaults; the per-OS section may replace them
+PLATFORM_SHARED_EXT="so"
+PLATFORM_SHARED_LDFLAGS="-Wl,--no-as-needed -shared -Wl,-soname -Wl,"
+PLATFORM_SHARED_CFLAGS="-fPIC"
+PLATFORM_SHARED_VERSIONED=true
+
+# Generic port files (usable on every platform thanks to #ifdef) sit directly
+# in port/
+GENERIC_PORT_FILES=$(cd "$ROCKSDB_ROOT"; find port -name '*.cc' | tr "\n" " ")
+
+# Per-OS settings. On GCC, -fno-builtin-memcmp picks libc's memcmp over GCC's
+# builtin memcmp.
+case "$TARGET_OS" in
+    Darwin)
+        PLATFORM=OS_MACOSX
+        COMMON_FLAGS+=" -DOS_MACOSX"
+        PLATFORM_SHARED_EXT=dylib
+        PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name "
+        ;;
+    IOS)
+        PLATFORM=IOS
+        CROSS_COMPILE=true
+        COMMON_FLAGS+=" -DOS_MACOSX -DIOS_CROSS_COMPILE "
+        PLATFORM_SHARED_EXT=dylib
+        PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name "
+        PLATFORM_SHARED_VERSIONED=
+        ;;
+    Linux)
+        PLATFORM=OS_LINUX
+        COMMON_FLAGS+=" -DOS_LINUX"
+        if [ -n "$USE_CLANG" ]; then
+            PLATFORM_LDFLAGS+=" -latomic"
+        else
+            COMMON_FLAGS+=" -fno-builtin-memcmp"
+        fi
+        PLATFORM_LDFLAGS+=" -lpthread -lrt -ldl"
+        # io_uring is probed by default; ROCKSDB_USE_IO_URING=0 skips the probe
+        : "${ROCKSDB_USE_IO_URING:=1}"
+        if test "$ROCKSDB_USE_IO_URING" -ne 0; then
+            # Usable only when a small liburing program compiles and links
+            if $CXX $PLATFORM_CXXFLAGS -x c++ - -luring -o test.o 2>/dev/null  <<EOF
+              #include <liburing.h>
+              int main() {
+                struct io_uring ring;
+                io_uring_queue_init(1, &ring, 0);
+                return 0;
+              }
+EOF
+            then
+                PLATFORM_LDFLAGS+=" -luring"
+                COMMON_FLAGS+=" -DROCKSDB_IOURING_PRESENT"
+            fi
+        fi
+        ;;
+    SunOS)
+        PLATFORM=OS_SOLARIS
+        COMMON_FLAGS+=" -fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS -m64"
+        PLATFORM_LDFLAGS+=" -lpthread -lrt -static-libstdc++ -static-libgcc -m64"
+        ;;
+    AIX)
+        PLATFORM=OS_AIX
+        # CC is forced to gcc here, replacing any earlier choice
+        CC=gcc
+        COMMON_FLAGS+=" -maix64 -pthread -fno-builtin-memcmp -D_REENTRANT -DOS_AIX -D__STDC_FORMAT_MACROS"
+        PLATFORM_LDFLAGS+=" -pthread -lpthread -lrt -maix64 -static-libstdc++ -static-libgcc"
+        ;;
+    FreeBSD)
+        PLATFORM=OS_FREEBSD
+        # CXX is forced to clang++ here, replacing any earlier choice
+        CXX=clang++
+        COMMON_FLAGS+=" -fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD"
+        PLATFORM_LDFLAGS+=" -lpthread"
+        ;;
+    GNU/kFreeBSD)
+        PLATFORM=OS_GNU_KFREEBSD
+        COMMON_FLAGS+=" -DOS_GNU_KFREEBSD"
+        if [ -n "$USE_CLANG" ]; then
+            PLATFORM_LDFLAGS+=" -latomic"
+        else
+            COMMON_FLAGS+=" -fno-builtin-memcmp"
+        fi
+        PLATFORM_LDFLAGS+=" -lpthread -lrt"
+        ;;
+    NetBSD)
+        PLATFORM=OS_NETBSD
+        COMMON_FLAGS+=" -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD"
+        PLATFORM_LDFLAGS+=" -lpthread -lgcc_s"
+        ;;
+    OpenBSD)
+        PLATFORM=OS_OPENBSD
+        # CXX is forced to clang++ here, replacing any earlier choice
+        CXX=clang++
+        COMMON_FLAGS+=" -fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD"
+        PLATFORM_LDFLAGS+=" -pthread"
+        # find and watch are taken from gfind and gnuwatch on this platform
+        FIND=gfind
+        WATCH=gnuwatch
+        ;;
+    DragonFly)
+        PLATFORM=OS_DRAGONFLYBSD
+        COMMON_FLAGS+=" -fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD"
+        PLATFORM_LDFLAGS+=" -lpthread"
+        ;;
+    Cygwin)
+        PLATFORM=CYGWIN
+        PLATFORM_SHARED_CFLAGS=""
+        # NOTE(review): this replaces the -std= value chosen at the top of the
+        # script (C++17 or ROCKSDB_CXX_STANDARD) with gnu++11 -- confirm that
+        # this is still intended.
+        PLATFORM_CXXFLAGS="-std=gnu++11"
+        COMMON_FLAGS+=" -DCYGWIN"
+        if [ -n "$USE_CLANG" ]; then
+            PLATFORM_LDFLAGS+=" -latomic"
+        else
+            COMMON_FLAGS+=" -fno-builtin-memcmp"
+        fi
+        PLATFORM_LDFLAGS+=" -lpthread -lrt"
+        ;;
+    OS_ANDROID_CROSSCOMPILE)
+        PLATFORM=OS_ANDROID
+        CROSS_COMPILE=true
+        COMMON_FLAGS+=" -fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DROCKSDB_PLATFORM_POSIX"
+        # All pthread features are in the Android C library, so no thread
+        # library is added
+        PLATFORM_LDFLAGS+=" "
+        ;;
+    *)
+        echo "Unknown platform!" >&2
+        exit 1
+        ;;
+esac
+
+# CXXFLAGS, if set, is appended to the C++ flags
+PLATFORM_CXXFLAGS+=" ${CXXFLAGS}"
+# Both Java link lines start out as copies of the platform linker flags
+JAVA_LDFLAGS=$PLATFORM_LDFLAGS
+JAVA_STATIC_LDFLAGS=$PLATFORM_LDFLAGS
+JAVAC_ARGS="-source 8"
+
+if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
+    # Cross-compiling; do not try any compilation tests.
+    # Also don't need any compilation tests if compiling on fbcode
+    if [ "$FBCODE_BUILD" = "true" ]; then
+      # On fbcode the libraries needed for backtraces are present, so enable
+      # them without probing; folly is taken from third-party/folly
+      FOLLY_DIR="third-party/folly"
+      COMMON_FLAGS+=" -DROCKSDB_BACKTRACE"
+    fi
+    true
+else
+    if ! test $ROCKSDB_DISABLE_FALLOCATE; then
+        # fallocate counts as present when a program calling it builds
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <fcntl.h>
+          #include <linux/falloc.h>
+          int main() {
+      int fd = open("/dev/null", 0);
+      fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024);
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_FALLOCATE_PRESENT"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_SNAPPY; then
+        # Snappy (http://code.google.com/p/snappy/) counts as installed when a
+        # program including its header builds
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <snappy.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DSNAPPY"
+            PLATFORM_LDFLAGS+=" -lsnappy"
+            JAVA_LDFLAGS+=" -lsnappy"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_GFLAGS; then
+        # Look for gflags (http://gflags.github.io/gflags/) and work out which
+        # namespace its symbols live in. The first probe that builds wins.
+        #
+        # Probe 1: the header provides GFLAGS_NAMESPACE itself
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
+          #include <gflags/gflags.h>
+          using namespace GFLAGS_NAMESPACE;
+          int main() {}
+EOF
+        then
+          COMMON_FLAGS+=" -DGFLAGS=1"
+          PLATFORM_LDFLAGS+=" -lgflags"
+        # Probe 2: the namespace is gflags
+        elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
+            #include <gflags/gflags.h>
+            using namespace gflags;
+            int main() {}
+EOF
+        then
+          COMMON_FLAGS+=" -DGFLAGS=1 -DGFLAGS_NAMESPACE=gflags"
+          PLATFORM_LDFLAGS+=" -lgflags"
+        # Probe 3: the namespace is google
+        elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
+            #include <gflags/gflags.h>
+            using namespace google;
+            int main() {}
+EOF
+        then
+          COMMON_FLAGS+=" -DGFLAGS=1 -DGFLAGS_NAMESPACE=google"
+          PLATFORM_LDFLAGS+=" -lgflags"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_ZLIB; then
+        # zlib: header must be usable
+        if $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <zlib.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DZLIB"
+            PLATFORM_LDFLAGS+=" -lz"
+            JAVA_LDFLAGS+=" -lz"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_BZIP; then
+        # bzip2: header must be usable
+        if $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <bzlib.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DBZIP2"
+            PLATFORM_LDFLAGS+=" -lbz2"
+            JAVA_LDFLAGS+=" -lbz2"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_LZ4; then
+        # lz4: both lz4.h and lz4hc.h must be usable
+        if $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <lz4.h>
+          #include <lz4hc.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DLZ4"
+            PLATFORM_LDFLAGS+=" -llz4"
+            JAVA_LDFLAGS+=" -llz4"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_ZSTD; then
+        # zstd: header must be usable (this probe discards its output file)
+        if $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null  <<EOF
+          #include <zstd.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DZSTD"
+            PLATFORM_LDFLAGS+=" -lzstd"
+            JAVA_LDFLAGS+=" -lzstd"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_NUMA; then
+        # numa: headers must be usable and -lnuma must link
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -lnuma 2>/dev/null  <<EOF
+          #include <numa.h>
+          #include <numaif.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DNUMA"
+            PLATFORM_LDFLAGS+=" -lnuma"
+            JAVA_LDFLAGS+=" -lnuma"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_TBB; then
+        # tbb: header must be usable and -ltbb must link (LDFLAGS is honoured)
+        if $CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -ltbb 2>/dev/null  <<EOF
+          #include <tbb/tbb.h>
+          int main() {}
+EOF
+        then
+            COMMON_FLAGS+=" -DTBB"
+            PLATFORM_LDFLAGS+=" -ltbb"
+            JAVA_LDFLAGS+=" -ltbb"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_JEMALLOC; then
+        # jemalloc counts as available when an empty program links against it
+        if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -ljemalloc \
+          2>/dev/null; then
+            # JEMALLOC=1 enables some preprocessor identifiers in the Makefile.
+            # WITH_JEMALLOC_FLAG=1 records that jemalloc is enabled through
+            # the -ljemalloc flag; the alternative is a direct link to the
+            # jemalloc library.
+            JEMALLOC=1
+            WITH_JEMALLOC_FLAG=1
+            # Extra handling for a jemalloc installed with HomeBrew
+            if [ "$PLATFORM" == "OS_MACOSX" ]; then
+                # On M1 (arm64) Macs homebrew installs under /opt/homebrew
+                # instead of /usr/local
+                case "$TARGET_ARCHITECTURE" in
+                    arm64) JEMALLOC_PREFIX="/opt/homebrew" ;;
+                    *)     JEMALLOC_PREFIX="/usr/local" ;;
+                esac
+                if hash brew 2>/dev/null && brew ls --versions jemalloc > /dev/null; then
+                    # Version taken from the last line brew prints
+                    JEMALLOC_VER=$(brew ls --versions jemalloc | tail -n 1 | cut -f 2 -d ' ')
+                    jemalloc_cellar="${JEMALLOC_PREFIX}/Cellar/jemalloc/${JEMALLOC_VER}"
+                    JEMALLOC_INCLUDE="-I${jemalloc_cellar}/include"
+                    JEMALLOC_LIB="${jemalloc_cellar}/lib/libjemalloc_pic.a"
+                    jemalloc_link=" -L${JEMALLOC_PREFIX}/lib $JEMALLOC_LIB"
+                    PLATFORM_LDFLAGS+="$jemalloc_link"
+                    JAVA_LDFLAGS+="$jemalloc_link"
+                    JAVA_STATIC_LDFLAGS+="$jemalloc_link"
+                fi
+            fi
+        fi
+    fi
+    if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then
+        # No jemalloc: fall back to tcmalloc when an empty program links
+        # against it
+        if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o \
+          -ltcmalloc 2>/dev/null; then
+            PLATFORM_LDFLAGS+=" -ltcmalloc"
+            JAVA_LDFLAGS+=" -ltcmalloc"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then
+        # malloc_usable_size counts as available when a call to it builds
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <malloc.h>
+          int main() {
+            size_t res = malloc_usable_size(0);
+            (void)res;
+            return 0;
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_MALLOC_USABLE_SIZE"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_MEMKIND; then
+        # memkind: a call using MEMKIND_DAX_KMEM must build and link
+        if $CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -lmemkind 2>/dev/null  <<EOF
+          #include <memkind.h>
+          int main() {
+            memkind_malloc(MEMKIND_DAX_KMEM, 1024);
+            return 0;
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DMEMKIND"
+            PLATFORM_LDFLAGS+=" -lmemkind"
+            JAVA_LDFLAGS+=" -lmemkind"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then
+        # The PTHREAD_MUTEX_ADAPTIVE_NP mutex type must be known to pthread.h
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <pthread.h>
+          int main() {
+            int x = PTHREAD_MUTEX_ADAPTIVE_NP;
+            (void)x;
+            return 0;
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_BACKTRACE; then
+        # First try backtrace() without any extra library
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <execinfo.h>
+          int main() {
+            void* frames[1];
+            backtrace_symbols(frames, backtrace(frames, 1));
+            return 0;
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_BACKTRACE"
+        # Otherwise retry with the separate execinfo library
+        elif $CXX $PLATFORM_CXXFLAGS -lexecinfo -x c++ - -o test.o 2>/dev/null  <<EOF
+              #include <execinfo.h>
+              int main() {
+                void* frames[1];
+                backtrace_symbols(frames, backtrace(frames, 1));
+              }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_BACKTRACE"
+            PLATFORM_LDFLAGS+=" -lexecinfo"
+            JAVA_LDFLAGS+=" -lexecinfo"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_PG; then
+        # -pg is used for profiling only when the compiler accepts it
+        if $CXX $PLATFORM_CXXFLAGS -pg -x c++ - -o test.o 2>/dev/null  <<EOF
+          int main() {
+            return 0;
+          }
+EOF
+        then
+            PROFILING_FLAGS=-pg
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_SYNC_FILE_RANGE; then
+        # sync_file_range may be missing with an old glibc, so probe for it
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <fcntl.h>
+          int main() {
+            int fd = open("/dev/null", 0);
+            sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE);
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_RANGESYNC_PRESENT"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then
+        # sched_getcpu counts as supported when a call to it builds
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <sched.h>
+          int main() {
+            int cpuid = sched_getcpu();
+            (void)cpuid;
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_SCHED_GETCPU_PRESENT"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then
+        # getauxval counts as supported when a call to it builds
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null  <<EOF
+          #include <sys/auxv.h>
+          int main() {
+            uint64_t auxv = getauxval(AT_HWCAP);
+            (void)auxv;
+          }
+EOF
+        then
+            COMMON_FLAGS+=" -DROCKSDB_AUXV_GETAUXVAL_PRESENT"
+        fi
+    fi
+
+    if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then
+        # c++17 aligned-new: the compiler must accept -faligned-new
+        if $CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o test.o 2>/dev/null <<EOF
+            struct alignas(1024) t {int a;};
+            int main() {}
+EOF
+        then
+            PLATFORM_CXXFLAGS+=" -faligned-new -DHAVE_ALIGNED_NEW"
+        fi
+    fi
+    if ! test $ROCKSDB_DISABLE_BENCHMARK; then
+        # google benchmark: header must be usable and -lbenchmark must link
+        if $CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -lbenchmark -lpthread 2>/dev/null  <<EOF
+          #include <benchmark/benchmark.h>
+          int main() {}
+EOF
+        then
+            PLATFORM_LDFLAGS+=" -lbenchmark"
+        fi
+    fi
+    if test $USE_FOLLY; then
+        # When the folly header cannot be found by the compiler, use
+        # ./third-party/folly as the folly directory
+        if ! $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null  <<EOF
+          #include <folly/synchronization/DistributedMutex.h>
+          int main() {}
+EOF
+        then
+          FOLLY_DIR="./third-party/folly"
+        fi
+    fi
+
+fi
+
+# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning.
+# The warning breaks compilation on FreeBSD for the arm64 and i386
+# architectures, so the probe is skipped for exactly those two.
+skip_shorten_probe=
+if [ "$TARGET_OS" = FreeBSD ]; then
+  case "$TARGET_ARCHITECTURE" in
+    arm64|i386) skip_shorten_probe=1 ;;
+  esac
+fi
+if [ -z "$skip_shorten_probe" ]; then
+  # Add the warning only when the compiler knows -Wshorten-64-to-32
+  if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -Wshorten-64-to-32 2>/dev/null  <<EOF
+    int main() {}
+EOF
+  then
+    COMMON_FLAGS+=" -Wshorten-64-to-32"
+  fi
+fi
+
+if [ "$PORTABLE" == "" ] || [ "$PORTABLE" == 0 ]; then
+  if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
+    # Tune for this POWER processor, treating '+' models as base models
+    POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+`
+    COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER "
+  elif test -n "`echo $TARGET_ARCHITECTURE | grep -e^arm -e^aarch64`"; then
+    # TODO: Handle this with approprite options.
+    COMMON_FLAGS="$COMMON_FLAGS"
+  elif test -n "`echo $TARGET_ARCHITECTURE | grep ^aarch64`"; then
+    COMMON_FLAGS="$COMMON_FLAGS"
+  elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
+    if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ \
+      -march=native - -o /dev/null 2>/dev/null; then
+      COMMON_FLAGS="$COMMON_FLAGS -march=native "
+    else
+      COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
+    fi
+    COMMON_FLAGS="$COMMON_FLAGS"
+  elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
+    RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
+    COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
+  elif [ "$TARGET_OS" == "IOS" ]; then
+    COMMON_FLAGS="$COMMON_FLAGS"
+  else
+    COMMON_FLAGS="$COMMON_FLAGS -march=native "
+  fi
+else
+  # PORTABLE specified
+  if [ "$PORTABLE" == 1 ]; then
+    if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
+      COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
+    elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
+      RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
+      COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
+    elif test "$USE_SSE"; then
+      # USE_SSE is DEPRECATED
+      # This is a rough approximation of the old USE_SSE behavior
+      COMMON_FLAGS="$COMMON_FLAGS -march=haswell"
+    fi
+    # Other than those cases, not setting -march= here.
+  else
+    # Assume PORTABLE is a minimum assumed cpu type, e.g. PORTABLE=haswell
+    COMMON_FLAGS="$COMMON_FLAGS -march=${PORTABLE}"
+  fi
+
+  if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then
+    # For portability compile for macOS 10.14 or newer
+    COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.14"
+    PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.14"
+    # -mmacosx-version-min must come first here.
+    PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.14 $PLATFORM_SHARED_LDFLAGS"
+    PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.14"
+    JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.14"
+    JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
+    JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
+    JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
+  fi
+fi
+
+if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
+  # check for GNU libc on ppc64
+  $CXX -x c++ - -o /dev/null 2>/dev/null <<EOF
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <gnu/libc-version.h>
+
+    int main(int argc, char *argv[]) {
+      printf("GNU libc version: %s\n", gnu_get_libc_version());
+      return 0;
+    }
+EOF
+  if [ "$?" != 0 ]; then
+      PPC_LIBC_IS_GNU=0
+  fi
+fi
+
+# Define HAVE_UINT128_EXTENSION when the compiler supports __uint128_t
+# arithmetic
+if $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
+  #include <cstdint>
+  int main() {
+    uint64_t a = 0xffffFFFFffffFFFF;
+    __uint128_t b = __uint128_t(a) * a;
+    a = static_cast<uint64_t>(b >> 64);
+    (void)a;
+  }
+EOF
+then
+  COMMON_FLAGS+=" -DHAVE_UINT128_EXTENSION"
+fi
+
+# Outside fbcode, on Linux: compile a tiny object with the shared-library
+# compile flags and link it with the shared-library link flags. When both
+# steps work, -ldl is added to the linker flags for executables.
+if [ "$FBCODE_BUILD" != "true" -a "$PLATFORM" = OS_LINUX ]; then
+  $CXX $COMMON_FLAGS $PLATFORM_SHARED_CFLAGS -x c++ -c - -o test_dl.o 2>/dev/null <<EOF
+  void dummy_func() {}
+EOF
+  if [ "$?" = 0 ]; then
+    $CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o test.o 2>/dev/null
+    if [ "$?" = 0 ]; then
+      # Keep an already set EXEC_LDFLAGS and separate it from -ldl with a
+      # space; appending without a separator would fuse the last existing
+      # flag with "-ldl". An empty EXEC_LDFLAGS still yields exactly "-ldl".
+      EXEC_LDFLAGS="${EXEC_LDFLAGS:+$EXEC_LDFLAGS }-ldl"
+      rm -f test_dl.o
+    fi
+  fi
+fi
+
+# check for F_FULLFSYNC
+$CXX $PLATFORM_CXXFALGS -x c++ - -o test.o 2>/dev/null  <<EOF
+  #include <fcntl.h>
+  int main() {
+    fcntl(0, F_FULLFSYNC);
+    return 0;
+  }
+EOF
+if [ "$?" = 0 ]; then
+  COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC"
+fi
+
+# Remove the files the probes may have left behind
+rm -f test.o test_dl.o
+
+# With USE_FOLLY set and a folly directory known, ask getdeps.py for the folly
+# installation dir
+if [ "$USE_FOLLY" ] && [ "$FOLLY_DIR" ]; then
+  FOLLY_PATH=$(cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-inst-dir folly)
+fi
+
+# C and C++ compiles both receive the common flags
+PLATFORM_CCFLAGS+=" $COMMON_FLAGS"
+PLATFORM_CXXFLAGS+=" $COMMON_FLAGS"
+
+VALGRIND_VER="$VALGRIND_VER"
+
+ROCKSDB_MAJOR=$(build_tools/version.sh major)
+ROCKSDB_MINOR=$(build_tools/version.sh minor)
+ROCKSDB_PATCH=$(build_tools/version.sh patch)
+
+# Append the results to the output file as NAME=value lines
+{
+  # Settings that are always written, in this order
+  for setting in \
+      CC CXX AR PLATFORM PLATFORM_LDFLAGS PLATFORM_CMAKE_FLAGS \
+      JAVA_LDFLAGS JAVA_STATIC_LDFLAGS \
+      JAVA_STATIC_DEPS_CCFLAGS JAVA_STATIC_DEPS_CXXFLAGS JAVA_STATIC_DEPS_LDFLAGS \
+      JAVAC_ARGS VALGRIND_VER \
+      PLATFORM_CCFLAGS PLATFORM_CXXFLAGS \
+      PLATFORM_SHARED_CFLAGS PLATFORM_SHARED_EXT PLATFORM_SHARED_LDFLAGS \
+      PLATFORM_SHARED_VERSIONED EXEC_LDFLAGS \
+      JEMALLOC_INCLUDE JEMALLOC_LIB \
+      ROCKSDB_MAJOR ROCKSDB_MINOR ROCKSDB_PATCH \
+      CLANG_SCAN_BUILD CLANG_ANALYZER PROFILING_FLAGS \
+      FIND WATCH FOLLY_PATH
+  do
+    echo "$setting=${!setting}"
+  done
+
+  # This will enable some related identifiers for the preprocessor
+  [ -z "$JEMALLOC" ] || echo "JEMALLOC=1"
+  # Indicates that jemalloc should be enabled using the -ljemalloc flag.
+  # The alternative is to provide a direct link to the library via
+  # JEMALLOC_LIB and JEMALLOC_INCLUDE
+  [ -z "$WITH_JEMALLOC_FLAG" ] || echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG"
+  echo "LUA_PATH=$LUA_PATH"
+  # The remaining settings are written only when they have a value
+  [ -z "$USE_FOLLY" ] || echo "USE_FOLLY=$USE_FOLLY"
+  [ -z "$PPC_LIBC_IS_GNU" ] || echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU"
+} >> "$OUTPUT"
--- a/build_tools/check-sources.sh
+++ b/build_tools/check-sources.sh
@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Check for some simple mistakes that should prevent commit or push
+
+BAD=""
+
+git grep -n 'namespace rocksdb' -- '*.[ch]*'
+if [ "$?" != "1" ]; then
+  echo "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE"
+  BAD=1
+fi
+
+git grep -n -i 'nocommit' -- ':!build_tools/check-sources.sh'
+if [ "$?" != "1" ]; then
+  echo "^^^^^ Code was not intended to be committed"
+  BAD=1
+fi
+
+git grep -n 'include <rocksdb/' -- ':!build_tools/check-sources.sh'
+if [ "$?" != "1" ]; then
+  echo '^^^^^ Use double-quotes as in #include "rocksdb/something.h"'
+  BAD=1
+fi
+
+git grep -n 'include "include/rocksdb/' -- ':!build_tools/check-sources.sh'
+if [ "$?" != "1" ]; then
+  echo '^^^^^ Use #include "rocksdb/something.h" instead of #include "include/rocksdb/something.h"'
+  BAD=1
+fi
+
+git grep -n 'using namespace' -- ':!build_tools' ':!docs' \
+    ':!third-party/folly/folly/lang/Align.h' \
+    ':!third-party/gtest-1.8.1/fused-src/gtest/gtest.h'
+if [ "$?" != "1" ]; then
+  echo '^^^^ Do not use "using namespace"'
+  BAD=1
+fi
+
+git grep -n -P "[\x80-\xFF]" -- ':!docs' ':!*.md'
+if [ "$?" != "1" ]; then
+  echo '^^^^ Use only ASCII characters in source files'
+  BAD=1
+fi
+
+if [ "$BAD" ]; then
+  exit 1
+fi
--- a/build_tools/dependencies_platform010.sh
+++ b/build_tools/dependencies_platform010.sh
@ -0,0 +1,22 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# The file is generated using update_dependencies.sh.
+GCC_BASE=/mnt/gvfs/third-party2/gcc/e40bde78650fa91b8405a857e3f10bf336633fb0/11.x/centos7-native/886b5eb
+CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/2043340983c032915adbb6f78903dc855b65aee8/12/platform010/9520e0f
+LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c00dcc6a3e4125c7e8b248e9a79c14b78ac9e0ca/11.x/platform010/5684a5a
+GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0b9c8e4b060eda62f3bc1c6127bbe1256697569b/2.34/platform010/f259413
+SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/bc9647f7912b131315827d65cb6189c21f381d05/1.1.3/platform010/76ebdda
+ZLIB_BASE=/mnt/gvfs/third-party2/zlib/a6f5f3f1d063d2d00cd02fc12f0f05fc3ab3a994/1.2.11/platform010/76ebdda
+BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/09703139cfc376bd8a82642385a0e97726b28287/1.0.6/platform010/76ebdda
+LZ4_BASE=/mnt/gvfs/third-party2/lz4/60220d6a5bf7722b9cc239a1368c596619b12060/1.9.1/platform010/76ebdda
+ZSTD_BASE=/mnt/gvfs/third-party2/zstd/50eace8143eaaea9473deae1f3283e0049e05633/1.4.x/platform010/64091f4
+GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/5d27e5919771603da06000a027b12f799e58a4f7/2.2.0/platform010/76ebdda
+JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b62912d333ef33f9760efa6219dbe3fe6abb3b0e/master/platform010/f57cc4a
+NUMA_BASE=/mnt/gvfs/third-party2/numa/6b412770957aa3c8a87e5e0dcd8cc2f45f393bc0/2.0.11/platform010/76ebdda
+LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/52f69816e936e147664ad717eb71a1a0e9dc973a/1.4/platform010/5074a48
+TBB_BASE=/mnt/gvfs/third-party2/tbb/c9cc192099fa84c0dcd0ffeedd44a373ad6e4925/2018_U5/platform010/76ebdda
+LIBURING_BASE=/mnt/gvfs/third-party2/liburing/a98e2d137007e3ebf7f33bd6f99c2c56bdaf8488/20210212/platform010/76ebdda
+BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/780c7a0f9cf0967961e69ad08e61cddd85d61821/trunk/platform010/76ebdda
+KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/02d9f76aaaba580611cf75e741753c800c7fdc12/fb/platform010/da39a3e
+BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/938dc3f064ef3a48c0446f5b11d788d50b3eb5ee/2.37/centos7-native/da39a3e
+VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/429a6b3203eb415f1599bd15183659153129188e/3.15.0/platform010/76ebdda
+LUA_BASE=/mnt/gvfs/third-party2/lua/363787fa5cac2a8aa20638909210443278fa138e/5.3.4/platform010/9079c97
--- a/build_tools/dockerbuild.sh
+++ b/build_tools/dockerbuild.sh
@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Runs `make` inside a buildpack-deps container, with the current directory
+# bind-mounted at /rocks and used as the working directory.
+# $PWD is quoted so that a checkout path containing spaces is still passed to
+# docker as a single -v argument.
+docker run -v "$PWD":/rocks -w /rocks buildpack-deps make
--- a/build_tools/error_filter.py
+++ b/build_tools/error_filter.py
@ -0,0 +1,181 @@
+#  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+#  This source code is licensed under both the GPLv2 (found in the
+#  COPYING file in the root directory) and Apache 2.0 License
+#  (found in the LICENSE.Apache file in the root directory).
+
+"""Filter for error messages in test output:
+    - Receives merged stdout/stderr from test on stdin
+    - Finds patterns of known error messages for test name (first argument)
+    - Prints those error messages to stdout
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
+import sys
+
+
+class ErrorParserBase(object):
+    def parse_error(self, line):
+        """Parses a line of test output. If it contains an error, returns a
+        formatted message describing the error; otherwise, returns None.
+        Subclasses must override this method.
+        """
+        raise NotImplementedError
+
+
+class GTestErrorParser(ErrorParserBase):
+    """A parser that remembers the last test that began running so it can print
+    that test's name upon detecting failure.
+    """
+
+    _GTEST_NAME_PATTERN = re.compile(r"\[ RUN      \] (\S+)$")
+    # format: '<filename or "unknown file">:<line #>: Failure'
+    _GTEST_FAIL_PATTERN = re.compile(r"(unknown file|\S+:\d+): Failure$")
+
+    def __init__(self):
+        self._last_gtest_name = "Unknown test"
+
+    def parse_error(self, line):
+        gtest_name_match = self._GTEST_NAME_PATTERN.match(line)
+        if gtest_name_match:
+            self._last_gtest_name = gtest_name_match.group(1)
+            return None
+        gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line)
+        if gtest_fail_match:
+            return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1))
+        return None
+
+
+class MatchErrorParser(ErrorParserBase):
+    """A simple parser that returns the whole line if it matches the pattern."""
+
+    def __init__(self, pattern):
+        self._pattern = re.compile(pattern)
+
+    def parse_error(self, line):
+        if self._pattern.match(line):
+            return line
+        return None
+
+
+class CompilerErrorParser(MatchErrorParser):
+    def __init__(self):
+        # format (compile error):
+        #   '<filename>:<line #>:<column #>: error: <error msg>'
+        # format (link error):
+        #   '<filename>:<line #>: error: <error msg>'
+        # The below regex catches both
+        super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:")
+
+
+class ScanBuildErrorParser(MatchErrorParser):
+    def __init__(self):
+        super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$")
+
+
+class DbCrashErrorParser(MatchErrorParser):
+    def __init__(self):
+        super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.")
+
+
+class WriteStressErrorParser(MatchErrorParser):
+    def __init__(self):
+        super(WriteStressErrorParser, self).__init__(
+            r"ERROR: write_stress died with exitcode=\d+"
+        )
+
+
+class AsanErrorParser(MatchErrorParser):
+    def __init__(self):
+        super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:")
+
+
+class UbsanErrorParser(MatchErrorParser):
+    def __init__(self):
+        # format: '<filename>:<line #>:<column #>: runtime error: <error msg>'
+        super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:")
+
+
+class ValgrindErrorParser(MatchErrorParser):
+    def __init__(self):
+        # just grab the summary, valgrind doesn't clearly distinguish errors
+        # from other log messages.
+        super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:")
+
+
+class CompatErrorParser(MatchErrorParser):
+    def __init__(self):
+        super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$")
+
+
+class TsanErrorParser(MatchErrorParser):
+    def __init__(self):
+        super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:")
+
+
+# Maps each supported test name (the script's single command-line argument)
+# to the parser classes applied to that test's output. The values are
+# classes, not instances: main() instantiates them for the selected test.
+_TEST_NAME_TO_PARSERS = {
+    "punit": [CompilerErrorParser, GTestErrorParser],
+    "unit": [CompilerErrorParser, GTestErrorParser],
+    "release": [CompilerErrorParser, GTestErrorParser],
+    "unit_481": [CompilerErrorParser, GTestErrorParser],
+    "release_481": [CompilerErrorParser, GTestErrorParser],
+    "clang_unit": [CompilerErrorParser, GTestErrorParser],
+    "clang_release": [CompilerErrorParser, GTestErrorParser],
+    "clang_analyze": [CompilerErrorParser, ScanBuildErrorParser],
+    "code_cov": [CompilerErrorParser, GTestErrorParser],
+    "unity": [CompilerErrorParser, GTestErrorParser],
+    "lite": [CompilerErrorParser],
+    "lite_test": [CompilerErrorParser, GTestErrorParser],
+    "stress_crash": [CompilerErrorParser, DbCrashErrorParser],
+    "stress_crash_with_atomic_flush": [CompilerErrorParser, DbCrashErrorParser],
+    "stress_crash_with_txn": [CompilerErrorParser, DbCrashErrorParser],
+    "write_stress": [CompilerErrorParser, WriteStressErrorParser],
+    "asan": [CompilerErrorParser, GTestErrorParser, AsanErrorParser],
+    "asan_crash": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
+    "asan_crash_with_atomic_flush": [
+        CompilerErrorParser,
+        AsanErrorParser,
+        DbCrashErrorParser,
+    ],
+    "asan_crash_with_txn": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
+    "ubsan": [CompilerErrorParser, GTestErrorParser, UbsanErrorParser],
+    "ubsan_crash": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
+    "ubsan_crash_with_atomic_flush": [
+        CompilerErrorParser,
+        UbsanErrorParser,
+        DbCrashErrorParser,
+    ],
+    "ubsan_crash_with_txn": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
+    "valgrind": [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser],
+    "tsan": [CompilerErrorParser, GTestErrorParser, TsanErrorParser],
+    "format_compatible": [CompilerErrorParser, CompatErrorParser],
+    "run_format_compatible": [CompilerErrorParser, CompatErrorParser],
+    "no_compression": [CompilerErrorParser, GTestErrorParser],
+    "run_no_compression": [CompilerErrorParser, GTestErrorParser],
+    "regression": [CompilerErrorParser],
+    "run_regression": [CompilerErrorParser],
+}
+
+
+def main():
+    if len(sys.argv) != 2:
+        return "Usage: %s <test name>" % sys.argv[0]
+    test_name = sys.argv[1]
+    if test_name not in _TEST_NAME_TO_PARSERS:
+        return "Unknown test name: %s" % test_name
+
+    error_parsers = []
+    for parser_cls in _TEST_NAME_TO_PARSERS[test_name]:
+        error_parsers.append(parser_cls())
+
+    for line in sys.stdin:
+        line = line.strip()
+        for error_parser in error_parsers:
+            error_msg = error_parser.parse_error(line)
+            if error_msg is not None:
+                print(error_msg)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/build_tools/fb_compile_mongo.sh
+++ b/build_tools/fb_compile_mongo.sh
@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Invokes scons to build Mongo against a RocksDB checkout, using the compiler
+# and linker settings loaded from fbcode_config4.8.1.sh.
+#
+# This script uses bash-only constructs (`source`, `[[ ]]`, `+=`), so it must
+# be run by bash rather than by a plain POSIX /bin/sh.
+#
+# Environment:
+#   ROCKSDB_PATH - RocksDB checkout to build against (default: ~/rocksdb)
+#   ALLOC        - value for scons --allocator (default: tcmalloc). The value
+#                  "jemalloc" is translated to --allocator=system with
+#                  $JEMALLOC_LIB linked in as a whole archive.
+# Any command-line arguments are forwarded unchanged to scons.
+
+# fail early
+set -e
+
+if test -z "$ROCKSDB_PATH"; then
+  ROCKSDB_PATH=~/rocksdb
+fi
+source "$ROCKSDB_PATH/build_tools/fbcode_config4.8.1.sh"
+
+EXTRA_LDFLAGS=""
+
+if test -z "$ALLOC"; then
+  # default
+  ALLOC=tcmalloc
+elif [[ $ALLOC == "jemalloc" ]]; then
+  ALLOC=system
+  EXTRA_LDFLAGS+=" -Wl,--whole-archive $JEMALLOC_LIB -Wl,--no-whole-archive"
+fi
+
+# we need to force mongo to use static library, not shared
+STATIC_LIB_DEP_DIR='build/static_library_dependencies'
+mkdir -p "$STATIC_LIB_DEP_DIR"
+# $SNAPPY_LIBS and $LZ4_LIBS are deliberately left unquoted: presumably the
+# sourced config stores them with a leading space, which word splitting drops
+# (NOTE(review): confirm against fbcode_config4.8.1.sh).
+test -h $STATIC_LIB_DEP_DIR/$(basename $SNAPPY_LIBS) || ln -s $SNAPPY_LIBS $STATIC_LIB_DEP_DIR
+test -h $STATIC_LIB_DEP_DIR/$(basename $LZ4_LIBS) || ln -s $LZ4_LIBS $STATIC_LIB_DEP_DIR
+
+EXTRA_LDFLAGS+=" -L $STATIC_LIB_DEP_DIR"
+
+set -x
+
+EXTRA_CMD=""
+if ! test -e version.json; then
+  # this is Mongo 3.0
+  EXTRA_CMD="--rocksdb \
+    --variant-dir=linux2/norm \
+    --cxx=${CXX} \
+    --cc=${CC} \
+    --use-system-zlib"  # add this line back to normal code path
+                        # when https://jira.mongodb.org/browse/SERVER-19123 is resolved
+fi
+
+# $EXTRA_CMD is intentionally unquoted so that it splits into separate options;
+# "$@" forwards the caller's arguments without re-splitting them.
+scons \
+  LINKFLAGS="$EXTRA_LDFLAGS $EXEC_LDFLAGS $PLATFORM_LDFLAGS" \
+  CCFLAGS="$CXXFLAGS -L $STATIC_LIB_DEP_DIR" \
+  LIBS="lz4 gcc stdc++" \
+  LIBPATH="$ROCKSDB_PATH" \
+  CPPPATH="$ROCKSDB_PATH/include" \
+  -j32 \
+  --allocator="$ALLOC" \
+  --nostrip \
+  --opt=on \
+  --disable-minimum-compiler-version-enforcement \
+  --use-system-snappy \
+  --disable-warnings-as-errors \
+  $EXTRA_CMD "$@"
--- a/build_tools/fbcode_config.sh
+++ b/build_tools/fbcode_config.sh
@ -0,0 +1,175 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Set environment variables so that we can compile rocksdb using
+# fbcode settings.  It uses the latest g++ and clang compilers and also
+# uses jemalloc
+# Environment variables that change the behavior of this script:
+# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
+# ROCKSDB_DISABLE_SNAPPY, ROCKSDB_DISABLE_ZLIB, ROCKSDB_DISABLE_BZIP,
+# ROCKSDB_DISABLE_LZ4, ROCKSDB_DISABLE_ZSTD -- when non-empty, the matching
+# compression library is left out of the flags built below
+# USE_CLANG -- when non-empty, the clang toolchain is selected instead of gcc
+# USE_SSE, PORTABLE -- default to 1 when unset or empty
+# NOTE(review): despite the #!/bin/sh line, this script uses `source`, `+=`
+# and $BASH_SOURCE; presumably it is meant to be sourced from bash -- confirm.
+
+
+# Load the dependency locations from the dependencies.sh next to this script
+# (presumably the *_BASE variables used throughout -- verify in that file).
+BASEDIR=`dirname $BASH_SOURCE`
+source "$BASEDIR/dependencies.sh"
+
+CFLAGS=""
+
+# libgcc
+LIBGCC_INCLUDE="$LIBGCC_BASE/include"
+LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
+
+# glibc
+GLIBC_INCLUDE="$GLIBC_BASE/include"
+GLIBC_LIBS=" -L $GLIBC_BASE/lib"
+
+if ! test $ROCKSDB_DISABLE_SNAPPY; then
+  # snappy
+  SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
+  if test -z $PIC_BUILD; then
+    SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
+  else
+    SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
+  fi
+  CFLAGS+=" -DSNAPPY"
+fi
+
+# zlib, bzip2 and lz4 are only configured when PIC_BUILD is empty.
+if test -z $PIC_BUILD; then
+  if ! test $ROCKSDB_DISABLE_ZLIB; then
+    # location of zlib headers and libraries
+    ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
+    ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
+    CFLAGS+=" -DZLIB"
+  fi
+
+  if ! test $ROCKSDB_DISABLE_BZIP; then
+    # location of bzip headers and libraries
+    BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
+    BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
+    CFLAGS+=" -DBZIP2"
+  fi
+
+  if ! test $ROCKSDB_DISABLE_LZ4; then
+    LZ4_INCLUDE=" -I $LZ4_BASE/include/"
+    LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
+    CFLAGS+=" -DLZ4"
+  fi
+fi
+
+if ! test $ROCKSDB_DISABLE_ZSTD; then
+  ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
+  if test -z $PIC_BUILD; then
+    ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
+  else
+    ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
+  fi
+  CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
+fi
+
+# location of gflags headers and libraries
+GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
+if test -z $PIC_BUILD; then
+  GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
+else
+  GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
+fi
+CFLAGS+=" -DGFLAGS=gflags"
+
+# location of jemalloc
+JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
+JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
+
+# numa and libunwind are likewise only configured when PIC_BUILD is empty.
+if test -z $PIC_BUILD; then
+  # location of numa
+  NUMA_INCLUDE=" -I $NUMA_BASE/include/"
+  NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
+  CFLAGS+=" -DNUMA"
+
+  # location of libunwind
+  LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
+fi
+
+# location of TBB
+TBB_INCLUDE=" -isystem $TBB_BASE/include/"
+if test -z $PIC_BUILD; then
+  TBB_LIBS="$TBB_BASE/lib/libtbb.a"
+else
+  TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
+fi
+CFLAGS+=" -DTBB"
+
+# Default USE_SSE and PORTABLE to 1 unless the caller already set them.
+test "$USE_SSE" || USE_SSE=1
+export USE_SSE
+test "$PORTABLE" || PORTABLE=1
+export PORTABLE
+
+BINUTILS="$BINUTILS_BASE/bin"
+# Initial value only: both toolchain branches below reassign AR.
+AR="$BINUTILS/ar"
+
+DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
+
+STDLIBS="-L $GCC_BASE/lib64"
+
+CLANG_BIN="$CLANG_BASE/bin"
+CLANG_LIB="$CLANG_BASE/lib"
+CLANG_SRC="$CLANG_BASE/../../src"
+
+CLANG_ANALYZER="$CLANG_BIN/clang++"
+CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
+
+# Select the toolchain: gcc unless USE_CLANG is non-empty.
+if [ -z "$USE_CLANG" ]; then
+  # gcc
+  CC="$GCC_BASE/bin/gcc"
+  CXX="$GCC_BASE/bin/g++"
+  AR="$GCC_BASE/bin/gcc-ar"
+
+  CFLAGS+=" -B$BINUTILS/gold"
+  CFLAGS+=" -isystem $GLIBC_INCLUDE"
+  CFLAGS+=" -isystem $LIBGCC_INCLUDE"
+  # JEMALLOC is set only on the gcc path and is not in the export list below.
+  JEMALLOC=1
+else
+  # clang
+  CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
+  CC="$CLANG_BIN/clang"
+  CXX="$CLANG_BIN/clang++"
+  AR="$CLANG_BIN/llvm-ar"
+
+  KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
+
+  CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
+  CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x "
+  CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x/x86_64-facebook-linux "
+  CFLAGS+=" -isystem $GLIBC_INCLUDE"
+  CFLAGS+=" -isystem $LIBGCC_INCLUDE"
+  CFLAGS+=" -isystem $CLANG_INCLUDE"
+  CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
+  CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
+  CFLAGS+=" -Wno-expansion-to-defined "
+  CXXFLAGS="-nostdinc++"
+fi
+
+CFLAGS+=" $DEPS_INCLUDE"
+CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT"
+# CXXFLAGS is appended to, not reset: on the gcc path it keeps whatever value
+# it already had, on the clang path it starts from -nostdinc++.
+CXXFLAGS+=" $CFLAGS"
+
+EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
+EXEC_LDFLAGS+=" -B$BINUTILS/gold"
+EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-5-glibc-2.23/lib/ld.so"
+EXEC_LDFLAGS+=" $LIBUNWIND"
+EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-5-glibc-2.23/lib"
+# required by libtbb
+EXEC_LDFLAGS+=" -ldl"
+
+PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
+
+# Same library list as EXEC_LDFLAGS starts with, minus $NUMA_LIB.
+EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"
+
+VALGRIND_VER="$VALGRIND_BASE/bin/"
+
+LUA_PATH="$LUA_BASE"
+
+if test -z $PIC_BUILD; then
+  LUA_LIB=" $LUA_PATH/lib/liblua.a"
+else
+  LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
+fi
+
+# Only the variables listed here are exported (plus USE_SSE and PORTABLE
+# above); e.g. PLATFORM_LDFLAGS and JEMALLOC stay plain shell variables.
+export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
--- a/build_tools/fbcode_config_platform010.sh
+++ b/build_tools/fbcode_config_platform010.sh
@ -0,0 +1,175 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Set environment variables so that we can compile rocksdb using
+# fbcode settings.  It uses the latest g++ and clang compilers and also
+# uses jemalloc
+# Environment variables that change the behavior of this script:
+# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
+# ROCKSDB_DISABLE_SNAPPY, ROCKSDB_DISABLE_ZLIB, ROCKSDB_DISABLE_BZIP,
+# ROCKSDB_DISABLE_LZ4, ROCKSDB_DISABLE_ZSTD -- when non-empty, the matching
+# compression library is left out of the flags built below
+# USE_CLANG -- when non-empty, the clang toolchain is selected instead of gcc
+# USE_SSE, PORTABLE -- default to 1 when unset or empty
+# NOTE(review): in this file PIC_BUILD only switches the library file-name
+# suffix (see MAYBE_PIC below); no library is dropped because of it, so the
+# "will not be included" wording above looks out of date here -- confirm.
+# NOTE(review): despite the #!/bin/sh line, this script uses `source`, `+=`
+# and $BASH_SOURCE; presumably it is meant to be sourced from bash -- confirm.
+
+
+# Load the *_BASE dependency locations from dependencies_platform010.sh, which
+# lives next to this script.
+BASEDIR=`dirname $BASH_SOURCE`
+source "$BASEDIR/dependencies_platform010.sh"
+
+# Disallow using libraries from default locations as they might not be compatible with platform010 libraries.
+CFLAGS=" --sysroot=/DOES/NOT/EXIST"
+
+# libgcc
+LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk"
+LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/"
+
+# glibc
+GLIBC_INCLUDE="$GLIBC_BASE/include"
+GLIBC_LIBS=" -L $GLIBC_BASE/lib"
+GLIBC_LIBS+=" -B$GLIBC_BASE/lib"
+
+# File-name suffix for static libraries: empty for a regular build, "_pic"
+# when PIC_BUILD is non-empty.
+if test -z $PIC_BUILD; then
+  MAYBE_PIC=
+else
+  MAYBE_PIC=_pic
+fi
+
+if ! test $ROCKSDB_DISABLE_SNAPPY; then
+  # snappy
+  SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
+  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
+  CFLAGS+=" -DSNAPPY"
+fi
+
+if ! test $ROCKSDB_DISABLE_ZLIB; then
+  # location of zlib headers and libraries
+  ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
+  ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
+  CFLAGS+=" -DZLIB"
+fi
+
+if ! test $ROCKSDB_DISABLE_BZIP; then
+  # location of bzip headers and libraries
+  BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
+  BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
+  CFLAGS+=" -DBZIP2"
+fi
+
+if ! test $ROCKSDB_DISABLE_LZ4; then
+  LZ4_INCLUDE=" -I $LZ4_BASE/include/"
+  LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
+  CFLAGS+=" -DLZ4"
+fi
+
+if ! test $ROCKSDB_DISABLE_ZSTD; then
+  ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
+  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
+  CFLAGS+=" -DZSTD"
+fi
+
+# location of gflags headers and libraries
+GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
+GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
+CFLAGS+=" -DGFLAGS=gflags"
+
+# location of the benchmark headers and library
+BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
+BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
+
+# location of jemalloc
+JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
+JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
+
+# location of numa
+NUMA_INCLUDE=" -I $NUMA_BASE/include/"
+NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
+CFLAGS+=" -DNUMA"
+
+# location of libunwind
+LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
+
+# location of TBB
+TBB_INCLUDE=" -isystem $TBB_BASE/include/"
+TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
+CFLAGS+=" -DTBB"
+
+# location of LIBURING
+LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
+LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
+CFLAGS+=" -DLIBURING"
+
+# Default USE_SSE and PORTABLE to 1 unless the caller already set them.
+test "$USE_SSE" || USE_SSE=1
+export USE_SSE
+test "$PORTABLE" || PORTABLE=1
+export PORTABLE
+
+BINUTILS="$BINUTILS_BASE/bin"
+# Initial value only: both toolchain branches below reassign AR.
+AR="$BINUTILS/ar"
+AS="$BINUTILS/as"
+
+DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
+
+STDLIBS="-L $GCC_BASE/lib64"
+
+CLANG_BIN="$CLANG_BASE/bin"
+CLANG_LIB="$CLANG_BASE/lib"
+CLANG_SRC="$CLANG_BASE/../../src"
+
+CLANG_ANALYZER="$CLANG_BIN/clang++"
+CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
+
+# Select the toolchain: gcc unless USE_CLANG is non-empty. Both branches pass
+# -nostdinc -nostdlib and then list every include directory explicitly.
+if [ -z "$USE_CLANG" ]; then
+  # gcc
+  CC="$GCC_BASE/bin/gcc"
+  CXX="$GCC_BASE/bin/g++"
+  AR="$GCC_BASE/bin/gcc-ar"
+
+  CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
+  CFLAGS+=" -I$GCC_BASE/include"
+  CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include"
+  CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include"
+  CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/"
+  CFLAGS+=" -isystem $LIBGCC_INCLUDE"
+  CFLAGS+=" -isystem $GLIBC_INCLUDE"
+  CFLAGS+=" -I$GLIBC_INCLUDE"
+  CFLAGS+=" -I$LIBGCC_BASE/include"
+  CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/"
+  CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/"
+  CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward"
+  # NOTE(review): $GLIBC_INCLUDE was already added twice a few lines above;
+  # this repetition looks redundant -- confirm before removing.
+  CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE"
+  # JEMALLOC is set only on the gcc path and is not in the export list below.
+  JEMALLOC=1
+else
+  # clang
+  CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
+  CC="$CLANG_BIN/clang"
+  CXX="$CLANG_BIN/clang++"
+  AR="$CLANG_BIN/llvm-ar"
+
+  CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
+  CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk "
+  CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux "
+  CFLAGS+=" -isystem $GLIBC_INCLUDE"
+  CFLAGS+=" -isystem $LIBGCC_INCLUDE"
+  CFLAGS+=" -isystem $CLANG_INCLUDE"
+  CFLAGS+=" -Wno-expansion-to-defined "
+  CXXFLAGS="-nostdinc++"
+fi
+
+# Kernel headers are added for both toolchains.
+KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
+CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
+CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
+
+CFLAGS+=" $DEPS_INCLUDE"
+CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_IOURING_PRESENT"
+# CXXFLAGS is appended to, not reset: on the gcc path it keeps whatever value
+# it already had, on the clang path it starts from -nostdinc++.
+CXXFLAGS+=" $CFLAGS"
+
+EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
+EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so"
+EXEC_LDFLAGS+=" $LIBUNWIND"
+EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib"
+EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
+# required by libtbb
+EXEC_LDFLAGS+=" -ldl"
+
+PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
+PLATFORM_LDFLAGS+=" -B$BINUTILS"
+
+# Same library list as EXEC_LDFLAGS starts with, minus $NUMA_LIB.
+EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
+
+VALGRIND_VER="$VALGRIND_BASE/bin/"
+
+# NOTE(review): LUA_PATH and LUA_LIB are exported below but never assigned in
+# this file, so they keep whatever value the environment already had (possibly
+# none) -- confirm whether Lua settings were meant to be configured here.
+export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
--- a/build_tools/format-diff.sh
+++ b/build_tools/format-diff.sh
@ -0,0 +1,203 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# If clang_format_diff.py command is not specified, we assume we are able to
+# access directly without any path.
+
+# Prints the command-line help text, one line per entry, to stdout.
+print_usage () {
+  printf '%s\n' \
+    "Usage:" \
+    "format-diff.sh [OPTIONS]" \
+    "-c: check only." \
+    "-h: print this message."
+}
+
+# Parse command-line options: -c turns on check-only mode; -h and any other
+# single-character value getopts reports both print the usage text and exit
+# with status 1.
+while getopts ':ch' OPTION; do
+  case "$OPTION" in
+    c) CHECK_ONLY=1 ;;
+    h|?) print_usage; exit 1 ;;
+  esac
+done
+
+REPO_ROOT="$(git rev-parse --show-toplevel)"
+
+# Settle on the command used to run clang-format-diff and store it in
+# CLANG_FORMAT_DIFF. A value supplied by the caller is only sanity-checked;
+# otherwise the candidates are probed in order: `clang-format-diff`,
+# `clang-format-diff.py`, a copy in the repository root, and finally running
+# the .py file through ${PYTHON:-python3}. Exits with 128, 129 or 130 when no
+# working command can be set up.
+if [ "$CLANG_FORMAT_DIFF" ]; then
+  echo "Note: CLANG_FORMAT_DIFF='$CLANG_FORMAT_DIFF'"
+  # Dry run to confirm dependencies like argparse
+  if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then
+    true #Good
+  else
+    exit 128
+  fi
+else
+  # First try directly executing the possibilities
+  if clang-format-diff --help &> /dev/null < /dev/null; then
+    CLANG_FORMAT_DIFF=clang-format-diff
+  elif clang-format-diff.py --help &> /dev/null < /dev/null; then
+    CLANG_FORMAT_DIFF=clang-format-diff.py
+  elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then
+    CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py
+  else
+    # This probably means we need to directly invoke the interpreter.
+    # But first find clang-format-diff.py
+    if [ -f "$REPO_ROOT/clang-format-diff.py" ]; then
+      CFD_PATH="$REPO_ROOT/clang-format-diff.py"
+    elif which clang-format-diff.py &> /dev/null; then
+      CFD_PATH="$(which clang-format-diff.py)"
+    else
+      echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!"
+      echo "You can download clang-format-diff.py by running: "
+      echo "    curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
+      echo "You should make sure the downloaded script is not compromised."
+      echo "You can download clang-format by running:"
+      echo "    brew install clang-format"
+      echo "  Or"
+      echo "    apt install clang-format"
+      echo "  This might work too:"
+      echo "    yum install git-clang-format"
+      echo "Then make sure clang-format is available and executable from \$PATH:"
+      echo "    clang-format --version"
+      exit 128
+    fi
+    # Check argparse pre-req on interpreter, or it will fail
+    if echo import argparse | ${PYTHON:-python3}; then
+      true # Good
+    else
+      # The inner quotes are escaped so that they are printed; unescaped they
+      # would end the string and the message would lose them.
+      echo "To run clang-format-diff.py, we'll need the library \"argparse\" to be"
+      echo "installed. You can try either of the follow ways to install it:"
+      echo "  1. Manually download argparse: https://pypi.python.org/pypi/argparse"
+      echo "  2. easy_install argparse (if you have easy_install)"
+      echo "  3. pip install argparse (if you have pip)"
+      exit 129
+    fi
+    # Unfortunately, some machines have a Python2 clang-format-diff.py
+    # installed but only a Python3 interpreter installed. Unfortunately,
+    # automatic 2to3 migration is insufficient, so suggest downloading latest.
+    if grep -q "print '" "$CFD_PATH" && \
+       ${PYTHON:-python3} --version | grep -q 'ython 3'; then
+      echo "You have clang-format-diff.py for Python 2 but are using a Python 3"
+      echo "interpreter (${PYTHON:-python3})."
+      echo "You can download clang-format-diff.py for Python 3 by running: "
+      echo "    curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
+      echo "You should make sure the downloaded script is not compromised."
+      exit 130
+    fi
+    CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH"
+    # This had better work after all those checks
+    if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then
+      true #Good
+    else
+      exit 128
+    fi
+  fi
+fi
+
+# TODO(kailiu) following work is not complete since we still need to figure
+# out how to add the modified files done pre-commit hook to git's commit index.
+#
+# Check if this script has already been added to pre-commit hook.
+# Will suggest user to add this script to pre-commit hook if their pre-commit
+# is empty.
+# PRE_COMMIT_SCRIPT_PATH="`git rev-parse --show-toplevel`/.git/hooks/pre-commit"
+# if ! ls $PRE_COMMIT_SCRIPT_PATH &> /dev/null
+# then
+#   echo "Would you like to add this script to pre-commit hook, which will do "
+#   echo -n "the format check for all the affected lines before you check in (y/n):"
+#   read add_to_hook
+#   if [ "$add_to_hook" == "y" ]
+#   then
+#     ln -s `git rev-parse --show-toplevel`/build_tools/format-diff.sh $PRE_COMMIT_SCRIPT_PATH
+#   fi
+# fi
+# From here on, abort the script as soon as any command fails.
+set -e
+
+# Non-empty when the working tree differs from HEAD.
+uncommitted_code=`git diff HEAD`
+
+# If there's no uncommitted changes, we assume user are doing post-commit
+# format check, in which case we'll try to check the modified lines vs. the
+# facebook/rocksdb.git main branch. Otherwise, we'll check format of the
+# uncommitted code only.
+# FORMAT_REMOTE and FORMAT_UPSTREAM may be preset by the caller; each is only
+# computed here when it is empty.
+if [ -z "$uncommitted_code" ]
+then
+  # Attempt to get name of facebook/rocksdb.git remote.
+  [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
+  # Fall back on 'origin' if that fails
+  [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin
+  # Use main branch from that remote
+  [ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')"
+  # Get the common ancestor with that remote branch. Everything after that
+  # common ancestor would be considered the contents of a pull request, so
+  # should be relevant for formatting fixes.
+  FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)"
+  # Get the differences
+  diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1)
+  echo "Checking format of changes not yet in $FORMAT_UPSTREAM..."
+else
+  # Check the format of uncommitted lines,
+  diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1)
+  echo "Checking format of uncommitted changes..."
+fi
+
+# An empty result means nothing needs reformatting. In check-only mode (-c) a
+# non-empty result is reported and the script exits with 1 without changing
+# any file; the formatter output itself is shown only when VERBOSE_CHECK is
+# non-empty.
+if [ -z "$diffs" ]
+then
+  echo "Nothing needs to be reformatted!"
+  exit 0
+elif [ $CHECK_ONLY ]
+then
+  echo "Your change has unformatted code. Please run make format!"
+  if [ $VERBOSE_CHECK ]; then
+    clang-format --version
+    echo "$diffs"
+  fi
+  exit 1
+fi
+
+# Highlight the insertion/deletion from the clang-format-diff.py's output
+COLOR_END="\033[0m"
+COLOR_RED="\033[0;31m"
+COLOR_GREEN="\033[0;32m"
+
+echo -e "Detect lines that doesn't follow the format rules:\r"
+# Add the color to the diff. lines added will be green; lines removed will be red.
+echo "$diffs" |
+  sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" |
+  sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/"
+
+echo -e "Would you like to fix the format automatically (y/n): \c"
+
+# Make sure under any mode, we can read user input.
+# (stdin is redirected to the terminal for the rest of the script.)
+exec < /dev/tty
+read to_fix
+
+# Any answer other than exactly "y" stops here with exit status 1.
+if [ "$to_fix" != "y" ]
+then
+  exit 1
+fi
+
+# Do in-place format adjustment.
+# The same diff range as in the check above is fed to the formatter, this time
+# with -i.
+if [ -z "$uncommitted_code" ]
+then
+  git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -i -p 1
+else
+  git diff -U0 HEAD | $CLANG_FORMAT_DIFF -i -p 1
+fi
+echo "Files reformatted!"
+
+# Amend to last commit if user do the post-commit format check
+if [ -z "$uncommitted_code" ]; then
+  echo -e "Would you like to amend the changes to last commit (`git log HEAD --oneline | head -1`)? (y/n): \c"
+  read to_amend
+
+  if [ "$to_amend" == "y" ]
+  then
+    # -a stages every modified tracked file; the existing commit message is
+    # reused unchanged.
+    git commit -a --amend --reuse-message HEAD
+    echo "Amended to last commit"
+  fi
+fi
--- a/build_tools/gnu_parallel
+++ b/build_tools/gnu_parallel
--- a/build_tools/make_package.sh
+++ b/build_tools/make_package.sh
@ -0,0 +1,129 @@
+# shellcheck disable=SC1113
+#/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+set -e
+
+# Print an informational message ($1) with a "[+] " prefix.
+function log() {
+  printf '[+] %s\n' "$1"
+}
+
+function fatal() {
+  echo "[!] $1"
+  exit 1
+}
+
+# Detect the host distribution family and store it in the variable whose NAME
+# is passed as $1: "centos" when /etc/yum.conf exists, "ubuntu" when
+# /etc/dpkg/dpkg.cfg exists. Any other system aborts via fatal().
+function platform() {
+  local __resultvar=$1
+  if [[ -f "/etc/yum.conf" ]]; then
+    # printf -v assigns by variable name without re-parsing text through eval.
+    printf -v "$__resultvar" '%s' "centos"
+  elif [[ -f "/etc/dpkg/dpkg.cfg" ]]; then
+    printf -v "$__resultvar" '%s' "ubuntu"
+  else
+    fatal "Unknown operating system"
+  fi
+}
+platform OS
+
+# Install package $1 unless it is already installed. Any further arguments
+# are passed through to the package manager (e.g. --enablerepo=...).
+#
+# The installed check queries the exact package name. Grepping the complete
+# package list would also match every other package whose name merely
+# contains $1 (e.g. "ruby" matching "ruby-devel").
+function package() {
+  if [[ $OS = "ubuntu" ]]; then
+    if dpkg-query -W -f='${Status}' "$1" 2>/dev/null | grep --quiet "install ok installed"; then
+      log "$1 is already installed. skipping."
+    else
+      apt-get install "$@" -y
+    fi
+  elif [[ $OS = "centos" ]]; then
+    if rpm -q "$1" > /dev/null 2>&1; then
+      log "$1 is already installed. skipping."
+    else
+      yum install "$@" -y
+    fi
+  fi
+}
+
+# Export FPM_OUTPUT with the package format matching the detected platform:
+# "deb" on ubuntu, "rpm" on centos. Any other $OS leaves it untouched.
+function detect_fpm_output() {
+  case "$OS" in
+    ubuntu) export FPM_OUTPUT=deb ;;
+    centos) export FPM_OUTPUT=rpm ;;
+  esac
+}
+detect_fpm_output
+
+# Install Ruby gem $1 (remaining arguments are passed to "gem install") unless
+# a gem with exactly that name is already installed. The name is anchored so
+# that other gems which merely contain $1 in their name do not count.
+function gem_install() {
+  if gem list --installed "^$1\$" > /dev/null 2>&1; then
+    log "$1 is already installed. skipping."
+  else
+    gem install "$@"
+  fi
+}
+
+# Build the RocksDB static library and wrap the installed files into a native
+# package (deb or rpm, see FPM_OUTPUT) using fpm.
+#
+# Arguments:
+#   $1  RocksDB version string, passed to fpm as the package version.
+#
+# When /vagrant exists, the toolchain and packaging prerequisites for the
+# detected platform are installed first.
+function main() {
+  if [[ $# -ne 1 ]]; then
+    fatal "Usage: $0 <rocksdb_version>"
+  else
+    log "using rocksdb version: $1"
+  fi
+
+  if [[ -d /vagrant ]]; then
+    if [[ $OS = "ubuntu" ]]; then
+      package g++-4.8
+      export CXX=g++-4.8
+
+      # the deb would depend on libgflags2, but the static lib is the only thing
+      # installed by make install
+      package libgflags-dev
+
+      package ruby-all-dev
+    elif [[ $OS = "centos" ]]; then
+      # Fetch the devtools repo definition into yum's repo directory (once).
+      pushd /etc/yum.repos.d
+      if [[ ! -f /etc/yum.repos.d/devtools-1.1.repo ]]; then
+        wget http://people.centos.org/tru/devtools-1.1/devtools-1.1.repo
+      fi
+      package devtoolset-1.1-gcc --enablerepo=testing-1.1-devtools-6
+      package devtoolset-1.1-gcc-c++ --enablerepo=testing-1.1-devtools-6
+      export CC=/opt/centos/devtoolset-1.1/root/usr/bin/gcc
+      export CPP=/opt/centos/devtoolset-1.1/root/usr/bin/cpp
+      export CXX=/opt/centos/devtoolset-1.1/root/usr/bin/c++
+      export PATH=$PATH:/opt/centos/devtoolset-1.1/root/usr/bin
+      popd
+      if ! rpm -qa | grep --quiet gflags; then
+        rpm -i https://github.com/schuhschuh/gflags/releases/download/v2.1.0/gflags-devel-2.1.0-1.amd64.rpm
+      fi
+
+      package ruby
+      package ruby-devel
+      package rubygems
+      package rpm-build
+    fi
+  fi
+  gem_install fpm
+
+  make static_lib
+  # rpm based systems report their own library directory (%_libdir).
+  LIBDIR=/usr/lib
+  if [[ $FPM_OUTPUT = "rpm" ]]; then
+      LIBDIR=$(rpm --eval '%_libdir')
+  fi
+
+  # Stage a fresh install tree in ./package, then package its usr/ subtree.
+  rm -rf package
+  make install DESTDIR=package PREFIX=/usr LIBDIR="$LIBDIR"
+
+  fpm \
+    -s dir \
+    -t "$FPM_OUTPUT" \
+    -C package \
+    -n rocksdb \
+    -v "$1" \
+    --url http://rocksdb.org/ \
+    -m rocksdb@fb.com \
+    --license BSD \
+    --vendor Facebook \
+    --description "RocksDB is an embeddable persistent key-value store for fast storage." \
+    usr
+}
+
+# Quoted so that every command line argument reaches main unchanged.
+main "$@"
--- a/build_tools/ps_with_stack
+++ b/build_tools/ps_with_stack
@ -0,0 +1,38 @@
+#!/usr/bin/env perl
+
+# Print the output of "ps -wwf" and, for every listed process whose command
+# starts with a relative path, also dump its stacks via pstack (or gdb when
+# the pstack command fails).
+
+use strict;
+
+# Abort with a message if ps cannot be started instead of reading nothing.
+open(my $ps, "-|", "ps -wwf") or die "Failed to run 'ps -wwf': $!\n";
+my $cols_known = 0;
+my $cmd_col = 0;
+my $pid_col = 0;
+while (<$ps>) {
+  print;
+  # split ' ' ignores leading whitespace (split /\s+/ would produce an empty
+  # first field), so column indexes stay aligned between the header and the
+  # rows even when only some lines start with blanks.
+  my @cols = split(' ');
+
+  if (!$cols_known && /CMD/) {
+    # Parse relevant ps column headers
+    for (my $i = 0; $i <= $#cols; $i++) {
+      if ($cols[$i] eq "CMD") {
+        $cmd_col = $i;
+      }
+      if ($cols[$i] eq "PID") {
+        $pid_col = $i;
+      }
+    }
+    $cols_known = 1;
+  } else {
+    my $pid = $cols[$pid_col];
+    my $cmd = $cols[$cmd_col];
+    # Short or empty lines have no PID/CMD field to inspect.
+    next unless defined $pid && defined $cmd;
+    # Match numeric PID and relative path command
+    # -> The intention is only to dump stack traces for hangs in code under
+    # test, which means we probably just built it and are executing by
+    # relative path (e.g. ./my_test or foo/bar_test) rather than by absolute
+    # path (e.g. /usr/bin/time) or PATH search (e.g. grep).
+    if ($pid =~ /^[0-9]+$/ && $cmd =~ /^[^\/ ]+[\/]/) {
+      print "Dumping stacks for $pid...\n";
+      system("pstack $pid || gdb -batch -p $pid -ex 'thread apply all bt'");
+    }
+  }
+}
+close $ps;
--- a/build_tools/regression_build_test.sh
+++ b/build_tools/regression_build_test.sh
@ -0,0 +1,396 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+set -e
+
+NUM=10000000
+
+if [ $# -eq 1 ];then
+  DATA_DIR=$1
+elif [ $# -eq 2 ];then
+  DATA_DIR=$1
+  STAT_FILE=$2
+fi
+
+# On the production build servers, set data and stat
+# files/directories not in /tmp or else the tempdir cleaning
+# scripts will make you very unhappy.
+DATA_DIR=${DATA_DIR:-$(mktemp -t -d rocksdb_XXXX)}
+STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)}
+
+# Remove the benchmark database directory and every "<STAT_FILE>.<suffix>"
+# result file. Registered below to run whenever the script exits.
+# The variables are quoted so that a path containing whitespace is removed as
+# one path instead of being split into several unrelated arguments; the glob
+# suffix stays outside the quotes so it still expands.
+function cleanup {
+  rm -rf -- "$DATA_DIR"
+  rm -f -- "$STAT_FILE".*
+}
+
+trap cleanup EXIT
+
+make release
+
+# NOTE: the database path and the stat file names are quoted so that user
+# supplied locations containing whitespace keep working.
+
+# measure fillseq + fill up the DB for overwrite benchmark
+./db_bench \
+    --benchmarks=fillseq \
+    --db="$DATA_DIR" \
+    --use_existing_db=0 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --writes=$NUM \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0  > "${STAT_FILE}.fillseq"
+
+# measure overwrite performance
+./db_bench \
+    --benchmarks=overwrite \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --writes=$((NUM / 10)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6  \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=8 > "${STAT_FILE}.overwrite"
+
+# NOTE: the database path and the stat file names are quoted so that user
+# supplied locations containing whitespace keep working.
+
+# fill up the db for readrandom benchmark (1GB total size)
+./db_bench \
+    --benchmarks=fillseq \
+    --db="$DATA_DIR" \
+    --use_existing_db=0 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --writes=$NUM \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=1 > /dev/null
+
+# measure readrandom with 6GB block cache
+./db_bench \
+    --benchmarks=readrandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --reads=$((NUM / 5)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > "${STAT_FILE}.readrandom"
+
+# measure readrandom with 6GB block cache and tailing iterator
+./db_bench \
+    --benchmarks=readrandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --reads=$((NUM / 5)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --use_tailing_iterator=1 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > "${STAT_FILE}.readrandomtailing"
+
+# measure readrandom with 100MB block cache
+./db_bench \
+    --benchmarks=readrandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --reads=$((NUM / 5)) \
+    --cache_size=104857600 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > "${STAT_FILE}.readrandomsmallblockcache"
+
+# measure readrandom with 8k data in memtable
+./db_bench \
+    --benchmarks=overwrite,readrandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --reads=$((NUM / 5)) \
+    --writes=512 \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --write_buffer_size=1000000000 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > "${STAT_FILE}.readrandom_mem_sst"
+
+
+# NOTE: the database path and the stat file names are quoted so that user
+# supplied locations containing whitespace keep working.
+
+# fill up the db for readrandom benchmark with filluniquerandom (1GB total size)
+./db_bench \
+    --benchmarks=filluniquerandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=0 \
+    --bloom_bits=10 \
+    --num=$((NUM / 4)) \
+    --writes=$((NUM / 4)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=1 > /dev/null
+
+# dummy test just to compact the data
+./db_bench \
+    --benchmarks=readrandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$((NUM / 1000)) \
+    --reads=$((NUM / 1000)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > /dev/null
+
+# measure readrandom after load with filluniquerandom with 6GB block cache
+./db_bench \
+    --benchmarks=readrandom \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$((NUM / 4)) \
+    --reads=$((NUM / 4)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --disable_auto_compactions=1 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > "${STAT_FILE}.readrandom_filluniquerandom"
+
+# measure readwhilewriting after load with filluniquerandom with 6GB block cache
+./db_bench \
+    --benchmarks=readwhilewriting \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$((NUM / 4)) \
+    --reads=$((NUM / 4)) \
+    --benchmark_write_rate_limit=$(( 110 * 1024 )) \
+    --write_buffer_size=100000000 \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > "${STAT_FILE}.readwhilewriting"
+
+# measure memtable performance -- none of the data gets flushed to disk
+./db_bench \
+    --benchmarks=fillrandom,readrandom, \
+    --db="$DATA_DIR" \
+    --use_existing_db=0 \
+    --num=$((NUM / 10)) \
+    --reads=$NUM \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --write_buffer_size=1000000000 \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --value_size=10 \
+    --threads=16 > "${STAT_FILE}.memtablefillreadrandom"
+
+common_in_mem_args="--db=/dev/shm/rocksdb \
+    --num_levels=6 \
+    --key_size=20 \
+    --prefix_size=12 \
+    --keys_per_prefix=10 \
+    --value_size=100 \
+    --compression_type=none \
+    --compression_ratio=1 \
+    --write_buffer_size=134217728 \
+    --max_write_buffer_number=4 \
+    --level0_file_num_compaction_trigger=8 \
+    --level0_slowdown_writes_trigger=16 \
+    --level0_stop_writes_trigger=24 \
+    --target_file_size_base=134217728 \
+    --max_bytes_for_level_base=1073741824 \
+    --disable_wal=0 \
+    --wal_dir=/dev/shm/rocksdb \
+    --sync=0 \
+    --verify_checksum=1 \
+    --delete_obsolete_files_period_micros=314572800 \
+    --use_plain_table=1 \
+    --open_files=-1 \
+    --mmap_read=1 \
+    --mmap_write=0 \
+    --bloom_bits=10 \
+    --bloom_locality=1 \
+    --perf_level=0"
+
+# prepare a in-memory DB with 50M keys, total DB size is ~6G
+./db_bench \
+    $common_in_mem_args \
+    --statistics=0 \
+    --max_background_compactions=16 \
+    --max_background_flushes=16 \
+    --benchmarks=filluniquerandom \
+    --use_existing_db=0 \
+    --num=52428800 \
+    --threads=1 > /dev/null
+
+# Readwhilewriting
+./db_bench \
+    $common_in_mem_args \
+    --statistics=1 \
+    --max_background_compactions=4 \
+    --max_background_flushes=0 \
+    --benchmarks=readwhilewriting\
+    --use_existing_db=1 \
+    --duration=600 \
+    --threads=32 \
+    --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram
+
+# Seekrandomwhilewriting
+./db_bench \
+    $common_in_mem_args \
+    --statistics=1 \
+    --max_background_compactions=4 \
+    --max_background_flushes=0 \
+    --benchmarks=seekrandomwhilewriting \
+    --use_existing_db=1 \
+    --use_tailing_iterator=1 \
+    --duration=600 \
+    --threads=32 \
+    --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram
+
+# NOTE: the database path and the stat file names are quoted so that user
+# supplied locations containing whitespace keep working.
+
+# measure fillseq with bunch of column families
+./db_bench \
+    --benchmarks=fillseq \
+    --num_column_families=500 \
+    --write_buffer_size=1048576 \
+    --db="$DATA_DIR" \
+    --use_existing_db=0 \
+    --num=$NUM \
+    --writes=$NUM \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0  > "${STAT_FILE}.fillseq_lots_column_families"
+
+# measure overwrite performance with bunch of column families
+./db_bench \
+    --benchmarks=overwrite \
+    --num_column_families=500 \
+    --write_buffer_size=1048576 \
+    --db="$DATA_DIR" \
+    --use_existing_db=1 \
+    --num=$NUM \
+    --writes=$((NUM / 10)) \
+    --open_files=55000 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=8 > "${STAT_FILE}.overwrite_lots_column_families"
+
+# send data to ods
+#
+# Arguments:
+#   $1  key name
+#   $2  value to report
+#
+# Without JENKINS_HOME in the environment the pair is only printed. Otherwise
+# it is sent with curl; an empty value is reported on stderr and skipped.
+function send_to_ods {
+  local key="$1"
+  local value="$2"
+
+  # Quoted: an unquoted empty or multi-word JENKINS_HOME would change the
+  # meaning of the test expression.
+  if [ -z "$JENKINS_HOME" ]; then
+    # running on devbox, just print out the values
+    echo "$key" "$value"
+    return
+  fi
+
+  if [ -z "$value" ];then
+    echo >&2 "ERROR: Key $key doesn't have a value."
+    return
+  fi
+  curl --silent "https://www.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \
+    --connect-timeout 60
+}
+
+function send_benchmark_to_ods {
+  bench="$1"
+  bench_key="$2"
+  file="$3"
+
+  QPS=$(grep $bench $file | awk '{print $5}')
+  P50_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $3}' )
+  P75_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $5}' )
+  P99_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $7}' )
+
+  send_to_ods rocksdb.build.$bench_key.qps $QPS
+  send_to_ods rocksdb.build.$bench_key.p50_micros $P50_MICROS
+  send_to_ods rocksdb.build.$bench_key.p75_micros $P75_MICROS
+  send_to_ods rocksdb.build.$bench_key.p99_micros $P99_MICROS
+}
+
+send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite
+send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq
+send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom
+send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing
+send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache
+send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst
+send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom
+send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom
+send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom
+send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting
+send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram
+send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram
+send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families
+send_benchmark_to_ods overwrite overwrite_lots_column_families ${STAT_FILE}.overwrite_lots_column_families
--- a/build_tools/run_ci_db_test.ps1
+++ b/build_tools/run_ci_db_test.ps1
@ -0,0 +1,493 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# This script enables you running RocksDB tests by running
+# All the tests concurrently and utilizing all the cores
+Param(
+  [switch]$EnableJE = $false,  # Look for and use test executables with a _je suffix; exclusions are given without it
+  [switch]$RunAll = $false,    # Will attempt to discover all *_test[_je].exe binaries and run all
+                               # of them as Google suites, i.e. it will run test cases concurrently,
+                               # except the executables listed in $RunOnly, which run as whole executables.
+                               # Anything excluded with $ExcludeExes is skipped, and individual
+                               # test cases excluded with $ExcludeCases are not run either.
+  [switch]$RunAllExe = $false, # Look for and use test executables, append _je to exclusions automatically
+                               # It will attempt to run them in parallel w/o breaking them up on individual
+                               # test cases. Those listed with $ExcludeExes will be excluded
+  [string]$SuiteRun = "",      # Split test suites in test cases and run in parallel, not compatible with $RunAll
+  [string]$Run = "",           # Run specified executables in parallel but do not split to test cases
+  [string]$ExcludeCases = "",  # Exclude test cases, expects a comma separated list, no spaces
+                               # Takes effect when $RunAll or $SuiteRun is specified. Must have full
+                               # Test cases name including a group and a parameter if any
+                               # NOTE(review): the script splits this value on spaces; presumably an
+                               # unquoted comma list arrives space-joined - confirm against the callers
+  [string]$ExcludeExes = "",   # Exclude exes from consideration, expects a comma separated list,
+                               # no spaces. Takes effect when $RunAll or $RunAllExe is specified
+  [string]$WorkFolder = "",    # Direct tests to use that folder. SSD or Ram drive are better options.
+   # Number of async tasks that would run concurrently. Recommend a number below 64.
+   # However, CPU utilization really depends on the storage media. Recommend ram based disk.
+   # a value of 1 will run everything serially
+  [int]$Concurrency = 8,
+  [int]$Limit = -1 # Maximum number of jobs to start; -1 means do not limit (for test purposes)
+)
+
+# Folders and commands must be fullpath to run assuming
+# the current folder is at the root of the git enlistment
+
+# Remember when the run started (used for the elapsed time report at the end
+# of the script) and echo it to the output.
+$StartDate = Get-Date
+Write-Output $StartDate
+
+
+# Show debug messages instead of suppressing them.
+$DebugPreference = "Continue"
+
+# Executables that must only ever be started as a whole: they are not Google
+# Test suites, so they must never be asked to list or filter test cases.
+$RunOnly = New-Object System.Collections.Generic.HashSet[string]
+$RunOnlyNames = @(
+    "c_test",
+    "compact_on_deletion_collector_test",
+    "merge_test",
+    "stringappend_test",   # Apparently incorrectly written
+    "backup_engine_test",  # Disabled
+    "timer_queue_test"     # Not a gtest
+)
+ForEach($RunOnlyName in $RunOnlyNames) {
+    [void]$RunOnly.Add($RunOnlyName)
+}
+
+# $RunAll cannot be combined with $SuiteRun, nor $RunAllExe with $Run.
+# The messages are single-quoted so that the parameter NAMES are printed;
+# inside double quotes PowerShell would substitute their current values.
+if($RunAll -and $SuiteRun -ne "") {
+    Write-Error '$RunAll and $SuiteRun are not compatible'
+    exit 1
+}
+
+if($RunAllExe -and $Run -ne "") {
+    Write-Error '$RunAllExe and $Run are not compatible'
+    exit 1
+}
+
+# Root of the enlistment: the Appveyor build folder when that environment
+# variable is set, otherwise this script's folder without "\build_tools".
+[string]$Appveyor = $Env:APPVEYOR_BUILD_FOLDER
+if($Appveyor -eq "") {
+    $RootFolder = $PSScriptRoot -replace '\\build_tools', ''
+} else {
+    $RootFolder = $Appveyor
+}
+
+$LogFolder = $RootFolder + "\db_logs\"
+$BinariesFolder = $RootFolder + "\build\Debug\"
+
+# Working folder precedence: -WorkFolder argument, then TEST_TMPDIR from the
+# environment, then <root>\db_tests\. TEST_TMPDIR always ends up matching.
+if($WorkFolder -ne "") {
+    # Override from a command line
+    $Env:TEST_TMPDIR = $WorkFolder
+} else {
+    [string]$TmpDirFromEnv = $Env:TEST_TMPDIR
+    if($TmpDirFromEnv -ne "") {
+        $WorkFolder = $TmpDirFromEnv
+    } else {
+        $WorkFolder = $RootFolder + "\db_tests\"
+        $Env:TEST_TMPDIR = $WorkFolder
+    }
+}
+
+Write-Output "Root: $RootFolder, WorkFolder: $WorkFolder"
+Write-Output "BinariesFolder: $BinariesFolder, LogFolder: $LogFolder"
+
+# Create the work and log directories; errors (such as the directory already
+# existing) are ignored.
+md -Path $WorkFolder -ErrorAction Ignore | Out-Null
+md -Path $LogFolder -ErrorAction Ignore | Out-Null
+
+
+# Test cases to skip, taken from $ExcludeCases (split on spaces).
+$ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string]
+if($ExcludeCases -ne "") {
+    Write-Host "ExcludeCases: $ExcludeCases"
+    ForEach($ExcludedCase in ($ExcludeCases -split ' ')) {
+      [void]$ExcludeCasesSet.Add($ExcludedCase)
+    }
+}
+
+# Test executables to skip, taken from $ExcludeExes (split on spaces).
+$ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string]
+if($ExcludeExes -ne "") {
+    Write-Host "ExcludeExe: $ExcludeExes"
+    ForEach($ExcludedExe in ($ExcludeExes -split ' ')) {
+      [void]$ExcludeExesSet.Add($ExcludedExe)
+    }
+}
+
+
+# Extract the names of the tests of a Google Test executable by running it
+# with --gtest_list_tests.
+# This filter removes the "#"-introduced comments, and expands to
+# fully-qualified names by changing input like this:
+#
+#   DBTest.
+#     Empty
+#     WriteEmptyBatch
+#   MultiThreaded/MultiThreadedDBTest.
+#     MultiThreaded/0  # GetParam() = 0
+#     MultiThreaded/1  # GetParam() = 1
+#   RibbonTypeParamTest/0.  # TypeParam = struct DefaultTypesAndSettings
+#     CompactnessAndBacktrackAndFpRate
+#     Extremes
+#     FindOccupancyForSuccessRate
+#
+# into this:
+#
+#   DBTest.Empty
+#   DBTest.WriteEmptyBatch
+#   MultiThreaded/MultiThreadedDBTest.MultiThreaded/0
+#   MultiThreaded/MultiThreadedDBTest.MultiThreaded/1
+#   RibbonTypeParamTest/0.CompactnessAndBacktrackAndFpRate
+#   RibbonTypeParamTest/0.Extremes
+#   RibbonTypeParamTest/0.FindOccupancyForSuccessRate
+#
+# Parameters:
+#   $GTestExe   path of the test executable to query
+#   $HashTable  receives one entry per test case: TestName -> Log File Name
+# Test cases contained in $ExcludeCasesSet are skipped with a warning.
+function ExtractTestCases([string]$GTestExe, $HashTable) {
+
+    $Tests = @()
+    # Run the executable to get a list of tests and store it into $Tests
+    &$GTestExe --gtest_list_tests | tee -Variable Tests | Out-Null
+
+    # Current group
+    $Group=""
+
+    ForEach( $l in $Tests) {
+
+      # remove trailing comment if any
+      $l = $l -replace '\s+\#.*',''
+      # Leading whitespace is fine
+      $l = $l -replace '^\s+',''
+
+      # Skip blank lines: they would otherwise be registered as a test case
+      # named after the current group only, and a second blank line in the
+      # same group would then fail on the duplicate key.
+      if ($l -eq "") {
+        continue
+      }
+
+      # Trailing dot is a test group but no whitespace
+      if ($l -match "\.$" -and $l -notmatch "\s+") {
+        $Group = $l
+      }  else {
+        # Otherwise it is a test name; prefix it with its group
+        $test = "$Group$l"
+
+        if($ExcludeCasesSet.Contains($test)) {
+            Write-Warning "$test case is excluded"
+            continue
+        }
+
+        # create a log name: dots and slashes are not wanted in a file name
+        $test_log = $test -replace '[\./]','_'
+        $test_log += ".log"
+        $log_path = -join ($LogFolder, $test_log)
+
+        # Add to a hashtable
+        $HashTable.Add($test, $log_path);
+      }
+    }
+}
+
+# The function removes a trailing .exe suffix if any,
+# creates a name for the log file (inside $LogFolder)
+# and adds the pair to a HashTable in a form of test_name -> log_path.
+# Exclusions are not checked here; callers filter before calling.
+function MakeAndAdd([string]$token, $HashTable) {
+
+    # The dot is escaped so that only a literal ".exe" suffix is stripped;
+    # an unescaped dot would match any character in front of "exe".
+    $test_name = $token -replace '\.exe$', ''
+    $log_name =  -join ($test_name, ".log")
+    $log_path = -join ($LogFolder, $log_name)
+    $HashTable.Add($test_name, $log_path)
+}
+
+# This function takes a list of Suites to run
+# Lists all the test cases in each of the suite
+# and populates HashOfHashes
+# Ordered by suite(exe) @{ Exe = @{ TestCase = LogName }}
+#
+# Parameters:
+#   $ListOfSuites  suite (executable) names without the _je suffix
+#   $HashOfHashes  receives one entry per suite that has at least one case
+# Exits the script with code 1 when no test case could be extracted at all.
+function ProcessSuites($ListOfSuites, $HashOfHashes) {
+
+  # Problem: if you run --gtest_list_tests on
+  # a non Google Test executable then it will start executing
+  # and we will get nowhere
+  ForEach($suite in $ListOfSuites) {
+
+    if($RunOnly.Contains($suite)) {
+      Write-Warning "$suite is excluded from running as Google test suite"
+      continue
+    }
+
+    if($EnableJE) {
+      $suite += "_je"
+    }
+
+    $Cases = [ordered]@{}
+    $suite_exe = -Join ($BinariesFolder, $suite)
+    ExtractTestCases -GTestExe $suite_exe -HashTable $Cases
+    if($Cases.Count -gt 0) {
+      $HashOfHashes.Add($suite, $Cases);
+    }
+  }
+
+  # Check the table that was passed in (not a script level variable) and name
+  # the suites that were actually requested in the error message.
+  if($HashOfHashes.Count -lt 1) {
+     Write-Error "Failed to extract tests from $ListOfSuites"
+     exit 1
+  }
+
+}
+
+# This will contain all test executables to run
+
+# Hash table that contains all non suite
+# Test executable to run (test_name -> log_path)
+$TestExes = [ordered]@{}
+
+# Check for test exe that are not
+# Google Test Suites
+# Since this is explicitly mentioned it is not subject
+# for exclusions
+if($Run -ne "") {
+
+  $test_list = $Run -split ' '
+  ForEach($t in $test_list) {
+
+    if($EnableJE) {
+      $t += "_je"
+    }
+    MakeAndAdd -token $t -HashTable $TestExes
+  }
+
+  if($TestExes.Count -lt 1) {
+     Write-Error "Failed to extract tests from $Run"
+     exit 1
+  }
+} elseif($RunAllExe) {
+  # Discover all the test binaries
+  if($EnableJE) {
+    $pattern = "*_test_je.exe"
+  } else {
+    $pattern = "*_test.exe"
+  }
+
+  $search_path = -join ($BinariesFolder, $pattern)
+  Write-Host "Binaries Search Path: $search_path"
+
+  $DiscoveredExe = @()
+  dir -Path $search_path | ForEach-Object {
+     $DiscoveredExe += ($_.Name)
+  }
+
+  # Remove exclusions
+  ForEach($e in $DiscoveredExe) {
+    # Escaped dot: strip only a literal ".exe" suffix
+    $e = $e -replace '\.exe$', ''
+    # Exclusions are listed without the _je suffix
+    $bare_name = $e -replace '_je$', ''
+
+    if($ExcludeExesSet.Contains($bare_name)) {
+      Write-Warning "Test $e is excluded"
+      continue
+    }
+    MakeAndAdd -token $e -HashTable $TestExes
+  }
+
+  if($TestExes.Count -lt 1) {
+     Write-Error "Failed to discover test executables"
+     exit 1
+  }
+}
+
+# Ordered by exe @{ Exe = @{ TestCase = LogName }}
+$CasesToRun = [ordered]@{}
+
+if($SuiteRun -ne "") {
+  $suite_list = $SuiteRun -split ' '
+  ProcessSuites -ListOfSuites $suite_list -HashOfHashes $CasesToRun
+} elseif ($RunAll) {
+# Discover all the test binaries
+  if($EnableJE) {
+    $pattern = "*_test_je.exe"
+  } else {
+    $pattern = "*_test.exe"
+  }
+
+  $search_path = -join ($BinariesFolder, $pattern)
+  Write-Host "Binaries Search Path: $search_path"
+
+  $ListOfExe = @()
+  dir -Path $search_path | ForEach-Object {
+     $ListOfExe += ($_.Name)
+  }
+
+  # Exclude those in RunOnly from running as suites
+  $ListOfSuites = @()
+  ForEach($e in $ListOfExe) {
+
+    # Escaped dot: strip only a literal ".exe" suffix
+    $e = $e -replace '\.exe$', ''
+    # Exclusion and RunOnly lists hold names without the _je suffix
+    $bare_name = $e -replace '_je$', ''
+
+    if($ExcludeExesSet.Contains($bare_name)) {
+      Write-Warning "Test $e is excluded"
+      continue
+    }
+
+    if($RunOnly.Contains($bare_name)) {
+      # Not a Google Test suite: run the executable as a whole
+      MakeAndAdd -token $e -HashTable $TestExes
+    } else {
+      $ListOfSuites += $bare_name
+    }
+  }
+
+  ProcessSuites -ListOfSuites $ListOfSuites -HashOfHashes $CasesToRun
+}
+
+
+# Invoke a test with a filter and redirect all output
+# Parameters: $exe - executable to start, $test - value for --gtest_filter,
+# $log - file that receives both the output and the error stream.
+$InvokeTestCase = {
+    param($exe, $test, $log);
+    &$exe --gtest_filter=$test > $log 2>&1
+}
+
+# Invoke all tests and redirect output
+# Parameters: $exe - executable to start, $log - file that receives both the
+# output and the error stream.
+$InvokeTestAsync = {
+    param($exe, $log)
+    &$exe > $log 2>&1
+}
+
+# Hash that contains tests to rerun if any failed
+# Those tests will be rerun sequentially
+# $Rerun = [ordered]@{}
+# Test limiting factor here: number of jobs started so far, compared
+# against $Limit inside RunJobs
+[int]$count = 0
+# Overall status; set to $false by RunJobs when any job fails
+[bool]$script:success = $true;
+
+# Runs all test cases and test executables as background jobs, keeping at
+# most $ConcurrencyVal jobs running at any time, and evaluates the log of
+# every finished job.
+#
+# Parameters:
+#   $Suites          @{ Exe = @{ TestCase = LogName }}; entries are removed
+#                    from the table as their jobs are started
+#   $TestCmds        @{ Exe = LogName } of executables to run as a whole;
+#                    entries are removed as their jobs are started
+#   $ConcurrencyVal  maximum number of simultaneously running jobs
+#
+# Sets $script:success to $false when a job does not complete or when its
+# log shows a failure or no pass marker at all.
+function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal)
+{
+    # Array to wait for any of the running jobs
+    $jobs = @()
+    # Hash JobToLog
+    $JobToLog = @{}
+
+    # Wait for all to finish and get the results
+    while(($JobToLog.Count -gt 0) -or
+          ($TestCmds.Count -gt 0) -or
+           ($Suites.Count -gt 0)) {
+
+        # Make sure we have maximum concurrent jobs running if anything
+        # and the $Limit either not set or allows to proceed
+        while(($JobToLog.Count -lt $ConcurrencyVal) -and
+              ((($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) -and
+              (($Limit -lt 0) -or ($count -lt $Limit)))) {
+
+            # We always favor suites to run if available
+            [string]$exe_name = ""
+            [string]$log_path = ""
+            $Cases = @{}
+
+            if($Suites.Count -gt 0) {
+              # Take the first suite
+              ForEach($e in $Suites.Keys) {
+                $exe_name = $e
+                $Cases = $Suites[$e]
+                break
+              }
+              # ... and the first remaining test case of that suite
+              [string]$test_case = ""
+              [string]$log_path = ""
+              ForEach($c in $Cases.Keys) {
+                 $test_case = $c
+                 $log_path = $Cases[$c]
+                 break
+              }
+
+              Write-Host "Starting $exe_name::$test_case"
+              [string]$Exe =  -Join ($BinariesFolder, $exe_name)
+              $job = Start-Job -Name "$exe_name::$test_case" -ArgumentList @($Exe,$test_case,$log_path) -ScriptBlock $InvokeTestCase
+              $JobToLog.Add($job, $log_path)
+
+              # The case is now in flight; drop the suite once it is empty
+              $Cases.Remove($test_case)
+              if($Cases.Count -lt 1) {
+                $Suites.Remove($exe_name)
+              }
+
+            } elseif ($TestCmds.Count -gt 0) {
+
+               # Take the first remaining whole executable
+               ForEach($e in $TestCmds.Keys) {
+                 $exe_name = $e
+                 $log_path = $TestCmds[$e]
+                 break
+               }
+
+              Write-Host "Starting $exe_name"
+              [string]$Exe =  -Join ($BinariesFolder, $exe_name)
+              $job = Start-Job -Name $exe_name -ScriptBlock $InvokeTestAsync -ArgumentList @($Exe,$log_path)
+              $JobToLog.Add($job, $log_path)
+
+              $TestCmds.Remove($exe_name)
+
+            } else {
+                Write-Error "In the job loop but nothing to run"
+                exit 1
+            }
+
+            ++$count
+        } # End of Job starting loop
+
+        # Nothing is running (and nothing more may be started): done
+        if($JobToLog.Count -lt 1) {
+          break
+        }
+
+        $jobs = @()
+        foreach($k in $JobToLog.Keys) { $jobs += $k }
+
+        # Block until any one of the running jobs finishes
+        $completed = Wait-Job -Job $jobs -Any
+        $log = $JobToLog[$completed]
+        $JobToLog.Remove($completed)
+
+        $message = -join @($completed.Name, " State: ", ($completed.State))
+
+        $log_content = @(Get-Content $log)
+
+        if($completed.State -ne "Completed") {
+            $script:success = $false
+            Write-Warning $message
+            $log_content | Write-Warning
+        } else {
+            # Scan the log. If we find PASSED and no occurrence of FAILED
+            # then it is a success
+            [bool]$pass_found = $false
+            ForEach($l in $log_content) {
+
+                # Any failure marker overrides pass markers seen so far
+                if(($l -match "^\[\s+FAILED") -or
+                   ($l -match "Assertion failed:")) {
+                    $pass_found = $false
+                    break
+                }
+
+                if(($l -match "^\[\s+PASSED") -or
+                   ($l -match " : PASSED$") -or
+                    ($l -match "^PASS$") -or   # Special c_test case
+                    ($l -match "Passed all tests!") ) {
+                    $pass_found = $true
+                }
+            }
+
+            if(!$pass_found) {
+                $script:success = $false;
+                Write-Warning $message
+                $log_content | Write-Warning
+            } else {
+                Write-Host $message
+            }
+        }
+
+        # Remove cached job info from the system
+        # Should be no output
+        Receive-Job -Job $completed | Out-Null
+    }
+}
+
+RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency
+
+# Report how long the whole run took.
+$EndDate = Get-Date
+$Elapsed = New-TimeSpan -Start $StartDate -End $EndDate
+"Elapsed time: {0:g}" -f $Elapsed
+
+
+if($script:success) {
+    exit 0
+}
+
+# This does not succeed killing off jobs quick
+# So we simply exit
+#    Remove-Job -Job $jobs -Force
+# indicate failure using this exit code
+exit 1
--- a/build_tools/setup_centos7.sh
+++ b/build_tools/setup_centos7.sh
@ -0,0 +1,45 @@
+#!/bin/bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Provisioning script: installs build dependencies with yum, builds and
+# installs ZSTD from source, then builds the RocksDB static library and its
+# examples as the `vagrant` user.
+#
+# -e: abort on the first failing command; -x: echo each command as it runs.
+set -ex
+
+# Versions of the source tarballs fetched from GitHub below.
+ROCKSDB_VERSION="6.7.3"
+ZSTD_VERSION="1.4.4"
+
+echo "This script configures CentOS with everything needed to build and run RocksDB"
+
+yum update -y && yum install epel-release -y
+
+# Compiler, download tool and compression libraries.
+yum install -y \
+  wget \
+  gcc-c++ \
+  snappy snappy-devel \
+  zlib zlib-devel \
+  bzip2 bzip2-devel \
+  lz4-devel \
+  libasan \
+  gflags
+
+# Versioned install directory plus a stable /usr/local/rocksdb symlink to it.
+mkdir -pv /usr/local/rocksdb-${ROCKSDB_VERSION}
+ln -sfT /usr/local/rocksdb-${ROCKSDB_VERSION} /usr/local/rocksdb
+
+wget -qO /tmp/zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz
+wget -qO /tmp/rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz
+
+cd /tmp
+
+# ZSTD is unpacked in /tmp; RocksDB is unpacked directly under /usr/local/.
+tar xzvf zstd-${ZSTD_VERSION}.tar.gz
+tar xzvf rocksdb-${ROCKSDB_VERSION}.tar.gz -C /usr/local/
+
+echo "Installing ZSTD..."
+pushd zstd-${ZSTD_VERSION}
+make && make install
+popd
+
+echo "Compiling RocksDB..."
+cd /usr/local/rocksdb
+# Hand the tree to the vagrant user so the build below can run unprivileged.
+chown -R vagrant:vagrant /usr/local/rocksdb/
+sudo -u vagrant make static_lib
+cd examples/
+# Build the examples, then run c_simple_example; LD_LIBRARY_PATH points at
+# /usr/local/lib/ for both steps.
+sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ make all
+sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ ./c_simple_example
+
--- a/build_tools/ubuntu20_image/Dockerfile
+++ b/build_tools/ubuntu20_image/Dockerfile
@ -0,0 +1,57 @@
+# from official ubuntu 20.04
+FROM ubuntu:20.04
+# update system
+RUN apt-get update && apt-get upgrade -y
+# install basic tools
+RUN apt-get install -y vim wget curl
+# install tzdata noninteractive
+RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata
+# install git and default compilers
+RUN apt-get install -y git gcc g++ clang clang-tools
+# install basic package
+RUN apt-get install -y lsb-release software-properties-common gnupg
+# install gflags, tbb
+RUN apt-get install -y libgflags-dev libtbb-dev
+# install compression libs
+RUN apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev
+# install cmake
+RUN apt-get install -y cmake
+RUN apt-get install -y libssl-dev
+# install clang-13 via the llvm.sh helper script downloaded from apt.llvm.org
+WORKDIR /root
+RUN wget https://apt.llvm.org/llvm.sh
+RUN chmod +x llvm.sh
+RUN ./llvm.sh 13 all
+# install gcc-7, 8, 10, 11, default is 9
+RUN apt-get install -y gcc-7 g++-7
+RUN apt-get install -y gcc-8 g++-8
+RUN apt-get install -y gcc-10 g++-10
+# the ubuntu-toolchain-r/test PPA is added right before installing gcc-11
+RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
+RUN apt-get install -y gcc-11 g++-11
+# install valgrind
+RUN apt-get install -y valgrind
+# install folly dependencies
+RUN apt-get install -y libgoogle-glog-dev
+# install openjdk 8
+RUN apt-get install -y openjdk-8-jdk
+ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64
+# install mingw
+RUN apt-get install -y mingw-w64
+
+# install gtest-parallel package (cloned into /root and added to PATH)
+RUN git clone --single-branch --branch master --depth 1 https://github.com/google/gtest-parallel.git ~/gtest-parallel
+ENV PATH $PATH:/root/gtest-parallel
+
+# install libprotobuf for fuzzers test
+# (libprotobuf-mutator is pinned to a fixed commit and built with clang-13)
+RUN apt-get install -y ninja-build binutils liblzma-dev libz-dev pkg-config autoconf libtool
+RUN git clone --branch v1.0 https://github.com/google/libprotobuf-mutator.git ~/libprotobuf-mutator && cd ~/libprotobuf-mutator && git checkout ffd86a32874e5c08a143019aad1aaf0907294c9f && mkdir build && cd build && cmake .. -GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON && ninja && ninja install
+ENV PKG_CONFIG_PATH /usr/local/OFF/:/root/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/
+ENV PROTOC_BIN /root/libprotobuf-mutator/build/external.protobuf/bin/protoc
+
+# install google benchmark (pinned to tag v1.7.0)
+RUN git clone --depth 1 --branch v1.7.0 https://github.com/google/benchmark.git ~/benchmark
+RUN cd ~/benchmark && mkdir build && cd build && cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0 && ninja && ninja install
+
+# clean up
+RUN rm -rf /var/lib/apt/lists/*
+RUN rm -rf /root/benchmark
--- a/build_tools/update_dependencies.sh
+++ b/build_tools/update_dependencies.sh
@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Update dependencies.sh file with the latest available versions
+#
+# NOTE: this script must run under bash, not a plain POSIX sh: it relies on
+# the `function` keyword, `local`, indirect expansion (${!name}) and case
+# modification (${name^^}), none of which are available in e.g. dash.
+
+# Directory containing this script; generated files are written next to it.
+BASEDIR=$(dirname "$0")
+# Path of the file currently being generated (set before each section).
+OUTPUT=""
+
+# Append the generated-file banner (copyright + provenance) to $OUTPUT.
+function log_header()
+{
+  {
+    echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved."
+    echo "# The file is generated using update_dependencies.sh."
+  } >> "$OUTPUT"
+}
+
+
+# Append "NAME=value" to $OUTPUT, where $1 is the NAME of a shell variable
+# and the value is looked up through indirect expansion.
+function log_variable()
+{
+  local var_name=$1
+  local var_value=${!var_name}
+  echo "${var_name}=${var_value}" >> "$OUTPUT"
+}
+
+
+TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/"
+## Resolve the on-disk location of a third-party library and record it.
+##
+## $1 => lib name
+## $2 => lib version (if not provided or "LATEST", will try to pick latest)
+## $3 => platform (if not provided, will try to pick latest gcc)
+##
+## get_lib_base will set a (global) variable named ${LIB_NAME}_BASE to the lib
+## location, where LIB_NAME is the upper-cased lib name with '-' replaced by
+## '_', and append that variable to $OUTPUT through log_variable.
+function get_lib_base()
+{
+  local lib_name=$1
+  local lib_version=$2
+  local lib_platform=$3
+
+  local result="$TP2_LATEST/$lib_name/"
+
+  # Lib Version
+  if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then
+    # version is not provided, use latest (reverse version sort, first entry)
+    result=$(ls -dr1v "$result"/*/ | head -n1)
+  else
+    result="$result/$lib_version/"
+  fi
+
+  # Lib Platform
+  if [ -z "$lib_platform" ]; then
+    # platform is not provided, use latest gcc
+    result=$(ls -dr1v "$result"/gcc-*[^fb]/ | head -n1)
+  else
+    echo "$lib_platform"
+    result="$result/$lib_platform/"
+  fi
+
+  # Descend into the first sub-directory of the selected platform directory.
+  result=$(ls -1d "$result"/*/ | head -n1)
+
+  echo "Finding link $result"
+
+  # lib_name => LIB_NAME_BASE
+  local __res_var="${lib_name^^}_BASE"
+  __res_var=${__res_var//-/_}
+  # LIB_NAME_BASE=<resolved path>. printf -v assigns without re-parsing the
+  # value, unlike the previous unquoted `eval`, so a path containing spaces
+  # or shell metacharacters can neither break nor inject into the assignment.
+  printf -v "$__res_var" '%s' "$(readlink -f "$result")"
+
+  log_variable "$__res_var"
+}
+
+###########################################################
+#                platform010 dependencies                 #
+###########################################################
+
+OUTPUT="$BASEDIR/dependencies_platform010.sh"
+
+rm -f "$OUTPUT"
+touch "$OUTPUT"
+
+echo "Writing dependencies to $OUTPUT"
+
+# Compilers locations
+GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/`
+CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/`
+
+log_header
+log_variable GCC_BASE
+log_variable CLANG_BASE
+
+# Libraries locations
+get_lib_base libgcc     11.x    platform010
+get_lib_base glibc      2.34    platform010
+get_lib_base snappy     LATEST  platform010
+get_lib_base zlib       LATEST  platform010
+get_lib_base bzip2      LATEST  platform010
+get_lib_base lz4        LATEST  platform010
+get_lib_base zstd       LATEST  platform010
+get_lib_base gflags     LATEST  platform010
+get_lib_base jemalloc   LATEST  platform010
+get_lib_base numa       LATEST  platform010
+get_lib_base libunwind  LATEST  platform010
+get_lib_base tbb        2018_U5 platform010
+get_lib_base liburing   LATEST  platform010
+get_lib_base benchmark  LATEST  platform010
+
+get_lib_base kernel-headers fb platform010
+get_lib_base binutils   LATEST centos7-native
+get_lib_base valgrind   LATEST platform010
+get_lib_base lua        5.3.4  platform010
+
+git diff $OUTPUT
--- a/build_tools/version.sh
+++ b/build_tools/version.sh
@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#
+# Print one component (or the full dotted form) of the RocksDB version, as
+# parsed from include/rocksdb/version.h relative to the current directory.
+#
+# Usage: version.sh major|minor|patch|full
+# Exits 1 when called without arguments or with an unknown mode.
+VERSION_HEADER=include/rocksdb/version.h
+
+if [ "$#" = "0" ]; then
+  echo "Usage: $0 major|minor|patch|full"
+  exit 1
+fi
+
+case "$1" in
+  major)
+    # Third field of the first line mentioning MAJOR, i.e. the #define value.
+    grep MAJOR "$VERSION_HEADER" | head -n1 | awk '{print $3}'
+    ;;
+  minor)
+    grep MINOR "$VERSION_HEADER" | head -n1 | awk '{print $3}'
+    ;;
+  patch)
+    grep PATCH "$VERSION_HEADER" | head -n1 | awk '{print $3}'
+    ;;
+  full)
+    # Collect every "#define ROCKSDB_* <value>" and join the three parts.
+    awk '/#define ROCKSDB/ { env[$2] = $3 }
+         END { printf "%s.%s.%s\n", env["ROCKSDB_MAJOR"],
+                                    env["ROCKSDB_MINOR"],
+                                    env["ROCKSDB_PATCH"] }'  \
+        "$VERSION_HEADER"
+    ;;
+  *)
+    # An unrecognised mode used to print nothing and exit 0, which callers
+    # could mistake for an empty version; report it as an error instead.
+    echo "Usage: $0 major|minor|patch|full" >&2
+    exit 1
+    ;;
+esac
--- a/cache/cache.cc
+++ b/cache/cache.cc
@ -0,0 +1,158 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/cache.h"
+
+#include "cache/lru_cache.h"
+#include "rocksdb/secondary_cache.h"
+#include "rocksdb/utilities/customizable_util.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+// Shared value-initialized CacheItemHelper instance: no member is set
+// explicitly here, so every field takes its default.
+const Cache::CacheItemHelper kNoopCacheItemHelper{};
+
+// Option-name -> field descriptor table for LRUCacheOptions. Each entry gives
+// the byte offset of the field inside the struct and its option type; all
+// entries are flagged kMutable. Passed to OptionTypeInfo::ParseStruct by
+// Cache::CreateFromString to parse "name=value" style strings.
+static std::unordered_map<std::string, OptionTypeInfo>
+    lru_cache_options_type_info = {
+        {"capacity",
+         {offsetof(struct LRUCacheOptions, capacity), OptionType::kSizeT,
+          OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
+        {"num_shard_bits",
+         {offsetof(struct LRUCacheOptions, num_shard_bits), OptionType::kInt,
+          OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
+        {"strict_capacity_limit",
+         {offsetof(struct LRUCacheOptions, strict_capacity_limit),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+        {"high_pri_pool_ratio",
+         {offsetof(struct LRUCacheOptions, high_pri_pool_ratio),
+          OptionType::kDouble, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+        {"low_pri_pool_ratio",
+         {offsetof(struct LRUCacheOptions, low_pri_pool_ratio),
+          OptionType::kDouble, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+};
+
+// Option-name -> field descriptor table for CompressedSecondaryCacheOptions.
+// Each entry gives the byte offset of the field inside the struct and its
+// option type; all entries are flagged kMutable. Passed to
+// OptionTypeInfo::ParseStruct by SecondaryCache::CreateFromString.
+static std::unordered_map<std::string, OptionTypeInfo>
+    comp_sec_cache_options_type_info = {
+        {"capacity",
+         {offsetof(struct CompressedSecondaryCacheOptions, capacity),
+          OptionType::kSizeT, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+        {"num_shard_bits",
+         {offsetof(struct CompressedSecondaryCacheOptions, num_shard_bits),
+          OptionType::kInt, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+        {"compression_type",
+         {offsetof(struct CompressedSecondaryCacheOptions, compression_type),
+          OptionType::kCompressionType, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+        {"compress_format_version",
+         {offsetof(struct CompressedSecondaryCacheOptions,
+                   compress_format_version),
+          OptionType::kUInt32T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+        {"enable_custom_split_merge",
+         {offsetof(struct CompressedSecondaryCacheOptions,
+                   enable_custom_split_merge),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
+};
+
+// Creates a SecondaryCache from its string description.
+//
+// * "compressed_secondary_cache://<options>": <options> is parsed into a
+//   CompressedSecondaryCacheOptions and a compressed secondary cache is
+//   created from it.
+// * anything else is delegated to LoadSharedObject<SecondaryCache>.
+//
+// On success the new cache is swapped into `*result`; on a parse failure
+// `*result` is left untouched and the failing Status is returned.
+Status SecondaryCache::CreateFromString(
+    const ConfigOptions& config_options, const std::string& value,
+    std::shared_ptr<SecondaryCache>* result) {
+  static constexpr char kCompressedPrefix[] = "compressed_secondary_cache://";
+  static constexpr size_t kCompressedPrefixLen = sizeof(kCompressedPrefix) - 1;
+
+  // compare() looks only at the first kCompressedPrefixLen characters of
+  // `value`; `find(...) == 0` would keep searching the whole string when the
+  // prefix is absent.
+  if (value.compare(0, kCompressedPrefixLen, kCompressedPrefix) != 0) {
+    return LoadSharedObject<SecondaryCache>(config_options, value, result);
+  }
+
+  const std::string args = value.substr(kCompressedPrefixLen);
+  CompressedSecondaryCacheOptions sec_cache_opts;
+  Status status = OptionTypeInfo::ParseStruct(config_options, "",
+                                              &comp_sec_cache_options_type_info,
+                                              "", args, &sec_cache_opts);
+  if (status.ok()) {
+    std::shared_ptr<SecondaryCache> sec_cache =
+        NewCompressedSecondaryCache(sec_cache_opts);
+    result->swap(sec_cache);
+  }
+  return status;
+}
+
+// Creates an LRU cache from `value`.
+//
+// A string without '=' is treated as a bare capacity; otherwise it is parsed
+// as a list of LRUCacheOptions settings. `*result` is only replaced when
+// creation succeeds.
+Status Cache::CreateFromString(const ConfigOptions& config_options,
+                               const std::string& value,
+                               std::shared_ptr<Cache>* result) {
+  const bool is_bare_capacity = (value.find('=') == std::string::npos);
+  if (is_bare_capacity) {
+    std::shared_ptr<Cache> sized_cache = NewLRUCache(ParseSizeT(value));
+    result->swap(sized_cache);
+    return Status();
+  }
+
+  LRUCacheOptions lru_opts;
+  Status parse_status = OptionTypeInfo::ParseStruct(
+      config_options, "", &lru_cache_options_type_info, "", value, &lru_opts);
+  if (!parse_status.ok()) {
+    return parse_status;
+  }
+
+  std::shared_ptr<Cache> configured_cache = NewLRUCache(lru_opts);
+  result->swap(configured_cache);
+  return parse_status;
+}
+
+// Ready when there is no pending handle, or the pending handle reports ready.
+bool Cache::AsyncLookupHandle::IsReady() {
+  if (pending_handle == nullptr) {
+    return true;
+  }
+  return pending_handle->IsReady();
+}
+
+// True while a pending handle is attached to this lookup.
+bool Cache::AsyncLookupHandle::IsPending() {
+  const bool has_pending_handle = (pending_handle != nullptr);
+  return has_pending_handle;
+}
+
+// Returns the stored result handle; must not be called while still pending.
+Cache::Handle* Cache::AsyncLookupHandle::Result() {
+  assert(!IsPending());
+  Cache::Handle* const finished = result_handle;
+  return finished;
+}
+
+// Base implementation of an "async" lookup: performs a plain synchronous
+// Lookup with the handle's parameters and stores the outcome in
+// `result_handle`.
+void Cache::StartAsyncLookup(AsyncLookupHandle& async_handle) {
+  assert(!async_handle.IsPending());
+  // The handle may be re-used, so clear the flag left by an earlier lookup.
+  async_handle.found_dummy_entry = false;
+  Handle* const looked_up =
+      Lookup(async_handle.key, async_handle.helper, async_handle.create_context,
+             async_handle.priority, async_handle.stats);
+  async_handle.result_handle = looked_up;
+}
+
+// Waits on a single handle (via WaitAll with a count of one) and returns its
+// result.
+Cache::Handle* Cache::Wait(AsyncLookupHandle& async_handle) {
+  AsyncLookupHandle* const single = &async_handle;
+  WaitAll(single, /*count=*/1);
+  return single->Result();
+}
+
+// Base implementation: performs no waiting, only a debug-build check of the
+// `count` handles it is given.
+void Cache::WaitAll(AsyncLookupHandle* async_handles, size_t count) {
+  for (size_t idx = 0; idx != count; ++idx) {
+    // A handle that is still pending when it arrives here should have been
+    // marked as "to be handled by a caller", which that caller does by
+    // erasing pending_cache on it.
+    assert(!async_handles[idx].IsPending() ||
+           async_handles[idx].pending_cache == nullptr);
+  }
+}
+
+// Installs (or clears) the eviction callback by moving `fn` into place.
+void Cache::SetEvictionCallback(EvictionCallback&& fn) {
+  // Replacing an already-set callback with another non-empty one could
+  // indicate a bug, so at most one of the two may be non-empty.
+  assert(!(eviction_callback_ && fn));
+  eviction_callback_ = std::move(fn);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_bench.cc
+++ b/cache/cache_bench.cc
@ -0,0 +1,20 @@
+//  Copyright (c) 2013-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#ifdef GFLAGS
+#include "rocksdb/cache_bench_tool.h"
+// With gflags available, hand the command line to the cache bench tool.
+int main(int argc, char** argv) {
+  return ROCKSDB_NAMESPACE::cache_bench_tool(argc, argv);
+}
+#else  // !GFLAGS
+#include <cstdio>
+// Without gflags the tool cannot run: print a hint and exit with failure.
+int main() {
+  fprintf(stderr, "Please install gflags to run rocksdb tools\n");
+  return 1;
+}
+#endif  // GFLAGS
--- a/cache/cache_bench_tool.cc
+++ b/cache/cache_bench_tool.cc
--- a/cache/cache_entry_roles.cc
+++ b/cache/cache_entry_roles.cc
@ -0,0 +1,104 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/cache_entry_roles.h"
+
+#include <mutex>
+
+#include "port/lang.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// CamelCase display name for each cache entry role. The array has
+// kNumCacheEntryRoles slots; entries are positional, so the order here must
+// stay in step with kCacheEntryRoleToHyphenString below (presumably both
+// follow the CacheEntryRole enumerator order - confirm against the enum).
+std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
+    "DataBlock",
+    "FilterBlock",
+    "FilterMetaBlock",
+    "DeprecatedFilterBlock",
+    "IndexBlock",
+    "OtherBlock",
+    "WriteBuffer",
+    "CompressionDictionaryBuildingBuffer",
+    "FilterConstruction",
+    "BlockBasedTableReader",
+    "FileMetadata",
+    "BlobValue",
+    "BlobCache",
+    "Misc",
+}};
+
+// Lower-case, hyphen-separated name for each cache entry role. Indexed by
+// static_cast<size_t>(role) in GetCacheEntryRoleName, so the position of each
+// entry must match the numeric value of the corresponding CacheEntryRole.
+std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
+    "data-block",
+    "filter-block",
+    "filter-meta-block",
+    "deprecated-filter-block",
+    "index-block",
+    "other-block",
+    "write-buffer",
+    "compression-dictionary-building-buffer",
+    "filter-construction",
+    "block-based-table-reader",
+    "file-metadata",
+    "blob-value",
+    "blob-cache",
+    "misc",
+}};
+
+// Returns the hyphenated name of `role` (a reference into the static table).
+const std::string& GetCacheEntryRoleName(CacheEntryRole role) {
+  const size_t role_index = static_cast<size_t>(role);
+  return kCacheEntryRoleToHyphenString[role_index];
+}
+
+// Map key under which the cache id is reported.
+const std::string& BlockCacheEntryStatsMapKeys::CacheId() {
+  static const std::string id_key("id");
+  return id_key;
+}
+
+// Map key under which the cache capacity is reported.
+const std::string& BlockCacheEntryStatsMapKeys::CacheCapacityBytes() {
+  static const std::string capacity_key("capacity");
+  return capacity_key;
+}
+
+// Map key under which the duration of the last collection is reported.
+const std::string&
+BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds() {
+  static const std::string duration_key("secs_for_last_collection");
+  return duration_key;
+}
+
+// Map key under which the age of the last collection is reported.
+const std::string& BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds() {
+  static const std::string age_key("secs_since_last_collection");
+  return age_key;
+}
+
+namespace {
+
+// Returns `prefix` immediately followed by the hyphenated name of `role`.
+std::string GetPrefixedCacheEntryRoleName(const std::string& prefix,
+                                          CacheEntryRole role) {
+  const std::string& suffix = GetCacheEntryRoleName(role);
+  std::string combined;
+  // Size the buffer once up front so the two appends do not reallocate.
+  combined.reserve(prefix.size() + suffix.size());
+  combined += prefix;
+  combined += suffix;
+  return combined;
+}
+
+}  // namespace
+
+// Map key for the entry count of `role`: "count." + hyphenated role name.
+std::string BlockCacheEntryStatsMapKeys::EntryCount(CacheEntryRole role) {
+  static const std::string count_prefix("count.");
+  return GetPrefixedCacheEntryRoleName(count_prefix, role);
+}
+
+// Map key for the bytes used by `role`: "bytes." + hyphenated role name.
+std::string BlockCacheEntryStatsMapKeys::UsedBytes(CacheEntryRole role) {
+  static const std::string bytes_prefix("bytes.");
+  return GetPrefixedCacheEntryRoleName(bytes_prefix, role);
+}
+
+// Map key for the usage percentage of `role`: "percent." + hyphenated name.
+std::string BlockCacheEntryStatsMapKeys::UsedPercent(CacheEntryRole role) {
+  static const std::string percent_prefix("percent.");
+  return GetPrefixedCacheEntryRoleName(percent_prefix, role);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_entry_roles.h
+++ b/cache/cache_entry_roles.h
@ -0,0 +1,20 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+
+#include "rocksdb/cache.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Per-role name tables with kNumCacheEntryRoles entries each; one holds the
+// CamelCase spelling, the other the hyphenated spelling. Both are declared
+// here and defined in another translation unit.
+extern std::array<std::string, kNumCacheEntryRoles>
+    kCacheEntryRoleToCamelString;
+extern std::array<std::string, kNumCacheEntryRoles>
+    kCacheEntryRoleToHyphenString;
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_entry_stats.h
+++ b/cache/cache_entry_stats.h
@ -0,0 +1,182 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+
+#include "cache/cache_key.h"
+#include "cache/typed_cache.h"
+#include "port/lang.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/status.h"
+#include "rocksdb/system_clock.h"
+#include "test_util/sync_point.h"
+#include "util/coding_lean.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A generic helper object for gathering stats about cache entries by
+// iterating over them with ApplyToAllEntries. This class essentially
+// solves the problem of slowing down a Cache with too many stats
+// collectors that could be sharing stat results, such as from multiple
+// column families or multiple DBs sharing a Cache. We employ a few
+// mitigations:
+// * Only one collector for a particular kind of Stats is alive
+// for each Cache. This is guaranteed using the Cache itself to hold
+// the collector.
+// * A mutex ensures only one thread is gathering stats for this
+// collector.
+// * The most recent gathered stats are saved and simply copied to
+// satisfy requests within a time window (default: 3 minutes) of
+// completion of the most recent stat gathering.
+//
+// Template parameter Stats must be copyable and trivially constructable,
+// as well as...
+// concept Stats {
+//   // Notification before applying callback to all entries
+//   void BeginCollection(Cache*, SystemClock*, uint64_t start_time_micros);
+//   // Get the callback to apply to all entries. `callback`
+//   // type must be compatible with Cache::ApplyToAllEntries
+//   callback GetEntryCallback();
+//   // Notification after applying callback to all entries
+//   void EndCollection(Cache*, SystemClock*, uint64_t end_time_micros);
+//   // Notification that a collection was skipped because of
+//   // sufficiently recent saved results.
+//   void SkippedCollection();
+// }
+template <class Stats>
+class CacheEntryStatsCollector {
+ public:
+  // Gather and save stats if saved stats are too old. (Use GetStats() to
+  // read saved stats.)
+  //
+  // Maximum allowed age for a "hit" on saved results is determined by the
+  // two interval parameters. Both set to 0 forces a re-scan. For example
+  // with min_interval_seconds=300 and min_interval_factor=100, if the last
+  // scan took 10s, we would only rescan ("miss") if the age in seconds of
+  // the saved results is > max(300, 100*10).
+  // Justification: scans can vary wildly in duration, e.g. from 0.02 sec
+  // to as much as 20 seconds, so we want to be able to cap the absolute
+  // and relative frequency of scans.
+  void CollectStats(int min_interval_seconds, int min_interval_factor) {
+    // Waits for any pending reader or writer (collector)
+    std::lock_guard<std::mutex> lock(working_mutex_);
+
+    // Absolute bound; a negative interval is treated as 0.
+    uint64_t max_age_micros =
+        static_cast<uint64_t>(std::max(min_interval_seconds, 0)) * 1000000U;
+
+    // Relative bound: only applied when the recorded end time is after the
+    // recorded start time, i.e. a duration can be computed.
+    if (last_end_time_micros_ > last_start_time_micros_ &&
+        min_interval_factor > 0) {
+      max_age_micros = std::max(
+          max_age_micros, min_interval_factor * (last_end_time_micros_ -
+                                                 last_start_time_micros_));
+    }
+
+    uint64_t start_time_micros = clock_->NowMicros();
+    // Re-scan only when the saved results are older than max_age_micros.
+    if ((start_time_micros - last_end_time_micros_) > max_age_micros) {
+      last_start_time_micros_ = start_time_micros;
+      working_stats_.BeginCollection(cache_, clock_, start_time_micros);
+
+      cache_->ApplyToAllEntries(working_stats_.GetEntryCallback(), {});
+      TEST_SYNC_POINT_CALLBACK(
+          "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", nullptr);
+
+      uint64_t end_time_micros = clock_->NowMicros();
+      last_end_time_micros_ = end_time_micros;
+      working_stats_.EndCollection(cache_, clock_, end_time_micros);
+    } else {
+      working_stats_.SkippedCollection();
+    }
+
+    // Save so that we don't need to wait for an outstanding collection in
+    // order to make a copy of the last saved stats
+    std::lock_guard<std::mutex> lock2(saved_mutex_);
+    saved_stats_ = working_stats_;
+  }
+
+  // Gets saved stats, regardless of age
+  void GetStats(Stats *stats) {
+    std::lock_guard<std::mutex> lock(saved_mutex_);
+    *stats = saved_stats_;
+  }
+
+  Cache *GetCache() const { return cache_; }
+
+  // Gets or creates a shared instance of CacheEntryStatsCollector in the
+  // cache itself, and saves into `ptr`. This shared_ptr will hold the
+  // entry in cache until all refs are destroyed.
+  //
+  // Returns the Insert() status if inserting a newly created collector
+  // fails (the new object is deleted in that case); OK otherwise.
+  static Status GetShared(Cache *raw_cache, SystemClock *clock,
+                          std::shared_ptr<CacheEntryStatsCollector> *ptr) {
+    assert(raw_cache);
+    BasicTypedCacheInterface<CacheEntryStatsCollector, CacheEntryRole::kMisc>
+        cache{raw_cache};
+
+    const Slice &cache_key = GetCacheKey();
+    auto h = cache.Lookup(cache_key);
+    if (h == nullptr) {
+      // Not yet in cache, but Cache doesn't provide a built-in way to
+      // avoid racing insert. So we double-check under a shared mutex,
+      // inspired by TableCache.
+      STATIC_AVOID_DESTRUCTION(std::mutex, static_mutex);
+      std::lock_guard<std::mutex> lock(static_mutex);
+
+      h = cache.Lookup(cache_key);
+      if (h == nullptr) {
+        auto new_ptr = new CacheEntryStatsCollector(cache.get(), clock);
+        // TODO: non-zero charge causes some tests that count block cache
+        // usage to go flaky. Fix the problem somehow so we can use an
+        // accurate charge.
+        size_t charge = 0;
+        Status s =
+            cache.Insert(cache_key, new_ptr, charge, &h, Cache::Priority::HIGH);
+        if (!s.ok()) {
+          assert(h == nullptr);
+          delete new_ptr;
+          return s;
+        }
+      }
+    }
+    // If we reach here, shared entry is in cache with handle `h`.
+    assert(cache.get()->GetCacheItemHelper(h) == cache.GetBasicHelper());
+
+    // Build an aliasing shared_ptr that keeps `ptr` in cache while there
+    // are references.
+    *ptr = cache.SharedGuard(h);
+    return Status::OK();
+  }
+
+ private:
+  // Private: instances are only created through GetShared().
+  explicit CacheEntryStatsCollector(Cache *cache, SystemClock *clock)
+      : saved_stats_(),
+        working_stats_(),
+        last_start_time_micros_(0),
+        last_end_time_micros_(/*pessimistic*/ 10000000),
+        cache_(cache),
+        clock_(clock) {}
+
+  // Cache key under which the shared collector is stored.
+  static const Slice &GetCacheKey() {
+    // For each template instantiation
+    static CacheKey ckey = CacheKey::CreateUniqueForProcessLifetime();
+    static Slice ckey_slice = ckey.AsSlice();
+    return ckey_slice;
+  }
+
+  // saved_mutex_ is held while saved_stats_ is read (GetStats) or written
+  // (end of CollectStats).
+  std::mutex saved_mutex_;
+  Stats saved_stats_;
+
+  // working_mutex_ is held for the whole of CollectStats, which is the only
+  // place working_stats_ and the two timestamps below are touched.
+  std::mutex working_mutex_;
+  Stats working_stats_;
+  uint64_t last_start_time_micros_;
+  uint64_t last_end_time_micros_;
+
+  Cache *const cache_;
+  SystemClock *const clock_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_helpers.cc
+++ b/cache/cache_helpers.cc
@ -0,0 +1,40 @@
+//  Copyright (c) Meta Platforms, Inc. and affiliates.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/cache_helpers.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Cleanup callback taking two untyped arguments: `arg1` is the Cache and
+// `arg2` the Cache::Handle to release on it. Both must be non-null.
+void ReleaseCacheHandleCleanup(void* arg1, void* arg2) {
+  auto* const owning_cache = static_cast<Cache*>(arg1);
+  auto* const held_handle = static_cast<Cache::Handle*>(arg2);
+  assert(owning_cache);
+  assert(held_handle);
+
+  owning_cache->Release(held_handle);
+}
+
+// Builds a cache object from `saved` with helper->create_cb and inserts it
+// into `cache` under `key` (without keeping a handle). If creation fails its
+// status is returned and nothing is inserted. When `out_charge` is non-null
+// and creation succeeded, it receives the charge reported by create_cb, even
+// if the subsequent Insert fails.
+Status WarmInCache(Cache* cache, const Slice& key, const Slice& saved,
+                   Cache::CreateContext* create_context,
+                   const Cache::CacheItemHelper* helper,
+                   Cache::Priority priority, size_t* out_charge) {
+  assert(helper);
+  assert(helper->create_cb);
+
+  Cache::ObjectPtr created_obj;
+  size_t created_charge;
+  Status st = helper->create_cb(saved, create_context,
+                                cache->memory_allocator(), &created_obj,
+                                &created_charge);
+  if (!st.ok()) {
+    return st;
+  }
+
+  st = cache->Insert(key, created_obj, helper, created_charge,
+                     /*handle*/ nullptr, priority);
+  if (out_charge != nullptr) {
+    *out_charge = created_charge;
+  }
+  return st;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_helpers.h
+++ b/cache/cache_helpers.h
@ -0,0 +1,139 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cassert>
+
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Looks up the value stored for `handle` in `cache` and returns it as a T*.
+// Both arguments must be non-null.
+template <typename T>
+T* GetFromCacheHandle(Cache* cache, Cache::Handle* handle) {
+  assert(cache != nullptr);
+  assert(handle != nullptr);
+  auto* const untyped_value = cache->Value(handle);
+  return static_cast<T*>(untyped_value);
+}
+
+// Views the sizeof(T) bytes of the object at `t` as a Slice, so that it can
+// be used as a key with Cache.
+template <typename T>
+Slice GetSliceForKey(const T* t) {
+  const char* const raw_bytes = reinterpret_cast<const char*>(t);
+  return Slice(raw_bytes, sizeof(T));
+}
+
+// Cleanup function with two untyped arguments; CacheHandleGuard::TransferTo
+// registers it with a Cleanable, passing the Cache* as arg1 and the
+// Cache::Handle* as arg2.
+void ReleaseCacheHandleCleanup(void* arg1, void* arg2);
+
+// Move-only owner of a cache handle: the handle is released back to its
+// cache when the guard is destroyed, reset, or overwritten by move
+// assignment. Copying is disabled; moving transfers ownership and leaves the
+// source empty.
+template <typename T>
+class CacheHandleGuard {
+ public:
+  // An empty guard owns nothing.
+  CacheHandleGuard() = default;
+
+  // Takes ownership of `handle`, which must belong to `cache` and carry a
+  // non-null value.
+  CacheHandleGuard(Cache* cache, Cache::Handle* handle)
+      : cache_(cache),
+        handle_(handle),
+        value_(GetFromCacheHandle<T>(cache, handle)) {
+    assert(cache_ && handle_ && value_);
+  }
+
+  CacheHandleGuard(const CacheHandleGuard&) = delete;
+  CacheHandleGuard& operator=(const CacheHandleGuard&) = delete;
+
+  CacheHandleGuard(CacheHandleGuard&& rhs) noexcept
+      : cache_(rhs.cache_), handle_(rhs.handle_), value_(rhs.value_) {
+    // Either all three fields are set or none of them is.
+    assert((cache_ && handle_ && value_) || (!cache_ && !handle_ && !value_));
+
+    rhs.ResetFields();
+  }
+
+  CacheHandleGuard& operator=(CacheHandleGuard&& rhs) noexcept {
+    if (this != &rhs) {
+      // Give up whatever we currently own before adopting rhs's handle.
+      ReleaseHandle();
+
+      cache_ = rhs.cache_;
+      handle_ = rhs.handle_;
+      value_ = rhs.value_;
+
+      assert((cache_ && handle_ && value_) ||
+             (!cache_ && !handle_ && !value_));
+
+      rhs.ResetFields();
+    }
+
+    return *this;
+  }
+
+  ~CacheHandleGuard() { ReleaseHandle(); }
+
+  bool IsEmpty() const { return handle_ == nullptr; }
+
+  Cache* GetCache() const { return cache_; }
+  Cache::Handle* GetCacheHandle() const { return handle_; }
+  T* GetValue() const { return value_; }
+
+  // Hands the release duty over to `cleanable` (when both it and the handle
+  // are non-null) and empties this guard without releasing the handle.
+  void TransferTo(Cleanable* cleanable) {
+    if (cleanable != nullptr && handle_ != nullptr) {
+      assert(cache_);
+      cleanable->RegisterCleanup(&ReleaseCacheHandleCleanup, cache_, handle_);
+    }
+    ResetFields();
+  }
+
+  // Releases the handle (if any) and returns the guard to the empty state.
+  void Reset() {
+    ReleaseHandle();
+    ResetFields();
+  }
+
+ private:
+  // Releases the owned handle, if there is one; fields are left untouched.
+  void ReleaseHandle() {
+    if (!IsEmpty()) {
+      assert(cache_);
+      cache_->Release(handle_);
+    }
+  }
+
+  // Clears all fields without releasing anything.
+  void ResetFields() {
+    cache_ = nullptr;
+    handle_ = nullptr;
+    value_ = nullptr;
+  }
+
+  Cache* cache_ = nullptr;
+  Cache::Handle* handle_ = nullptr;
+  T* value_ = nullptr;
+};
+
+// Returns a shared_ptr<T> to the value of the cache entry behind `handle`.
+// It is an aliasing shared_ptr whose ownership is a CacheHandleGuard<T>, so
+// `handle` stays in the cache for as long as any copy exists. Unlike
+// CacheHandleGuard it is copyable, but it gives no access to caching
+// details. The entry's value must be of type T.
+template <typename T>
+std::shared_ptr<T> MakeSharedCacheHandleGuard(Cache* cache,
+                                              Cache::Handle* handle) {
+  const std::shared_ptr<CacheHandleGuard<T>> owner =
+      std::make_shared<CacheHandleGuard<T>>(cache, handle);
+  T* const typed_value = GetFromCacheHandle<T>(cache, handle);
+  return std::shared_ptr<T>(owner, typed_value);
+}
+
+// Given the persistable data (saved) for a block cache entry, parse that
+// into a cache entry object and insert it into the given cache. The charge
+// of the new entry can be returned to the caller through `out_charge`.
+//
+// `priority` defaults to Cache::Priority::LOW; `out_charge` is optional and
+// defaults to nullptr.
+Status WarmInCache(Cache* cache, const Slice& key, const Slice& saved,
+                   Cache::CreateContext* create_context,
+                   const Cache::CacheItemHelper* helper,
+                   Cache::Priority priority = Cache::Priority::LOW,
+                   size_t* out_charge = nullptr);
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_key.cc
+++ b/cache/cache_key.cc
@ -0,0 +1,364 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/cache_key.h"
+
+#include <algorithm>
+#include <atomic>
+
+#include "rocksdb/advanced_cache.h"
+#include "table/unique_id_impl.h"
+#include "util/hash.h"
+#include "util/math.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Value space plan for CacheKey:
+//
+// file_num_etc64_ | offset_etc64_ | Only generated by
+// ---------------+---------------+------------------------------------------
+//              0 |             0 | Reserved for "empty" CacheKey()
+//              0 |  > 0, < 1<<63 | CreateUniqueForCacheLifetime
+//              0 |      >= 1<<63 | CreateUniqueForProcessLifetime
+//            > 0 |           any | OffsetableCacheKey.WithOffset
+
+CacheKey CacheKey::CreateUniqueForCacheLifetime(Cache *cache) {
+  // Shift the cache-provided id up by one so that the all-zero key stays
+  // reserved for the "unset" cache key.
+  const uint64_t unique_id = 1 + cache->NewId();
+  // Ids with the top bit set belong to CreateUniqueForProcessLifetime; make
+  // sure we never wander into that range.
+  assert((unique_id >> 63) == 0U);
+  return CacheKey(/*file_num_etc64=*/0, /*offset_etc64=*/unique_id);
+}
+
+CacheKey CacheKey::CreateUniqueForProcessLifetime() {
+  // Ids are handed out downward from UINT64_MAX. Assuming Cache::NewId
+  // counts up from zero, this keeps the two generators in disjoint halves
+  // of the value space. Should this counter ever become contended, the
+  // space could be sub-divided and a CoreLocalArray used instead.
+  static std::atomic<uint64_t> next_id{UINT64_MAX};
+  const uint64_t unique_id = next_id.fetch_sub(1, std::memory_order_relaxed);
+  // The top bit must still be set, otherwise we have entered the range used
+  // by CreateUniqueForCacheLifetime.
+  assert((unique_id >> 63) == 1U);
+  return CacheKey(/*file_num_etc64=*/0, /*offset_etc64=*/unique_id);
+}
+
+// How we generate CacheKeys and base OffsetableCacheKey, assuming that
+// db_session_ids are generated from a base_session_id and
+// session_id_counter (by SemiStructuredUniqueIdGen+EncodeSessionId
+// in DBImpl::GenerateDbSessionId):
+//
+// Conceptual inputs:
+//   db_id                   (unstructured, from GenerateRawUniqueId or equiv)
+//                           * could be shared between cloned DBs but rare
+//                           * could be constant, if session id suffices
+//   base_session_id         (unstructured, from GenerateRawUniqueId)
+//   session_id_counter      (structured)
+//                           * usually much smaller than 2**24
+//   orig_file_number        (structured)
+//                           * usually smaller than 2**24
+//   offset_in_file          (structured, might skip lots of values)
+//                           * usually smaller than 2**32
+//
+// Overall approach (see https://github.com/pdillinger/unique_id for
+// background):
+//
+// First, we have three "structured" values, up to 64 bits each, that we
+// need to fit, without losses, into 128 bits. In practice, the values will
+// be small enough that they should fit. For example, applications generating
+// large SST files (large offsets) will naturally produce fewer files (small
+// file numbers). But we don't know ahead of time what bounds the values will
+// have.
+//
+// Second, we have unstructured inputs that enable distinct RocksDB processes
+// to pick a random point in space, likely very different from others. Xoring
+// the structured with the unstructured give us a cache key that is
+// structurally distinct between related keys (e.g. same file or same RocksDB
+// process) and distinct with high probability between unrelated keys.
+//
+// The problem of packing three structured values into the space for two is
+// complicated by the fact that we want to derive cache keys from SST unique
+// IDs, which have already combined structured and unstructured inputs in a
+// practically inseparable way. And we want a base cache key that works
+// with an offset of any size. So basically, we need to encode these three
+// structured values, each up to 64 bits, into 128 bits without knowing any
+// of their sizes. The DownwardInvolution() function gives us a mechanism to
+// accomplish this. (See its properties in math.h.) Specifically, for inputs
+// a, b, and c:
+//   lower64 = DownwardInvolution(a) ^ ReverseBits(b);
+//   upper64 = c ^ ReverseBits(a);
+// The 128-bit output is unique assuming there exist some i, j, and k
+// where a < 2**i, b < 2**j, c < 2**k, i <= 64, j <= 64, k <= 64, and
+// i + j + k <= 128. In other words, as long as there exist some bounds
+// that would allow us to pack the bits of a, b, and c into the output
+// if we know the bound, we can generate unique outputs without knowing
+// those bounds. To validate this claim, the inversion function (given
+// the bounds) has been implemented in CacheKeyDecoder in
+// db_block_cache_test.cc.
+//
+// With that in mind, the outputs in terms of the conceptual inputs look
+// like this, using bitwise-xor of the constituent pieces, low bits on left:
+//
+// |------------------------- file_num_etc64 -------------------------|
+// | +++++++++ base_session_id (lower 64 bits, involution) +++++++++ |
+// |-----------------------------------------------------------------|
+// | session_id_counter (involution) ..... |                         |
+// |-----------------------------------------------------------------|
+// | hash of: ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
+// |  * base_session_id (upper ~39 bits)                             |
+// |  * db_id (~122 bits entropy)                                    |
+// |-----------------------------------------------------------------|
+// |                             | ..... orig_file_number (reversed) |
+// |-----------------------------------------------------------------|
+//
+//
+// |------------------------- offset_etc64 --------------------------|
+// | ++++++++++ base_session_id (lower 64 bits, reversed) ++++++++++ |
+// |-----------------------------------------------------------------|
+// |                           | ..... session_id_counter (reversed) |
+// |-----------------------------------------------------------------|
+// | offset_in_file ............... |                                |
+// |-----------------------------------------------------------------|
+//
+// Some oddities or inconveniences of this layout are due to deriving
+// the "base" cache key (without offset) from the SST unique ID (see
+// GetSstInternalUniqueId). Specifically,
+// * Lower 64 of base_session_id occurs in both output words (ok but
+//   weird)
+// * The inclusion of db_id is bad for the conditions under which we
+//   can guarantee uniqueness, but could be useful in some cases with
+//   few small files per process, to make up for db session id only having
+//   ~103 bits of entropy.
+//
+// In fact, if DB ids were not involved, we would be guaranteed unique
+// cache keys for files generated in a single process until total bits for
+// biggest session_id_counter, orig_file_number, and offset_in_file
+// reach 128 bits.
+//
+// With the DB id limitation, we only have nice guaranteed unique cache
+// keys for files generated in a single process until biggest
+// session_id_counter and offset_in_file reach combined 64 bits. This
+// is quite good in practice because we can have millions of DB Opens
+// with terabyte size SST files, or billions of DB Opens with gigabyte
+// size SST files.
+//
+// One of the considerations in the translation between existing SST unique
+// IDs and base cache keys is supporting better SST unique IDs in a future
+// format_version. If we use a process-wide file counter instead of
+// session counter and file numbers, we only need to combine two 64-bit values
+// instead of three. But we don't want to track unique ID versions in the
+// manifest, so we want to keep the same translation layer between SST unique
+// IDs and base cache keys, even with updated SST unique IDs. If the new
+// unique IDs put the file counter where the orig_file_number was, and
+// use no structured field where session_id_counter was, then our translation
+// layer works fine for two structured fields as well as three (for
+// compatibility). The small computation for the translation (one
+// DownwardInvolution(), two ReverseBits(), both ~log(64) instructions deep)
+// is negligible for computing as part of SST file reader open.
+//
+// More on how https://github.com/pdillinger/unique_id applies here:
+// Every bit of output always includes "unstructured" uniqueness bits and
+// often combines with "structured" uniqueness bits. The "unstructured" bits
+// change infrequently: only when we cannot guarantee our state tracking for
+// "structured" uniqueness hasn't been cloned. Using a static
+// SemiStructuredUniqueIdGen for db_session_ids, this means we only get an
+// "all new" session id when a new process uses RocksDB. (Between processes,
+// we don't know if a DB or other persistent storage has been cloned. We
+// assume that if VM hot cloning is used, subsequently generated SST files
+// do not interact.) Within a process, only the session_lower of the
+// db_session_id changes incrementally ("structured" uniqueness).
+//
+// This basically means that our offsets, counters and file numbers allow us
+// to do somewhat "better than random" (birthday paradox) while in the
+// degenerate case of completely new session for each tiny file, we still
+// have strong uniqueness properties from the birthday paradox, with ~103
+// bit session IDs or up to 128 bits entropy with different DB IDs sharing a
+// cache.
+//
+// More collision probability analysis:
+// Suppose a RocksDB host generates (generously) 2 GB/s (10TB data, 17 DWPD)
+// with average process/session lifetime of (pessimistically) 4 minutes.
+// In 180 days (generous allowable data lifespan), we generate 31 million GB
+// of data, or 2^55 bytes, and 2^16 "all new" session IDs.
+//
+// First, suppose this is in a single DB (lifetime 180 days):
+// 128 bits cache key size
+// - 55 <- ideal size for byte offsets + file numbers
+// -  2 <- bits for offsets and file numbers not exactly powers of two
+// +  2 <- bits saved not using byte offsets in BlockBasedTable::GetCacheKey
+// ----
+//   73 <- bits remaining for distinguishing session IDs
+// The probability of a collision in 73 bits of session ID data is less than
+// 1 in 2**(73 - (2 * 16)), or roughly 1 in a trillion. And this assumes all
+// data from the last 180 days is in cache for potential collision, and that
+// cache keys under each session id exhaustively cover the remaining 57 bits
+// while in reality they'll only cover a small fraction of it.
+//
+// Although data could be transferred between hosts, each host has its own
+// cache and we are already assuming a high rate of "all new" session ids.
+// So this doesn't really change the collision calculation. Across a fleet
+// of 1 million, each with <1 in a trillion collision possibility,
+// fleetwide collision probability is <1 in a million.
+//
+// Now suppose we have many DBs per host, say 2**10, with same host-wide write
+// rate and process/session lifetime. File numbers will be ~10 bits smaller
+// and we will have 2**10 times as many session IDs because of simultaneous
+// lifetimes. So now collision chance is less than 1 in 2**(83 - (2 * 26)),
+// or roughly 1 in a billion.
+//
+// Suppose instead we generated random or hashed cache keys for each
+// (compressed) block. For 1KB compressed block size, that is 2^45 cache keys
+// in 180 days. Collision probability is more easily estimated at roughly
+// 1 in 2**(128 - (2 * 45)) or roughly 1 in a trillion (assuming all
+// data from the last 180 days is in cache, but NOT the other assumption
+// for the 1 in a trillion estimate above).
+//
+//
+// Collision probability estimation through simulation:
+// A tool ./cache_bench -stress_cache_key broadly simulates host-wide cache
+// activity over many months, by making some pessimistic simplifying
+// assumptions. See class StressCacheKey in cache_bench_tool.cc for details.
+// Here is some sample output with
+// `./cache_bench -stress_cache_key -sck_keep_bits=43`:
+//
+//   Total cache or DBs size: 32TiB  Writing 925.926 MiB/s or 76.2939TiB/day
+//   Multiply by 1.15292e+18 to correct for simulation losses (but still
+//   assume whole file cached)
+//
+// These come from default settings of 2.5M files per day of 32 MB each, and
+// `-sck_keep_bits=43` means that to represent a single file, we are only
+// keeping 43 bits of the 128-bit (base) cache key.  With file size of 2**25
+// contiguous keys (pessimistic), our simulation is about 2**(128-43-25) or
+// about 1 billion billion times more prone to collision than reality.
+//
+// More default assumptions, relatively pessimistic:
+// * 100 DBs in same process (doesn't matter much)
+// * Re-open DB in same process (new session ID related to old session ID) on
+// average every 100 files generated
+// * Restart process (all new session IDs unrelated to old) 24 times per day
+//
+// After enough data, we get a result at the end (-sck_keep_bits=43):
+//
+//   (keep 43 bits)  18 collisions after 2 x 90 days, est 10 days between
+//                   (1.15292e+19 corrected)
+//
+// If we believe the (pessimistic) simulation and the mathematical
+// extrapolation, we would need to run a billion machines all for 11 billion
+// days to expect a cache key collision. To help verify that our extrapolation
+// ("corrected") is robust, we can make our simulation more precise by
+// increasing the "keep" bits, which takes more running time to get enough
+// collision data:
+//
+//   (keep 44 bits)  16 collisions after 5 x 90 days, est 28.125 days between
+//                   (1.6213e+19 corrected)
+//   (keep 45 bits)  15 collisions after 7 x 90 days, est 42 days between
+//                   (1.21057e+19 corrected)
+//   (keep 46 bits)  15 collisions after 17 x 90 days, est 102 days between
+//                   (1.46997e+19 corrected)
+//   (keep 47 bits)  15 collisions after 49 x 90 days, est 294 days between
+//                   (2.11849e+19 corrected)
+//
+// The extrapolated prediction seems to be within noise (sampling error).
+//
+// With the `-sck_randomize` option, we can see that typical workloads like
+// above have lower collision probability than "random" cache keys (note:
+// offsets still non-randomized) by a modest amount (roughly 2-3x less
+// collision prone than random), which should make us reasonably comfortable
+// even in "degenerate" cases (e.g. repeatedly launch a process to generate
+// one file with SstFileWriter):
+//
+//   (rand 43 bits) 22 collisions after 1 x 90 days, est 4.09091 days between
+//                  (4.7165e+18 corrected)
+//
+// We can see that with more frequent process restarts,
+// -sck_restarts_per_day=5000, which means more all-new session IDs, we get
+// closer to the "random" cache key performance:
+//
+// 15 collisions after 1 x 90 days, est 6 days between (6.91753e+18 corrected)
+//
+// And with less frequent process restarts and re-opens,
+// -sck_restarts_per_day=1 -sck_reopen_nfiles=1000, we get lower collision
+// probability:
+//
+// 18 collisions after 8 x 90 days, est 40 days between (4.61169e+19 corrected)
+//
+// Other tests have been run to validate other conditions behave as expected,
+// never behaving "worse than random" unless we start chopping off structured
+// data.
+//
+// Conclusion: Even in extreme cases, rapidly burning through "all new" IDs
+// that only arise when a new process is started, the chance of any cache key
+// collisions in a giant fleet of machines is negligible. Especially when
+// processes live for hours or days, the chance of a cache key collision is
+// likely more plausibly due to bad hardware than to bad luck in random
+// session ID data. Software defects are surely more likely to cause corruption
+// than both of those.
+//
+// TODO: Nevertheless / regardless, an efficient way to detect (and thus
+// quantify) block cache corruptions, including collisions, should be added.
+OffsetableCacheKey::OffsetableCacheKey(const std::string &db_id,
+                                       const std::string &db_session_id,
+                                       uint64_t file_number) {
+  // Step 1: compute the internal SST unique id for this file. `force` is
+  // set, and the result is only checked in debug builds.
+  UniqueId64x2 sst_unique_id;
+  Status status =
+      GetSstInternalUniqueId(db_id, db_session_id, file_number,
+                             &sst_unique_id, /*force=*/true);
+  assert(status.ok());
+  // Step 2: translate that id into the base cache key.
+  *this = FromInternalUniqueId(&sst_unique_id);
+}
+
+// Translates a 128-bit internal SST unique id into a base cache key.
+// id.ptr[0] is used as the lower 64 bits of the session id and id.ptr[1] as
+// the file-number word. An all-zero id yields the all-zero ("empty") key;
+// for any other id the returned key has a non-zero file_num_etc64_ (checked
+// by the asserts below in debug builds).
+OffsetableCacheKey OffsetableCacheKey::FromInternalUniqueId(UniqueIdPtr id) {
+  uint64_t session_lower = id.ptr[0];
+  uint64_t file_num_etc = id.ptr[1];
+
+  // Only needed by the asserts, hence compiled out together with them.
+#ifndef NDEBUG
+  bool is_empty = session_lower == 0 && file_num_etc == 0;
+#endif
+
+  // Although DBImpl guarantees (in recent versions) that session_lower is not
+  // zero, that's not entirely sufficient to guarantee that file_num_etc64_ is
+  // not zero (so that the 0 case can be used by CacheKey::CreateUnique*)
+  // However, if we are given an "empty" id as input, then we should produce
+  // "empty" as output.
+  // As a consequence, this function is only bijective assuming
+  // id[0] == 0 only if id[1] == 0.
+  if (session_lower == 0U) {
+    session_lower = file_num_etc;
+  }
+
+  // See comments above for how DownwardInvolution and ReverseBits
+  // make this function invertible under various assumptions.
+  OffsetableCacheKey rv;
+  rv.file_num_etc64_ =
+      DownwardInvolution(session_lower) ^ ReverseBits(file_num_etc);
+  rv.offset_etc64_ = ReverseBits(session_lower);
+
+  // Because of these transformations and needing to allow arbitrary
+  // offset (thus, second 64 bits of cache key might be 0), we need to
+  // make some correction to ensure the first 64 bits is not 0.
+  // Fortunately, the transformation ensures the second 64 bits is not 0
+  // for non-empty base key, so we can swap in the case one is 0 without
+  // breaking bijectivity (assuming condition above).
+  assert(is_empty || rv.offset_etc64_ > 0);
+  if (rv.file_num_etc64_ == 0) {
+    // After the swap offset_etc64_ is 0; ToInternalUniqueId() uses that to
+    // recognize and undo this case.
+    std::swap(rv.file_num_etc64_, rv.offset_etc64_);
+  }
+  assert(is_empty || rv.file_num_etc64_ > 0);
+  return rv;
+}
+
+// Inverse of FromInternalUniqueId (assuming file_num_etc64 == 0 only if
+// offset_etc64 == 0)
+UniqueId64x2 OffsetableCacheKey::ToInternalUniqueId() {
+  // FromInternalUniqueId swaps the two words when the first would have been
+  // zero, which leaves offset_etc64_ == 0. Detect that and read the words in
+  // their pre-swap roles.
+  const bool was_swapped = (offset_etc64_ == 0);
+  const uint64_t file_word = was_swapped ? offset_etc64_ : file_num_etc64_;
+  const uint64_t session_word = was_swapped ? file_num_etc64_ : offset_etc64_;
+
+  UniqueId64x2 rv;
+  // Undo ReverseBits(session_lower).
+  rv[0] = ReverseBits(session_word);
+  // Undo DownwardInvolution(session_lower) ^ ReverseBits(file_num_etc).
+  rv[1] = ReverseBits(file_word ^ DownwardInvolution(rv[0]));
+  return rv;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_key.h
+++ b/cache/cache_key.h
@ -0,0 +1,143 @@
+//  Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstdint>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/slice.h"
+#include "table/unique_id_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Cache;
+
+// A standard holder for fixed-size block cache keys (and for related caches).
+// They are created through one of these, each using its own range of values:
+// * CacheKey::CreateUniqueForCacheLifetime
+// * CacheKey::CreateUniqueForProcessLifetime
+// * Default ctor ("empty" cache key)
+// * OffsetableCacheKey->WithOffset
+//
+// The first two use atomic counters to guarantee uniqueness over the given
+// lifetime and the last uses a form of universally unique identifier for
+// uniqueness with very high probability (and guaranteed for files generated
+// during a single process lifetime).
+//
+// CacheKeys are currently used by calling AsSlice() to pass as a key to
+// Cache. For performance, the keys are endianness-dependent (though otherwise
+// portable). (Persistable cache entries are not intended to cross platforms.)
+class CacheKey {
+ public:
+  // Default construction yields the reserved all-zero ("empty") cache key,
+  // which is never returned by any of the other ways of making a key.
+  CacheKey() : file_num_etc64_(0), offset_etc64_(0) {}
+
+  // True iff both 64-bit words are zero.
+  bool IsEmpty() const {
+    const bool first_is_zero = (file_num_etc64_ == 0);
+    const bool second_is_zero = (offset_etc64_ == 0);
+    // Non-short-circuit & on the two bools, as both are already computed.
+    return first_is_zero & second_is_zero;
+  }
+
+  // View this key as a 16-byte Slice over the object's own storage (byte
+  // order is endianness-dependent). The Slice is only valid while this
+  // object is alive and unmodified. Must not be called on an empty key.
+  Slice AsSlice() const {
+    static_assert(sizeof(*this) == 16, "Standardized on 16-byte cache key");
+    assert(!IsEmpty());
+    const char *raw_bytes = reinterpret_cast<const char *>(this);
+    return Slice(raw_bytes, sizeof(*this));
+  }
+
+  // Create a CacheKey that is unique among others associated with this Cache
+  // instance. Depends on Cache::NewId. This is useful for block cache
+  // "reservations".
+  static CacheKey CreateUniqueForCacheLifetime(Cache *cache);
+
+  // Create a CacheKey that is unique among others for the lifetime of this
+  // process. This is useful for saving in a static data member so that
+  // different DB instances can agree on a cache key for shared entities,
+  // such as for CacheEntryStatsCollector.
+  static CacheKey CreateUniqueForProcessLifetime();
+
+ protected:
+  friend class OffsetableCacheKey;
+  // Builds a key directly from its two words; reserved for the factory
+  // functions above and for OffsetableCacheKey.
+  CacheKey(uint64_t file_num_etc64, uint64_t offset_etc64)
+      : file_num_etc64_(file_num_etc64), offset_etc64_(offset_etc64) {}
+  // Declaration order fixes the byte layout exposed by AsSlice().
+  uint64_t file_num_etc64_;
+  uint64_t offset_etc64_;
+};
+
+// Size in bytes of a CacheKey (AsSlice() static_asserts that this is 16).
+constexpr uint8_t kCacheKeySize = static_cast<uint8_t>(sizeof(CacheKey));
+
+// A file-specific generator of cache keys, sometimes referred to as the
+// "base" cache key for a file because all the cache keys for various offsets
+// within the file are computed using simple arithmetic. The basis for the
+// general approach is discussed here: https://github.com/pdillinger/unique_id
+// Heavily related to GetUniqueIdFromTableProperties.
+//
+// If the db_id, db_session_id, and file_number come from the file's table
+// properties, then the keys will be stable across DB::Open/Close, backup/
+// restore, import/export, etc.
+//
+// This class "is a" CacheKey only privately so that it is not misused as
+// a ready-to-use CacheKey.
+class OffsetableCacheKey : private CacheKey {
+ public:
+  // Default construction gives an "empty" base key, which must not be used
+  // to derive cache keys.
+  OffsetableCacheKey() : CacheKey() {}
+
+  // Builds the base key from the given information about a file.
+  // This constructor never generates an "empty" base key.
+  OffsetableCacheKey(const std::string &db_id, const std::string &db_session_id,
+                     uint64_t file_number);
+
+  // Creates an OffsetableCacheKey from an SST unique ID, so that cache keys
+  // can be derived from DB manifest data before reading the file from
+  // storage--so that every part of the file can potentially go in a persistent
+  // cache.
+  //
+  // Calling GetSstInternalUniqueId() on a db_id, db_session_id, and
+  // file_number and passing the result to this function produces the same
+  // base cache key as feeding those inputs directly to the constructor.
+  //
+  // This is a bijective transformation assuming either id is empty or
+  // lower 64 bits is non-zero:
+  // * Empty (all zeros) input -> empty (all zeros) output
+  // * Lower 64 input is non-zero -> lower 64 output (file_num_etc64_) is
+  //   non-zero
+  static OffsetableCacheKey FromInternalUniqueId(UniqueIdPtr id);
+
+  // This is the inverse transformation to the above, assuming either empty
+  // or lower 64 bits (file_num_etc64_) is non-zero. Perhaps only useful for
+  // testing.
+  UniqueId64x2 ToInternalUniqueId();
+
+  // A base key is empty iff its first word is zero. In that case the second
+  // word is expected to be zero as well (debug-checked).
+  bool IsEmpty() const {
+    const bool empty = (file_num_etc64_ == 0);
+    assert(!empty || offset_etc64_ == 0);
+    return empty;
+  }
+
+  // Construct a CacheKey for an offset within a file. An offset is not
+  // necessarily a byte offset if a smaller unique identifier of keyable
+  // offsets is used.
+  //
+  // This is hot code and is deliberately just one xor: the offset is folded
+  // into the second word while the first word is copied unchanged.
+  CacheKey WithOffset(uint64_t offset) const {
+    assert(!IsEmpty());
+    const uint64_t offset_word = offset_etc64_ ^ offset;
+    return CacheKey(file_num_etc64_, offset_word);
+  }
+
+  // The "common prefix" is a shared prefix for all the returned CacheKeys.
+  // It is specific to the file but the same for all offsets within the file.
+  static constexpr size_t kCommonPrefixSize = 8;
+  // Returns the first kCommonPrefixSize bytes of this object, i.e. the bytes
+  // of file_num_etc64_. The Slice points into this object.
+  Slice CommonPrefixSlice() const {
+    static_assert(sizeof(file_num_etc64_) == kCommonPrefixSize,
+                  "8 byte common prefix expected");
+    assert(!IsEmpty());
+    // The prefix is read from `this`, so the first word must sit at offset 0.
+    assert(&this->file_num_etc64_ == static_cast<const void *>(this));
+
+    const char *prefix_bytes = reinterpret_cast<const char *>(this);
+    return Slice(prefix_bytes, kCommonPrefixSize);
+  }
+};
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_reservation_manager.cc
+++ b/cache/cache_reservation_manager.cc
@ -0,0 +1,184 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "cache/cache_reservation_manager.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+
+#include "rocksdb/cache.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "table/block_based/reader_common.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Remembers how much memory this handle accounts for and which manager to
+// give it back to on destruction. `cache_res_mgr` must be non-null.
+template <CacheEntryRole R>
+CacheReservationManagerImpl<R>::CacheReservationHandle::CacheReservationHandle(
+    std::size_t incremental_memory_used,
+    std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr)
+    : incremental_memory_used_(incremental_memory_used) {
+  assert(cache_res_mgr != nullptr);
+  // Holding a shared_ptr keeps the manager alive for as long as this handle.
+  cache_res_mgr_ = cache_res_mgr;
+}
+
+// Returns this handle's share of memory to the manager. A destructor has no
+// way to report failure, so the resulting Status is explicitly marked as
+// intentionally unchecked.
+template <CacheEntryRole R>
+CacheReservationManagerImpl<
+    R>::CacheReservationHandle::~CacheReservationHandle() {
+  cache_res_mgr_->ReleaseCacheReservation(incremental_memory_used_)
+      .PermitUncheckedError();
+}
+
+// Starts with nothing reserved in `cache` and no memory recorded as used.
+// `delayed_decrease` selects the lazy-shrink policy applied by
+// UpdateCacheReservation(). `cache` must be non-null.
+template <CacheEntryRole R>
+CacheReservationManagerImpl<R>::CacheReservationManagerImpl(
+    std::shared_ptr<Cache> cache, bool delayed_decrease)
+    : cache_(cache),
+      delayed_decrease_(delayed_decrease),
+      cache_allocated_size_(0),
+      memory_used_(0) {
+  assert(cache);
+}
+
+// Gives every dummy entry still held back to the cache.
+template <CacheEntryRole R>
+CacheReservationManagerImpl<R>::~CacheReservationManagerImpl() {
+  for (auto it = dummy_handles_.begin(); it != dummy_handles_.end(); ++it) {
+    cache_.ReleaseAndEraseIfLastRef(*it);
+  }
+}
+
+// Records `new_mem_used` as the current memory usage and grows or shrinks
+// the cache reservation to match. Returns OK when nothing needs to change,
+// otherwise the status of the increase or decrease step.
+template <CacheEntryRole R>
+Status CacheReservationManagerImpl<R>::UpdateCacheReservation(
+    std::size_t new_mem_used) {
+  memory_used_ = new_mem_used;
+  const std::size_t reserved =
+      cache_allocated_size_.load(std::memory_order_relaxed);
+
+  if (new_mem_used == reserved) {
+    return Status::OK();
+  }
+  if (new_mem_used > reserved) {
+    return IncreaseCacheReservation(new_mem_used);
+  }
+
+  // From here on new_mem_used < reserved.
+  //
+  // In delayed decrease mode, we don't decrease the cache reservation until
+  // the memory usage is less than 3/4 of what we reserve in the cache.
+  // We do this because
+  // (1) Dummy entry insertion is expensive in block cache
+  // (2) Holding on to previously inserted dummy entries saves such expensive
+  // insertions if memory usage grows again in the near future, which is
+  // likely when usage is still at or above 3/4 of what we reserve
+  const bool defer_decrease =
+      delayed_decrease_ && new_mem_used >= reserved / 4 * 3;
+  if (defer_decrease) {
+    return Status::OK();
+  }
+  return DecreaseCacheReservation(new_mem_used);
+}
+
+// Reserves cache space for `incremental_memory_used` more bytes and stores
+// in `*handle` an object whose destruction gives that amount back.
+// `handle` must be non-null. Returns the status of the reservation update.
+template <CacheEntryRole R>
+Status CacheReservationManagerImpl<R>::MakeCacheReservation(
+    std::size_t incremental_memory_used,
+    std::unique_ptr<CacheReservationManager::CacheReservationHandle>* handle) {
+  assert(handle);
+  const std::size_t new_total_mem_used =
+      GetTotalMemoryUsed() + incremental_memory_used;
+  Status status = UpdateCacheReservation(new_total_mem_used);
+  // The handle is created whether or not the update succeeded; it shares
+  // ownership of this manager.
+  handle->reset(new CacheReservationManagerImpl::CacheReservationHandle(
+      incremental_memory_used,
+      std::enable_shared_from_this<
+          CacheReservationManagerImpl<R>>::shared_from_this()));
+  return status;
+}
+
+// Subtracts `incremental_memory_used` from the recorded memory usage and
+// updates the cache reservation accordingly. The amount must not exceed the
+// current total (debug-checked).
+template <CacheEntryRole R>
+Status CacheReservationManagerImpl<R>::ReleaseCacheReservation(
+    std::size_t incremental_memory_used) {
+  const std::size_t current_total = GetTotalMemoryUsed();
+  assert(current_total >= incremental_memory_used);
+  return UpdateCacheReservation(current_total - incremental_memory_used);
+}
+
+// Inserts dummy entries of kSizeDummyEntry each until the reserved size is
+// at least `new_mem_used`. Stops at, and returns, the first failing insert
+// status; entries inserted before the failure stay reserved.
+template <CacheEntryRole R>
+Status CacheReservationManagerImpl<R>::IncreaseCacheReservation(
+    std::size_t new_mem_used) {
+  Status status = Status::OK();
+  while (new_mem_used > cache_allocated_size_.load(std::memory_order_relaxed)) {
+    Cache::Handle* dummy_handle = nullptr;
+    status = cache_.Insert(GetNextCacheKey(), kSizeDummyEntry, &dummy_handle);
+    if (!status.ok()) {
+      return status;
+    }
+
+    // Keep the handle so the entry can be released later.
+    dummy_handles_.push_back(dummy_handle);
+    cache_allocated_size_ += kSizeDummyEntry;
+  }
+  return status;
+}
+
+// Releases dummy entries, most recently inserted first, until the reserved
+// size is the smallest multiple of kSizeDummyEntry that is greater than or
+// equal to `new_mem_used`. Always returns OK.
+template <CacheEntryRole R>
+Status CacheReservationManagerImpl<R>::DecreaseCacheReservation(
+    std::size_t new_mem_used) {
+  // The condition adds kSizeDummyEntry on the left instead of subtracting it
+  // on the right so that size_t cannot underflow when
+  // cache_allocated_size_ is 0.
+  while (new_mem_used + kSizeDummyEntry <=
+         cache_allocated_size_.load(std::memory_order_relaxed)) {
+    assert(!dummy_handles_.empty());
+    cache_.ReleaseAndEraseIfLastRef(dummy_handles_.back());
+    dummy_handles_.pop_back();
+    cache_allocated_size_ -= kSizeDummyEntry;
+  }
+  return Status::OK();
+}
+
+// Returns the number of bytes currently reserved in the cache, i.e. the sum
+// of kSizeDummyEntry over the dummy entries added and not yet removed.
+template <CacheEntryRole R>
+std::size_t CacheReservationManagerImpl<R>::GetTotalReservedCacheSize() {
+  return cache_allocated_size_.load(std::memory_order_relaxed);
+}
+
+// Returns memory_used_, which UpdateCacheReservation() sets to the value it
+// was last called with (0 right after construction).
+template <CacheEntryRole R>
+std::size_t CacheReservationManagerImpl<R>::GetTotalMemoryUsed() {
+  return memory_used_;
+}
+
+// Generates a fresh cache-lifetime-unique key, stores it in cache_key_, and
+// returns a Slice that views cache_key_.
+template <CacheEntryRole R>
+Slice CacheReservationManagerImpl<R>::GetNextCacheKey() {
+  // Calling this function will have the side-effect of changing the
+  // underlying cache_key_ that is shared among other keys generated from this
+  // function. Therefore please make sure the previous keys are saved/copied
+  // before calling this function.
+  cache_key_ = CacheKey::CreateUniqueForCacheLifetime(cache_.get());
+  return cache_key_.AsSlice();
+}
+
+// Test hook: returns the item helper reported by CacheInterface::GetHelper()
+// for this manager's role.
+template <CacheEntryRole R>
+const Cache::CacheItemHelper*
+CacheReservationManagerImpl<R>::TEST_GetCacheItemHelperForRole() {
+  return CacheInterface::GetHelper();
+}
+
+// Explicit instantiations. The member functions above are defined in this
+// source file, so each CacheEntryRole that is used with
+// CacheReservationManagerImpl is instantiated here.
+template class CacheReservationManagerImpl<
+    CacheEntryRole::kBlockBasedTableReader>;
+template class CacheReservationManagerImpl<
+    CacheEntryRole::kCompressionDictionaryBuildingBuffer>;
+template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
+template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
+template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
+template class CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>;
+template class CacheReservationManagerImpl<CacheEntryRole::kBlobCache>;
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_reservation_manager.h
+++ b/cache/cache_reservation_manager.h
@ -0,0 +1,317 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include "cache/cache_entry_roles.h"
+#include "cache/cache_key.h"
+#include "cache/typed_cache.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+// CacheReservationManager is an interface for reserving cache space for the
+// memory used
+class CacheReservationManager {
+ public:
+  // CacheReservationHandle is for managing the lifetime of a cache reservation
+  // for an incremental amount of memory used (i.e, incremental_memory_used)
+  class CacheReservationHandle {
+   public:
+    virtual ~CacheReservationHandle() {}
+  };
+  virtual ~CacheReservationManager() {}
+  // Updates the reservation so that it reflects `new_memory_used`, an absolute
+  // amount of memory in bytes (not a delta).
+  virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
+  // TODO(hx235): replace the usage of
+  // `UpdateCacheReservation(memory_used_delta, increase)` with
+  // `UpdateCacheReservation(new_memory_used)` so that we only have one
+  // `UpdateCacheReservation` function
+  //
+  // Delta-based variant: `memory_used_delta` is the change in bytes and
+  // `increase` selects whether it is added to or subtracted from the current
+  // total memory used.
+  virtual Status UpdateCacheReservation(std::size_t memory_used_delta,
+                                        bool increase) = 0;
+  // Reserves cache space for an additional `incremental_memory_used` bytes and
+  // returns, through the output parameter `handle`, an object representing
+  // that reservation.
+  virtual Status MakeCacheReservation(
+      std::size_t incremental_memory_used,
+      std::unique_ptr<CacheReservationManager::CacheReservationHandle>
+          *handle) = 0;
+  // Returns the amount of cache space currently reserved, in bytes.
+  virtual std::size_t GetTotalReservedCacheSize() = 0;
+  // Returns the latest total memory used that this manager has recorded.
+  virtual std::size_t GetTotalMemoryUsed() = 0;
+};
+
+// CacheReservationManagerImpl implements interface CacheReservationManager
+// for reserving cache space for the memory used by inserting/releasing dummy
+// entries in the cache.
+//
+// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
+// can be called without external synchronization.
+template <CacheEntryRole R>
+class CacheReservationManagerImpl
+    : public CacheReservationManager,
+      public std::enable_shared_from_this<CacheReservationManagerImpl<R>> {
+ public:
+  // Handle type produced by MakeCacheReservation(). It records the incremental
+  // amount of memory it stands for and holds a shared_ptr to the manager. As
+  // documented on MakeCacheReservation(), destroying the handle releases the
+  // corresponding reservation.
+  class CacheReservationHandle
+      : public CacheReservationManager::CacheReservationHandle {
+   public:
+    CacheReservationHandle(
+        std::size_t incremental_memory_used,
+        std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr);
+    ~CacheReservationHandle() override;
+
+   private:
+    std::size_t incremental_memory_used_;
+    std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr_;
+  };
+
+  // Construct a CacheReservationManagerImpl
+  // @param cache The cache where dummy entries are inserted and released for
+  // reserving cache space
+  // @param delayed_decrease If set true, then dummy entries won't be released
+  //                         immediately when memory usage decreases.
+  //                         Instead, it will be released when the memory usage
+  //                         decreases to 3/4 of what we have reserved so far.
+  //                         This is for saving some future dummy entry
+  //                         insertion when memory usage increases are likely to
+  //                         happen in the near future.
+  //
+  // REQUIRED: cache is not nullptr
+  explicit CacheReservationManagerImpl(std::shared_ptr<Cache> cache,
+                                       bool delayed_decrease = false);
+
+  // no copy constructor, copy assignment, move constructor, move assignment
+  CacheReservationManagerImpl(const CacheReservationManagerImpl &) = delete;
+  CacheReservationManagerImpl &operator=(const CacheReservationManagerImpl &) =
+      delete;
+  CacheReservationManagerImpl(CacheReservationManagerImpl &&) = delete;
+  CacheReservationManagerImpl &operator=(CacheReservationManagerImpl &&) =
+      delete;
+
+  ~CacheReservationManagerImpl() override;
+
+  // One of the two ways of reserving/releasing cache space,
+  // see MakeCacheReservation() for the other.
+  //
+  // Use ONLY one of these two ways to prevent unexpected behavior.
+  //
+  // Insert and release dummy entries in the cache to
+  // match the size of total dummy entries with the least multiple of
+  // kSizeDummyEntry greater than or equal to new_mem_used
+  //
+  // Insert dummy entries if new_memory_used > cache_allocated_size_;
+  //
+  // Release dummy entries if new_memory_used < cache_allocated_size_
+  // (and new_memory_used < cache_allocated_size_ * 3/4
+  // when delayed_decrease is set true);
+  //
+  // Keep dummy entries the same if (1) new_memory_used == cache_allocated_size_
+  // or (2) new_memory_used is in the interval of
+  // [cache_allocated_size_ * 3/4, cache_allocated_size_) when delayed_decrease
+  // is set true.
+  //
+  // @param new_memory_used The number of bytes used by new memory
+  //        The most recent new_memory_used passed in will be returned
+  //        in GetTotalMemoryUsed() even when the call returns non-ok status.
+  //
+  //        Since the class is NOT thread-safe, external synchronization on the
+  //        order of calling UpdateCacheReservation() is needed if you want
+  //        GetTotalMemoryUsed() indeed returns the latest memory used.
+  //
+  // @return On inserting dummy entries, it returns Status::OK() if all dummy
+  //         entry insertions succeed.
+  //         Otherwise, it returns the first non-ok status;
+  //         On releasing dummy entries, it always returns Status::OK().
+  //         On keeping dummy entries the same, it always returns Status::OK().
+  Status UpdateCacheReservation(std::size_t new_memory_used) override;
+
+  // The delta-based overload is not implemented by this class: it ignores its
+  // arguments and always returns Status::NotSupported().
+  Status UpdateCacheReservation(std::size_t /* memory_used_delta */,
+                                bool /* increase */) override {
+    return Status::NotSupported();
+  }
+
+  // One of the two ways of reserving cache space and releasing is done through
+  // destruction of CacheReservationHandle.
+  // See UpdateCacheReservation() for the other way.
+  //
+  // Use ONLY one of these two ways to prevent unexpected behavior.
+  //
+  // Insert dummy entries in the cache for the incremental memory usage
+  // to match the size of total dummy entries with the least multiple of
+  // kSizeDummyEntry greater than or equal to the total memory used.
+  //
+  // A CacheReservationHandle is returned as an output parameter.
+  // The reserved dummy entries are automatically released on the destruction of
+  // this handle, which achieves better RAII per cache reservation.
+  //
+  // WARNING: Deallocate all the handles of the CacheReservationManager object
+  //          before deallocating the object to prevent unexpected behavior.
+  //
+  // @param incremental_memory_used The number of bytes increased in memory
+  //        usage.
+  //
+  //        Calling GetTotalMemoryUsed() afterward will return the total memory
+  //        increased by this number, even when calling MakeCacheReservation()
+  //        returns non-ok status.
+  //
+  //        Since the class is NOT thread-safe, external synchronization in
+  //        calling MakeCacheReservation() is needed if you want
+  //        GetTotalMemoryUsed() indeed returns the latest memory used.
+  //
+  // @param handle A pointer to std::unique_ptr<CacheReservationHandle> that
+  //        manages the lifetime of the cache reservation represented by the
+  //        handle.
+  //
+  // @return It returns Status::OK() if all dummy
+  //         entry insertions succeed.
+  //         Otherwise, it returns the first non-ok status;
+  //
+  // REQUIRES: handle != nullptr
+  Status MakeCacheReservation(
+      std::size_t incremental_memory_used,
+      std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
+      override;
+
+  // Return the size of the cache (which is a multiple of kSizeDummyEntry)
+  // successfully reserved by calling UpdateCacheReservation().
+  //
+  // When UpdateCacheReservation() returns non-ok status,
+  // calling GetTotalReservedCacheSize() after that might return a slightly
+  // smaller number than the actual reserved cache size due to
+  // the returned number will always be a multiple of kSizeDummyEntry
+  // and cache full might happen in the middle of inserting a dummy entry.
+  std::size_t GetTotalReservedCacheSize() override;
+
+  // Return the latest total memory used indicated by the most recent call of
+  // UpdateCacheReservation(std::size_t new_memory_used);
+  std::size_t GetTotalMemoryUsed() override;
+
+  // Compile-time accessor for the size of one dummy entry (kSizeDummyEntry).
+  static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }
+
+  // For testing only - it is to help ensure the CacheItemHelperForRole<R>
+  // accessed from CacheReservationManagerImpl and the one accessed from the
+  // test are from the same translation units
+  static const Cache::CacheItemHelper *TEST_GetCacheItemHelperForRole();
+
+ private:
+  // Size of a single dummy entry: 256 KiB. Reservations are tracked in
+  // multiples of this value.
+  static constexpr std::size_t kSizeDummyEntry = 256 * 1024;
+
+  // Generates a fresh cache key; the result is stored in cache_key_ and
+  // returned as a Slice of it.
+  Slice GetNextCacheKey();
+
+  Status ReleaseCacheReservation(std::size_t incremental_memory_used);
+  Status IncreaseCacheReservation(std::size_t new_mem_used);
+  Status DecreaseCacheReservation(std::size_t new_mem_used);
+
+  using CacheInterface = PlaceholderSharedCacheInterface<R>;
+  // The cache in which dummy entries are inserted and released.
+  CacheInterface cache_;
+  // See the `delayed_decrease` constructor parameter.
+  bool delayed_decrease_;
+  // Total size of the dummy entries currently reserved. It is atomic, which
+  // matches the class-level note that GetTotalReservedCacheSize() can be
+  // called without external synchronization.
+  std::atomic<std::size_t> cache_allocated_size_;
+  // Latest memory usage reported to this manager (not atomic).
+  std::size_t memory_used_;
+  // Handles of the dummy entries currently held in the cache.
+  std::vector<Cache::Handle *> dummy_handles_;
+  // Storage for the most recently generated key; see GetNextCacheKey().
+  CacheKey cache_key_;
+};
+
+// ConcurrentCacheReservationManager wraps another CacheReservationManager and
+// guards calls into it with an internal mutex (cache_res_mgr_mu_), so the
+// wrapped manager's update and memory-usage bookkeeping calls are serialized.
+//
+// REQUIRES: the object is owned by a std::shared_ptr when
+// MakeCacheReservation() is called, because that method calls
+// shared_from_this().
+class ConcurrentCacheReservationManager
+    : public CacheReservationManager,
+      public std::enable_shared_from_this<ConcurrentCacheReservationManager> {
+ public:
+  // Handle returned by MakeCacheReservation(). It owns the wrapped manager's
+  // handle and holds a shared_ptr to this manager. On destruction it destroys
+  // the wrapped handle while holding the manager's mutex.
+  class CacheReservationHandle
+      : public CacheReservationManager::CacheReservationHandle {
+   public:
+    // REQUIRES: neither argument is null (asserted in debug builds).
+    CacheReservationHandle(
+        std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr,
+        std::unique_ptr<CacheReservationManager::CacheReservationHandle>
+            cache_res_handle)
+        // Both parameters are taken by value, so move them straight into the
+        // members; this avoids default-constructing the members first and an
+        // extra shared_ptr reference-count round trip.
+        : cache_res_mgr_(std::move(cache_res_mgr)),
+          cache_res_handle_(std::move(cache_res_handle)) {
+      assert(cache_res_mgr_ && cache_res_handle_);
+    }
+
+    ~CacheReservationHandle() override {
+      // Destroy the wrapped handle under the manager's mutex so that whatever
+      // its destructor does to the wrapped manager is serialized with the
+      // manager's other locked operations.
+      std::lock_guard<std::mutex> lock(cache_res_mgr_->cache_res_mgr_mu_);
+      cache_res_handle_.reset();
+    }
+
+   private:
+    std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
+    std::unique_ptr<CacheReservationManager::CacheReservationHandle>
+        cache_res_handle_;
+  };
+
+  // @param cache_res_mgr The manager to wrap; every call is forwarded to it.
+  explicit ConcurrentCacheReservationManager(
+      std::shared_ptr<CacheReservationManager> cache_res_mgr)
+      : cache_res_mgr_(std::move(cache_res_mgr)) {}
+
+  // no copy constructor, copy assignment, move constructor, move assignment
+  ConcurrentCacheReservationManager(const ConcurrentCacheReservationManager &) =
+      delete;
+  ConcurrentCacheReservationManager &operator=(
+      const ConcurrentCacheReservationManager &) = delete;
+  ConcurrentCacheReservationManager(ConcurrentCacheReservationManager &&) =
+      delete;
+  ConcurrentCacheReservationManager &operator=(
+      ConcurrentCacheReservationManager &&) = delete;
+
+  ~ConcurrentCacheReservationManager() override {}
+
+  // Forwards to the wrapped manager's UpdateCacheReservation(new_memory_used)
+  // while holding the mutex, and returns its status.
+  inline Status UpdateCacheReservation(std::size_t new_memory_used) override {
+    std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
+    return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
+  }
+
+  // Delta-based update. Under the mutex, reads the wrapped manager's current
+  // total memory used, applies `memory_used_delta` (added when `increase` is
+  // true, subtracted otherwise) and forwards the resulting absolute value to
+  // the wrapped manager's UpdateCacheReservation(new_memory_used).
+  //
+  // REQUIRES: when `increase` is false, `memory_used_delta` must not exceed
+  // the current total memory used (asserted in debug builds only).
+  inline Status UpdateCacheReservation(std::size_t memory_used_delta,
+                                       bool increase) override {
+    std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
+    const std::size_t total_mem_used = cache_res_mgr_->GetTotalMemoryUsed();
+    std::size_t new_total_mem_used = 0;
+    if (increase) {
+      new_total_mem_used = total_mem_used + memory_used_delta;
+    } else {
+      assert(total_mem_used >= memory_used_delta);
+      new_total_mem_used = total_mem_used - memory_used_delta;
+    }
+    return cache_res_mgr_->UpdateCacheReservation(new_total_mem_used);
+  }
+
+  // Makes a reservation through the wrapped manager while holding the mutex,
+  // then stores in `*handle` a ConcurrentCacheReservationManager handle that
+  // wraps the inner one. The mutex is released before the outer handle is
+  // built. The status of the wrapped call is returned; `*handle` is assigned
+  // regardless of that status.
+  //
+  // REQUIRES: handle != nullptr
+  // REQUIRES: this object is owned by a std::shared_ptr (shared_from_this()
+  //           is used to keep the manager alive for the handle's lifetime).
+  inline Status MakeCacheReservation(
+      std::size_t incremental_memory_used,
+      std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
+      override {
+    assert(handle != nullptr);
+    std::unique_ptr<CacheReservationManager::CacheReservationHandle>
+        wrapped_handle;
+    Status s;
+    {
+      std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
+      s = cache_res_mgr_->MakeCacheReservation(incremental_memory_used,
+                                               &wrapped_handle);
+    }
+    handle->reset(new ConcurrentCacheReservationManager::CacheReservationHandle(
+        shared_from_this(), std::move(wrapped_handle)));
+    return s;
+  }
+
+  // Forwards to the wrapped manager without taking the mutex.
+  // NOTE(review): this relies on the wrapped manager's
+  // GetTotalReservedCacheSize() being callable without external
+  // synchronization, as documented for CacheReservationManagerImpl; confirm
+  // for any other wrapped implementation.
+  inline std::size_t GetTotalReservedCacheSize() override {
+    return cache_res_mgr_->GetTotalReservedCacheSize();
+  }
+
+  // Returns the wrapped manager's total memory used, read under the mutex.
+  inline std::size_t GetTotalMemoryUsed() override {
+    std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
+    return cache_res_mgr_->GetTotalMemoryUsed();
+  }
+
+ private:
+  // Serializes access to cache_res_mgr_ (also taken by the nested handle's
+  // destructor).
+  std::mutex cache_res_mgr_mu_;
+  // The wrapped manager that performs the actual reservations.
+  std::shared_ptr<CacheReservationManager> cache_res_mgr_;
+};
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/cache_reservation_manager_test.cc
+++ b/cache/cache_reservation_manager_test.cc
@ -0,0 +1,469 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "cache/cache_reservation_manager.h"
+
+#include <cstddef>
+#include <cstring>
+#include <memory>
+
+#include "cache/cache_entry_roles.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/slice.h"
+#include "test_util/testharness.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+// Fixture for the TEST_F cases in this file: an LRU cache with a single shard
+// and room for 4096 dummy entries, plus a kMisc-role reservation manager that
+// reserves space in that cache.
+class CacheReservationManagerTest : public ::testing::Test {
+ protected:
+  static constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
+  static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
+  static constexpr int kNumShardBits = 0;  // 2^0 shard
+  // Slack allowed on top of the expected pinned usage in the upper-bound
+  // checks of the tests.
+  static constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  // `cache` is declared (and therefore initialized) before the manager that
+  // is constructed from it.
+  std::shared_ptr<Cache> cache = NewLRUCache(kCacheCapacity, kNumShardBits);
+  std::shared_ptr<CacheReservationManager> test_cache_rev_mng;
+
+  CacheReservationManagerTest()
+      : test_cache_rev_mng(
+            std::make_shared<
+                CacheReservationManagerImpl<CacheEntryRole::kMisc>>(cache)) {}
+};
+
+// Checks that the dummy entry inserted by the manager can be found in the
+// cache under the key this test reconstructs.
+TEST_F(CacheReservationManagerTest, GenerateCacheKey) {
+  // Reserve one dummy entry's worth of memory.
+  const std::size_t one_entry = 1 * kSizeDummyEntry;
+  Status status = test_cache_rev_mng->UpdateCacheReservation(one_entry);
+  ASSERT_EQ(status, Status::OK());
+  ASSERT_GE(cache->GetPinnedUsage(), one_entry);
+  ASSERT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead);
+
+  // Ask for the next unique key, then step its second 64-bit word back by one
+  // to get the key used by the manager. This leans on CacheKey implementation
+  // details.
+  CacheKey probe_key = CacheKey::CreateUniqueForCacheLifetime(cache.get());
+  uint64_t* const key_words = reinterpret_cast<uint64_t*>(&probe_key);
+  key_words[1]--;
+
+  // Specific key (subject to implementation details)
+  EXPECT_EQ(key_words[0], 0);
+  EXPECT_EQ(key_words[1], 2);
+
+  Cache::Handle* const dummy_entry = cache->Lookup(probe_key.AsSlice());
+  EXPECT_NE(dummy_entry, nullptr)
+      << "Failed to generate the cache key for the dummy entry correctly";
+  // Give back the handle returned by Lookup() so it is not leaked.
+  cache->Release(dummy_entry);
+}
+
+// Requesting the same memory usage twice must leave the reservation, the
+// recorded usage and the cache's pinned usage unchanged.
+TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) {
+  CacheReservationManager& mgr = *test_cache_rev_mng;
+  const std::size_t one_entry = 1 * kSizeDummyEntry;
+
+  // Baseline: a reservation of exactly one dummy entry.
+  Status status = mgr.UpdateCacheReservation(one_entry);
+  ASSERT_EQ(status, Status::OK());
+  ASSERT_EQ(mgr.GetTotalReservedCacheSize(), one_entry);
+  ASSERT_EQ(mgr.GetTotalMemoryUsed(), one_entry);
+  const std::size_t baseline_pinned = cache->GetPinnedUsage();
+  ASSERT_GE(baseline_pinned, one_entry);
+  ASSERT_LT(baseline_pinned, one_entry + kMetaDataChargeOverhead);
+
+  // Same request again: nothing may change.
+  status = mgr.UpdateCacheReservation(one_entry);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to keep cache reservation the same when new_mem_used equals "
+         "to current cache reservation";
+  EXPECT_EQ(mgr.GetTotalReservedCacheSize(), one_entry)
+      << "Failed to bookkeep correctly when new_mem_used equals to current "
+         "cache reservation";
+  EXPECT_EQ(mgr.GetTotalMemoryUsed(), one_entry)
+      << "Failed to bookkeep the used memory correctly when new_mem_used "
+         "equals to current cache reservation";
+  EXPECT_EQ(cache->GetPinnedUsage(), baseline_pinned)
+      << "Failed to keep underlying dummy entries the same when new_mem_used "
+         "equals to current cache reservation";
+}
+
+// Growing from nothing to exactly two dummy entries reserves exactly two
+// dummy entries.
+TEST_F(CacheReservationManagerTest,
+       IncreaseCacheReservationByMultiplesOfDummyEntrySize) {
+  CacheReservationManager& mgr = *test_cache_rev_mng;
+  const std::size_t two_entries = 2 * kSizeDummyEntry;
+
+  Status status = mgr.UpdateCacheReservation(two_entries);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to increase cache reservation correctly";
+  EXPECT_EQ(mgr.GetTotalReservedCacheSize(), two_entries)
+      << "Failed to bookkeep cache reservation increase correctly";
+  EXPECT_EQ(mgr.GetTotalMemoryUsed(), two_entries)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), two_entries)
+      << "Failed to increase underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead)
+      << "Failed to increase underlying dummy entries in cache correctly";
+}
+
+// A request of 2.5 dummy entries is expected to be rounded up to three
+// reserved dummy entries, while the recorded usage stays at the exact request.
+TEST_F(CacheReservationManagerTest,
+       IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
+  CacheReservationManager& mgr = *test_cache_rev_mng;
+  const std::size_t requested = 2 * kSizeDummyEntry + kSizeDummyEntry / 2;
+  const std::size_t three_entries = 3 * kSizeDummyEntry;
+
+  Status status = mgr.UpdateCacheReservation(requested);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to increase cache reservation correctly";
+  EXPECT_EQ(mgr.GetTotalReservedCacheSize(), three_entries)
+      << "Failed to bookkeep cache reservation increase correctly";
+  EXPECT_EQ(mgr.GetTotalMemoryUsed(), requested)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), three_entries)
+      << "Failed to increase underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(), three_entries + kMetaDataChargeOverhead)
+      << "Failed to increase underlying dummy entries in cache correctly";
+}
+
+// Exercises reservation against a small cache with strict_capacity_limit set:
+// an over-capacity request must fail with MemoryLimit, a later decrease must
+// succeed, and after the capacity is raised the same over-capacity request
+// must succeed.
+TEST(CacheReservationManagerIncreaseReservcationOnFullCacheTest,
+     IncreaseCacheReservationOnFullCache) {
+  constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
+  constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry;
+  constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions opts;
+  opts.capacity = kSmallCacheCapacity;
+  opts.num_shard_bits = 0;  // 2^0 shard
+  opts.strict_capacity_limit = true;
+  std::shared_ptr<Cache> cache = NewLRUCache(opts);
+  std::shared_ptr<CacheReservationManager> manager =
+      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
+          cache);
+
+  // One byte more than the small cache can hold.
+  const std::size_t over_capacity = kSmallCacheCapacity + 1;
+
+  // Requests `over_capacity` and verifies the "cache full" outcome. Used
+  // twice below with identical expectations.
+  const auto expect_rejected_by_full_cache = [&]() {
+    Status status = manager->UpdateCacheReservation(over_capacity);
+    EXPECT_EQ(status, Status::MemoryLimit())
+        << "Failed to return status to indicate failure of dummy entry "
+           "insertion "
+           "during cache reservation on full cache";
+    EXPECT_GE(manager->GetTotalReservedCacheSize(), 1 * kSizeDummyEntry)
+        << "Failed to bookkeep correctly before cache resevation failure "
+           "happens "
+           "due to full cache";
+    EXPECT_LE(manager->GetTotalReservedCacheSize(), kSmallCacheCapacity)
+        << "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
+           "entry insertions) when encountering cache resevation failure due "
+           "to "
+           "full cache";
+    EXPECT_EQ(manager->GetTotalMemoryUsed(), over_capacity)
+        << "Failed to bookkeep the used memory correctly";
+    EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
+        << "Failed to insert underlying dummy entries correctly when "
+           "encountering cache resevation failure due to full cache";
+    EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
+        << "Failed to insert underlying dummy entries correctly when "
+           "encountering cache resevation failure due to full cache";
+  };
+
+  // Step 1: the request does not fit, so the reservation fails part-way.
+  expect_rejected_by_full_cache();
+
+  // Step 2: shrinking to half of the capacity (2 dummy entries) must work.
+  const std::size_t half_capacity = kSmallCacheCapacity / 2;
+  const std::size_t two_entries = 2 * kSizeDummyEntry;
+  Status status = manager->UpdateCacheReservation(half_capacity);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to decrease cache reservation after encountering cache "
+         "reservation failure due to full cache";
+  EXPECT_EQ(manager->GetTotalReservedCacheSize(), two_entries)
+      << "Failed to bookkeep cache reservation decrease correctly after "
+         "encountering cache reservation due to full cache";
+  EXPECT_EQ(manager->GetTotalMemoryUsed(), half_capacity)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), two_entries)
+      << "Failed to release underlying dummy entries correctly on cache "
+         "reservation decrease after encountering cache resevation failure due "
+         "to full cache";
+  EXPECT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead)
+      << "Failed to release underlying dummy entries correctly on cache "
+         "reservation decrease after encountering cache resevation failure due "
+         "to full cache";
+
+  // Step 3: create cache full again for the subsequent checks.
+  expect_rejected_by_full_cache();
+
+  // Step 4: increase cache capacity so the previously failed insertion can
+  // fully succeed.
+  cache->SetCapacity(kBigCacheCapacity);
+  const std::size_t five_entries = 5 * kSizeDummyEntry;
+  status = manager->UpdateCacheReservation(over_capacity);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to increase cache reservation after increasing cache capacity "
+         "and mitigating cache full error";
+  EXPECT_EQ(manager->GetTotalReservedCacheSize(), five_entries)
+      << "Failed to bookkeep cache reservation increase correctly after "
+         "increasing cache capacity and mitigating cache full error";
+  EXPECT_EQ(manager->GetTotalMemoryUsed(), over_capacity)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), five_entries)
+      << "Failed to insert underlying dummy entries correctly after increasing "
+         "cache capacity and mitigating cache full error";
+  EXPECT_LT(cache->GetPinnedUsage(), five_entries + kMetaDataChargeOverhead)
+      << "Failed to insert underlying dummy entries correctly after increasing "
+         "cache capacity and mitigating cache full error";
+}
+
+// Shrinking from two dummy entries to exactly one releases one dummy entry.
+TEST_F(CacheReservationManagerTest,
+       DecreaseCacheReservationByMultiplesOfDummyEntrySize) {
+  CacheReservationManager& mgr = *test_cache_rev_mng;
+  const std::size_t two_entries = 2 * kSizeDummyEntry;
+  const std::size_t one_entry = 1 * kSizeDummyEntry;
+
+  // Start from a reservation of two dummy entries.
+  Status status = mgr.UpdateCacheReservation(two_entries);
+  ASSERT_EQ(status, Status::OK());
+  ASSERT_EQ(mgr.GetTotalReservedCacheSize(), two_entries);
+  ASSERT_EQ(mgr.GetTotalMemoryUsed(), two_entries);
+  ASSERT_GE(cache->GetPinnedUsage(), two_entries);
+  ASSERT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead);
+
+  // Drop to one dummy entry.
+  status = mgr.UpdateCacheReservation(one_entry);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to decrease cache reservation correctly";
+  EXPECT_EQ(mgr.GetTotalReservedCacheSize(), one_entry)
+      << "Failed to bookkeep cache reservation decrease correctly";
+  EXPECT_EQ(mgr.GetTotalMemoryUsed(), one_entry)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), one_entry)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+}
+
+// Shrinking from two dummy entries to half a dummy entry is expected to leave
+// one dummy entry reserved, while the recorded usage is the exact request.
+TEST_F(CacheReservationManagerTest,
+       DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
+  CacheReservationManager& mgr = *test_cache_rev_mng;
+  const std::size_t two_entries = 2 * kSizeDummyEntry;
+  const std::size_t one_entry = 1 * kSizeDummyEntry;
+  const std::size_t half_entry = kSizeDummyEntry / 2;
+
+  // Start from a reservation of two dummy entries.
+  Status status = mgr.UpdateCacheReservation(two_entries);
+  ASSERT_EQ(status, Status::OK());
+  ASSERT_EQ(mgr.GetTotalReservedCacheSize(), two_entries);
+  ASSERT_EQ(mgr.GetTotalMemoryUsed(), two_entries);
+  ASSERT_GE(cache->GetPinnedUsage(), two_entries);
+  ASSERT_LT(cache->GetPinnedUsage(), two_entries + kMetaDataChargeOverhead);
+
+  // Drop to a usage that is not a multiple of the dummy entry size.
+  status = mgr.UpdateCacheReservation(half_entry);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to decrease cache reservation correctly";
+  EXPECT_EQ(mgr.GetTotalReservedCacheSize(), one_entry)
+      << "Failed to bookkeep cache reservation decrease correctly";
+  EXPECT_EQ(mgr.GetTotalMemoryUsed(), half_entry)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), one_entry)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+}
+
+// With delayed_decrease enabled, usage values at or above 3/4 of the current
+// reservation must not release dummy entries; a value just below 3/4 must.
+TEST(CacheReservationManagerWithDelayedDecreaseTest,
+     DecreaseCacheReservationWithDelayedDecrease) {
+  constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
+  constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions opts;
+  opts.capacity = kCacheCapacity;
+  opts.num_shard_bits = 0;
+  std::shared_ptr<Cache> cache = NewLRUCache(opts);
+  std::shared_ptr<CacheReservationManager> manager =
+      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
+          cache, true /* delayed_decrease */);
+
+  // Baseline: eight dummy entries reserved.
+  const std::size_t eight_entries = 8 * kSizeDummyEntry;
+  Status status = manager->UpdateCacheReservation(eight_entries);
+  ASSERT_EQ(status, Status::OK());
+  ASSERT_EQ(manager->GetTotalReservedCacheSize(), eight_entries);
+  ASSERT_EQ(manager->GetTotalMemoryUsed(), eight_entries);
+  const std::size_t baseline_pinned = cache->GetPinnedUsage();
+  ASSERT_GE(baseline_pinned, eight_entries);
+  ASSERT_LT(baseline_pinned, eight_entries + kMetaDataChargeOverhead);
+
+  // 6 entries (exactly 3/4 of 8) and then 7 entries: in both cases the
+  // reservation of 8 entries must be kept as is.
+  for (const std::size_t still_delayed :
+       {6 * kSizeDummyEntry, 7 * kSizeDummyEntry}) {
+    status = manager->UpdateCacheReservation(still_delayed);
+    EXPECT_EQ(status, Status::OK())
+        << "Failed to delay decreasing cache reservation";
+    EXPECT_EQ(manager->GetTotalReservedCacheSize(), eight_entries)
+        << "Failed to bookkeep correctly when delaying cache reservation "
+           "decrease";
+    EXPECT_EQ(manager->GetTotalMemoryUsed(), still_delayed)
+        << "Failed to bookkeep the used memory correctly";
+    EXPECT_EQ(cache->GetPinnedUsage(), baseline_pinned)
+        << "Failed to delay decreasing underlying dummy entries in cache";
+  }
+
+  // One byte below 3/4 of the reservation: now dummy entries are released
+  // and six remain.
+  const std::size_t below_threshold = 6 * kSizeDummyEntry - 1;
+  const std::size_t six_entries = 6 * kSizeDummyEntry;
+  status = manager->UpdateCacheReservation(below_threshold);
+  EXPECT_EQ(status, Status::OK())
+      << "Failed to decrease cache reservation correctly when new_mem_used < "
+         "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
+  EXPECT_EQ(manager->GetTotalReservedCacheSize(), six_entries)
+      << "Failed to bookkeep correctly when new_mem_used < "
+         "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
+  EXPECT_EQ(manager->GetTotalMemoryUsed(), below_threshold)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), six_entries)
+      << "Failed to decrease underlying dummy entries in cache when "
+         "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
+         "decrease mode";
+  EXPECT_LT(cache->GetPinnedUsage(), six_entries + kMetaDataChargeOverhead)
+      << "Failed to decrease underlying dummy entries in cache when "
+         "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
+         "decrease mode";
+}
+
+// Once the manager goes out of scope, the cache must report no pinned usage
+// left over from its dummy entries.
+TEST(CacheReservationManagerDestructorTest,
+     ReleaseRemainingDummyEntriesOnDestruction) {
+  constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
+  constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions opts;
+  opts.capacity = kCacheCapacity;
+  opts.num_shard_bits = 0;
+  std::shared_ptr<Cache> cache = NewLRUCache(opts);
+  {
+    // The manager lives only inside this scope.
+    std::shared_ptr<CacheReservationManager> scoped_manager =
+        std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
+            cache);
+    const std::size_t one_entry = 1 * kSizeDummyEntry;
+    Status status = scoped_manager->UpdateCacheReservation(one_entry);
+    ASSERT_EQ(status, Status::OK());
+    ASSERT_GE(cache->GetPinnedUsage(), one_entry);
+    ASSERT_LT(cache->GetPinnedUsage(), one_entry + kMetaDataChargeOverhead);
+  }
+  EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry)
+      << "Failed to release remaining underlying dummy entries in cache in "
+         "CacheReservationManager's destructor";
+}
+
+// Exercises CacheReservationManager::MakeCacheReservation and the handles it
+// returns: each reservation must be reflected in the manager's bookkeeping
+// and in the cache's pinned usage, and destroying a handle must give the
+// reserved space back. `mem_used` mirrors the expected total after each step.
+TEST(CacheReservationHandleTest, HandleTest) {
+  constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024;
+  constexpr std::size_t kSizeDummyEntry = 256 * 1024;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions lo;
+  lo.capacity = kOneGigabyte;
+  lo.num_shard_bits = 0;
+  std::shared_ptr<Cache> cache = NewLRUCache(lo);
+
+  std::shared_ptr<CacheReservationManager> test_cache_rev_mng(
+      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
+          cache));
+
+  std::size_t mem_used = 0;
+  const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry;
+  const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry;
+  std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle_1,
+      handle_2;
+
+  // To test that consecutive CacheReservationManager::MakeCacheReservation
+  // calls work correctly in terms of returning the handle as well as updating
+  // the cache reservation and the latest total memory used
+  Status s = test_cache_rev_mng->MakeCacheReservation(
+      incremental_mem_used_handle_1, &handle_1);
+  mem_used = mem_used + incremental_mem_used_handle_1;
+  ASSERT_EQ(s, Status::OK());
+  EXPECT_TRUE(handle_1 != nullptr);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  s = test_cache_rev_mng->MakeCacheReservation(incremental_mem_used_handle_2,
+                                               &handle_2);
+  mem_used = mem_used + incremental_mem_used_handle_2;
+  ASSERT_EQ(s, Status::OK());
+  EXPECT_TRUE(handle_2 != nullptr);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  // To test that
+  // CacheReservationManager::CacheReservationHandle::~CacheReservationHandle()
+  // works correctly in releasing the cache reserved for the handle
+  handle_1.reset();
+  EXPECT_TRUE(handle_1 == nullptr);
+  mem_used = mem_used - incremental_mem_used_handle_1;
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  // To test that the actual CacheReservationManager object won't be
+  // deallocated as long as there remain handles pointing to it.
+  // We strongly recommend deallocating the CacheReservationManager object only
+  // after all its handles are deallocated, to keep things easy to reason about
+  test_cache_rev_mng.reset();
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  handle_2.reset();
+  // The CacheReservationManager object is now deallocated since all the
+  // handles and its original pointer are gone
+  mem_used = mem_used - incremental_mem_used_handle_2;
+  EXPECT_EQ(mem_used, 0);
+  EXPECT_EQ(cache->GetPinnedUsage(), mem_used);
+}
+}  // namespace ROCKSDB_NAMESPACE
+
+// Test binary entry point: installs the stack trace handler, hands the
+// command line to GoogleTest, then runs all registered tests and returns
+// their combined result as the process exit code.
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  const int test_result = RUN_ALL_TESTS();
+  return test_result;
+}
--- a/cache/cache_test.cc
+++ b/cache/cache_test.cc
--- a/cache/charged_cache.cc
+++ b/cache/charged_cache.cc
@ -0,0 +1,109 @@
+//  Copyright (c) Meta Platforms, Inc. and affiliates.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/charged_cache.h"
+
+#include <memory>
+#include <utility>
+
+#include "cache/cache_reservation_manager.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Wraps `cache` and charges its memory usage against `block_cache`.
+//
+// `cache`       - the cache every call is forwarded to (the CacheWrapper
+//                 target).
+// `block_cache` - the cache in which a reservation manager with role
+//                 CacheEntryRole::kBlobCache reserves space.
+//
+// Both parameters are received by value, so they are moved into their final
+// owners rather than copied again (each shared_ptr copy costs an atomic
+// reference-count increment and decrement).
+ChargedCache::ChargedCache(std::shared_ptr<Cache> cache,
+                           std::shared_ptr<Cache> block_cache)
+    : CacheWrapper(std::move(cache)),
+      cache_res_mgr_(std::make_shared<ConcurrentCacheReservationManager>(
+          std::make_shared<
+              CacheReservationManagerImpl<CacheEntryRole::kBlobCache>>(
+              std::move(block_cache)))) {}
+
+// Inserts into the wrapped cache and, if that succeeds, re-syncs the cache
+// reservation with the wrapped cache's current total usage. The status of
+// the underlying Insert is returned unchanged; a failure to update the
+// reservation is deliberately ignored.
+Status ChargedCache::Insert(const Slice& key, ObjectPtr obj,
+                            const CacheItemHelper* helper, size_t charge,
+                            Handle** handle, Priority priority) {
+  Status insert_status =
+      target_->Insert(key, obj, helper, charge, handle, priority);
+  if (!insert_status.ok()) {
+    return insert_status;
+  }
+
+  // A successful Insert may have evicted other entries if the cache was full,
+  // so report the total usage to the reservation manager instead of adding
+  // `charge`.
+  assert(cache_res_mgr_);
+  const size_t usage_after_insert = target_->GetUsage();
+  cache_res_mgr_->UpdateCacheReservation(usage_after_insert)
+      .PermitUncheckedError();
+  return insert_status;
+}
+
+// Looks up `key` in the wrapped cache and returns whatever handle it yields.
+// When the caller supplied a helper with a create callback, the cache
+// reservation is additionally re-synced with the wrapped cache's total usage.
+Cache::Handle* ChargedCache::Lookup(const Slice& key,
+                                    const CacheItemHelper* helper,
+                                    CreateContext* create_context,
+                                    Priority priority, Statistics* stats) {
+  Cache::Handle* const found =
+      target_->Lookup(key, helper, create_context, priority, stats);
+
+  // Only a lookup that can create an object may promote the KV pair from the
+  // secondary cache to the primary cache and thereby change the usage.
+  const bool may_have_promoted = helper != nullptr && helper->create_cb;
+  if (!may_have_promoted) {
+    return found;
+  }
+
+  assert(cache_res_mgr_);
+  const size_t usage_after_lookup = target_->GetUsage();
+  cache_res_mgr_->UpdateCacheReservation(usage_after_lookup)
+      .PermitUncheckedError();
+  return found;
+}
+
+// Waits on the given async lookups via the wrapped cache, then re-syncs the
+// cache reservation with the wrapped cache's total usage.
+void ChargedCache::WaitAll(AsyncLookupHandle* async_handles, size_t count) {
+  target_->WaitAll(async_handles, count);
+
+  // Promotions may have happened. Some could already be complete when
+  // StartAsyncLookup returns, but Wait/WaitAll will generally be used, so it
+  // is simpler to update the reservation here.
+  assert(cache_res_mgr_);
+  const size_t usage_after_wait = target_->GetUsage();
+  cache_res_mgr_->UpdateCacheReservation(usage_after_wait)
+      .PermitUncheckedError();
+}
+
+// Releases `handle` in the wrapped cache. If that erased the entry, the cache
+// reservation is decreased by the usage the handle accounted for. Returns
+// whether the entry was erased.
+bool ChargedCache::Release(Cache::Handle* handle, bool useful,
+                           bool erase_if_last_ref) {
+  // Capture the handle's usage before releasing it, because the release may
+  // erase the entry.
+  const size_t handle_usage = target_->GetUsage(handle);
+  const bool was_erased = target_->Release(handle, useful, erase_if_last_ref);
+  if (!was_erased) {
+    return false;
+  }
+
+  assert(cache_res_mgr_);
+  cache_res_mgr_->UpdateCacheReservation(handle_usage, /* increase */ false)
+      .PermitUncheckedError();
+  return true;
+}
+
+// Overload without the `useful` flag: releases `handle` in the wrapped cache
+// and, if the entry was erased, decreases the cache reservation by the usage
+// the handle accounted for. Returns whether the entry was erased.
+bool ChargedCache::Release(Cache::Handle* handle, bool erase_if_last_ref) {
+  // Capture the handle's usage before releasing it, because the release may
+  // erase the entry.
+  const size_t handle_usage = target_->GetUsage(handle);
+  const bool was_erased = target_->Release(handle, erase_if_last_ref);
+  if (!was_erased) {
+    return false;
+  }
+
+  assert(cache_res_mgr_);
+  cache_res_mgr_->UpdateCacheReservation(handle_usage, /* increase */ false)
+      .PermitUncheckedError();
+  return true;
+}
+
+// Erases `key` from the wrapped cache, then re-syncs the cache reservation
+// with the wrapped cache's total usage.
+void ChargedCache::Erase(const Slice& key) {
+  target_->Erase(key);
+
+  assert(cache_res_mgr_);
+  const size_t usage_after_erase = target_->GetUsage();
+  cache_res_mgr_->UpdateCacheReservation(usage_after_erase)
+      .PermitUncheckedError();
+}
+
+// Asks the wrapped cache to erase its unreferenced entries, then re-syncs the
+// cache reservation with the wrapped cache's total usage.
+void ChargedCache::EraseUnRefEntries() {
+  target_->EraseUnRefEntries();
+
+  assert(cache_res_mgr_);
+  const size_t usage_after_erase = target_->GetUsage();
+  cache_res_mgr_->UpdateCacheReservation(usage_after_erase)
+      .PermitUncheckedError();
+}
+
+// Changes the capacity of the wrapped cache, then re-syncs the cache
+// reservation with the wrapped cache's total usage.
+void ChargedCache::SetCapacity(size_t capacity) {
+  target_->SetCapacity(capacity);
+
+  // Decreasing the capacity can evict entries, so the reservation has to be
+  // refreshed here as well.
+  assert(cache_res_mgr_);
+  const size_t usage_after_resize = target_->GetUsage();
+  cache_res_mgr_->UpdateCacheReservation(usage_after_resize)
+      .PermitUncheckedError();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/charged_cache.h
+++ b/cache/charged_cache.h
@ -0,0 +1,59 @@
+//  Copyright (c) Meta Platforms, Inc. and affiliates.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+
+#include "port/port.h"
+#include "rocksdb/advanced_cache.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ConcurrentCacheReservationManager;
+
+// A cache interface which wraps around another cache and takes care of
+// reserving space in block cache towards a single global memory limit, and
+// forwards all the calls to the underlying cache.
+//
+// Every overridden operation first calls the wrapped cache and then tells the
+// reservation manager about the wrapped cache's usage. Errors returned by the
+// reservation update are intentionally ignored (PermitUncheckedError), so the
+// results seen by callers are always those of the wrapped cache.
+class ChargedCache : public CacheWrapper {
+ public:
+  // `cache` is the wrapped cache that all calls are forwarded to.
+  // `block_cache` is the cache in which space is reserved, through a
+  // reservation manager using CacheEntryRole::kBlobCache.
+  ChargedCache(std::shared_ptr<Cache> cache,
+               std::shared_ptr<Cache> block_cache);
+
+  // Forwards to the wrapped cache. On success the reservation is updated to
+  // the wrapped cache's total usage (an insert may evict other entries).
+  // Returns the wrapped cache's status.
+  Status Insert(const Slice& key, ObjectPtr obj, const CacheItemHelper* helper,
+                size_t charge, Handle** handle = nullptr,
+                Priority priority = Priority::LOW) override;
+
+  // Forwards to the wrapped cache and returns its handle. If `helper` is
+  // non-null and has a `create_cb`, the reservation is also updated to the
+  // wrapped cache's total usage, since such a lookup may promote an entry
+  // from the secondary cache.
+  Cache::Handle* Lookup(const Slice& key, const CacheItemHelper* helper,
+                        CreateContext* create_context,
+                        Priority priority = Priority::LOW,
+                        Statistics* stats = nullptr) override;
+
+  // Forwards to the wrapped cache, then updates the reservation to the
+  // wrapped cache's total usage.
+  void WaitAll(AsyncLookupHandle* async_handles, size_t count) override;
+
+  // Both overloads forward to the wrapped cache and return whether the entry
+  // was erased. When it was, the reservation is decreased by the usage that
+  // was reported for `handle` before the release.
+  bool Release(Cache::Handle* handle, bool useful,
+               bool erase_if_last_ref = false) override;
+  bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
+
+  // Forward to the wrapped cache, then update the reservation to the wrapped
+  // cache's total usage.
+  void Erase(const Slice& key) override;
+  void EraseUnRefEntries() override;
+
+  static const char* kClassName() { return "ChargedCache"; }
+  const char* Name() const override { return kClassName(); }
+
+  // Forwards to the wrapped cache, then updates the reservation to the
+  // wrapped cache's total usage (shrinking the capacity can evict entries).
+  void SetCapacity(size_t capacity) override;
+
+  // Returns a non-owning pointer to the wrapped cache.
+  inline Cache* GetCache() const { return target_.get(); }
+
+  // Returns a non-owning pointer to the reservation manager. The TEST_ prefix
+  // suggests this is meant for tests only.
+  inline ConcurrentCacheReservationManager* TEST_GetCacheReservationManager()
+      const {
+    return cache_res_mgr_.get();
+  }
+
+ private:
+  // Reservation manager created in the constructor on top of `block_cache`.
+  std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/clock_cache.cc
+++ b/cache/clock_cache.cc
--- a/cache/clock_cache.h
+++ b/cache/clock_cache.h
@ -0,0 +1,756 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "cache/cache_key.h"
+#include "cache/sharded_cache.h"
+#include "port/lang.h"
+#include "port/malloc.h"
+#include "port/port.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/secondary_cache.h"
+#include "util/autovector.h"
+#include "util/math.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace clock_cache {
+
+// Forward declaration of friend class.
+class ClockCacheTest;
+
+// HyperClockCache is an alternative to LRUCache specifically tailored for
+// use as BlockBasedTableOptions::block_cache
+//
+// Benefits
+// --------
+// * Fully lock free (no waits or spins) for efficiency under high concurrency
+// * Optimized for hot path reads. For concurrency control, most Lookup() and
+// essentially all Release() are a single atomic add operation.
+// * Eviction on insertion is fully parallel and lock-free.
+// * Uses a generalized + aging variant of CLOCK eviction that might outperform
+// LRU in some cases. (For background, see
+// https://en.wikipedia.org/wiki/Page_replacement_algorithm)
+//
+// Costs
+// -----
+// * Hash table is not resizable (for lock-free efficiency) so capacity is not
+// dynamically changeable. Rely on an estimated average value (block) size for
+// space+time efficiency. (See estimated_entry_charge option details.)
+// * Insert usually does not (but might) overwrite a previous entry associated
+// with a cache key. This is OK for RocksDB uses of Cache.
+// * Only supports keys of exactly 16 bytes, which is what RocksDB uses for
+// block cache (not row cache or table cache).
+// * SecondaryCache is not supported.
+// * Cache priorities are less aggressively enforced. Unlike LRUCache, enough
+// transient LOW or BOTTOM priority items can evict HIGH priority entries that
+// are not referenced recently (or often) enough.
+// * If pinned entries leave little or nothing eligible for eviction,
+// performance can degrade substantially, because of clock eviction eating
+// CPU looking for evictable entries and because Release does not
+// pro-actively delete unreferenced entries when the cache is over-full.
+// Specifically, this makes this implementation more susceptible to the
+// following combination:
+//   * num_shard_bits is high (e.g. 6)
+//   * capacity small (e.g. some MBs)
+//   * some large individual entries (e.g. non-partitioned filters)
+// where individual entries occupy a large portion of their shard capacity.
+// This should be mostly mitigated by the implementation picking a lower
+// number of cache shards than LRUCache for a given capacity (when
+// num_shard_bits is not overridden; see calls to GetDefaultCacheShardBits()).
+// * With strict_capacity_limit=false, respecting the capacity limit is not as
+// aggressive as LRUCache. The limit might be transiently exceeded by a very
+// small number of entries even when not strictly necessary, and slower to
+// recover after pinning forces limit to be substantially exceeded. (Even with
+// strict_capacity_limit=true, RocksDB will nevertheless transiently allocate
+// memory before discovering it is over the block cache capacity, so this
+// should not be a detectable regression in respecting memory limits, except
+// on exceptionally small caches.)
+// * In some cases, erased or duplicated entries might not be freed
+// immediately. They will eventually be freed by eviction from further Inserts.
+// * Internal metadata can overflow if the number of simultaneous references
+// to a cache handle reaches many millions.
+//
+// High-level eviction algorithm
+// -----------------------------
+// A score (or "countdown") is maintained for each entry, initially determined
+// by priority. The score is incremented on each Lookup, up to a max of 3,
+// though is easily returned to previous state if useful=false with Release.
+// During CLOCK-style eviction iteration, entries with score > 0 are
+// decremented if currently unreferenced and entries with score == 0 are
+// evicted if currently unreferenced. Note that scoring might not be perfect
+// because entries can be referenced transiently within the cache even when
+// there are no outside references to the entry.
+//
+// Cache sharding like LRUCache is used to reduce contention on usage+eviction
+// state, though here the performance improvement from more shards is small,
+// and (as noted above) potentially detrimental if shard capacity is too close
+// to largest entry size. Here cache sharding mostly only affects cache update
+// (Insert / Erase) performance, not read performance.
+//
+// Read efficiency (hot path)
+// --------------------------
+// Mostly to minimize the cost of accessing metadata blocks with
+// cache_index_and_filter_blocks=true, we focus on optimizing Lookup and
+// Release. In terms of concurrency, at a minimum, these operations have
+// to do reference counting (and Lookup has to compare full keys in a safe
+// way). Can we fold in all the other metadata tracking *for free* with
+// Lookup and Release doing a simple atomic fetch_add/fetch_sub? (Assume
+// for the moment that Lookup succeeds on the first probe.)
+//
+// We have a clever way of encoding an entry's reference count and countdown
+// clock so that Lookup and Release are each usually a single atomic addition.
+// In a single metadata word we have both an "acquire" count, incremented by
+// Lookup, and a "release" count, incremented by Release. If useful=false,
+// Release can instead decrement the acquire count. Thus the current ref
+// count is (acquires - releases), and the countdown clock is min(3, acquires).
+// Note that only unreferenced entries (acquires == releases) are eligible
+// for CLOCK manipulation and eviction. We tolerate use of more expensive
+// compare_exchange operations for cache writes (insertions and erasures).
+//
+// In a cache receiving many reads and few or no writes, it is possible
+// for the acquire and release counters to overflow. Assuming the *current*
+// refcount never reaches many millions, we only have to correct for
+// overflow in both counters in Release, not in Lookup. The overflow check
+// should be only 1-2 CPU cycles per Release because it is a predictable
+// branch on a simple condition on data already in registers.
+//
+// Slot states
+// -----------
+// We encode a state indicator into the same metadata word with the
+// acquire and release counters. This allows bigger state transitions to
+// be atomic. States:
+//
+// * Empty - slot is not in use and unowned. All other metadata and data is
+// in an undefined state.
+// * Construction - slot is exclusively owned by one thread, the thread
+// successfully entering this state, for populating or freeing data.
+// * Shareable (group) - slot holds an entry with counted references for
+// pinning and reading, including
+//   * Visible - slot holds an entry that can be returned by Lookup
+//   * Invisible - slot holds an entry that is not visible to Lookup
+//     (erased by user) but can be read by existing references, and ref count
+//     changed by Ref and Release.
+//
+// A special case is "standalone" entries, which are heap-allocated handles
+// not in the table. They are always Invisible and freed on zero refs.
+//
+// State transitions:
+// Empty -> Construction (in Insert): The encoding of state enables Insert to
+// perform an optimistic atomic bitwise-or to take ownership if a slot is
+// empty, or otherwise make no state change.
+//
+// Construction -> Visible (in Insert): This can be a simple assignment to the
+// metadata word because the current thread has exclusive ownership and other
+// metadata is meaningless.
+//
+// Visible -> Invisible (in Erase): This can be a bitwise-and while holding
+// a shared reference, which is safe because the change is idempotent (in case
+// of parallel Erase). By the way, we never go Invisible->Visible.
+//
+// Shareable -> Construction (in Evict part of Insert, in Erase, and in
+// Release if Invisible): This starts the freeing/deleting of an
+// unreferenced entry. We have to use compare_exchange to ensure we only make
+// this transition when there are zero refs.
+//
+// Construction -> Empty (in same places): This is for completing free/delete
+// of an entry. A "release" atomic store suffices, as we have exclusive
+// ownership of the slot but have to ensure none of the data member reads are
+// re-ordered after committing the state transition.
+//
+// Insert
+// ------
+// If Insert were to guarantee replacing an existing entry for a key, there
+// would be complications for concurrency and efficiency. First, consider how
+// many probes to get to an entry. To ensure Lookup never waits and
+// availability of a key is uninterrupted, we would need to use a different
+// slot for a new entry for the same key. This means it is most likely in a
+// later probing position than the old version, which should soon be removed.
+// (Also, an entry is too big to replace atomically, even if no current refs.)
+//
+// However, overwrite capability is not really needed by RocksDB. Also, we
+// know from our "redundant" stats that overwrites are very rare for the block
+// cache, so we should not spend much to make them effective.
+//
+// So instead we Insert as soon as we find an empty slot in the probing
+// sequence without seeing an existing (visible) entry for the same key. This
+// way we only insert if we can improve the probing performance, and we don't
+// need to probe beyond our insert position, assuming we are willing to let
+// the previous entry for the same key die of old age (eventual eviction from
+// not being used). We can reach a similar state with concurrent insertions,
+// where one will pass over the other while it is "under construction."
+// This temporary duplication is acceptable for RocksDB block cache because
+// we know redundant insertion is rare.
+//
+// Another problem to solve is what to return to the caller when we find an
+// existing entry whose probing position we cannot improve on, or when the
+// table occupancy limit has been reached. If strict_capacity_limit=false,
+// we must never fail Insert, and if a Handle* is provided, we have to return
+// a usable Cache handle on success. The solution to this (typically rare)
+// problem is "standalone" handles, which are usable by the caller but not
+// actually available for Lookup in the Cache. Standalone handles are allocated
+// independently on the heap and specially marked so that they are freed on
+// the heap when their last reference is released.
+//
+// Usage on capacity
+// -----------------
+// Insert takes different approaches to usage tracking depending on
+// strict_capacity_limit setting. If true, we enforce a kind of strong
+// consistency where compare-exchange is used to ensure the usage number never
+// exceeds its limit, and provide threads with an authoritative signal on how
+// much "usage" they have taken ownership of. With strict_capacity_limit=false,
+// we use a kind of "eventual consistency" where all threads Inserting to the
+// same cache shard might race on reserving the same space, but the
+// over-commitment will be worked out in later insertions. It is kind of a
+// dance because we don't want threads racing each other too much on paying
+// down the over-commitment (with eviction) either.
+//
+// Eviction
+// --------
+// A key part of Insert is evicting some entries currently unreferenced to
+// make room for new entries. The high-level eviction algorithm is described
+// above, but the details are also interesting. A key part is parallelizing
+// eviction with a single CLOCK pointer. This works by each thread working on
+// eviction pre-emptively incrementing the CLOCK pointer, and then CLOCK-
+// updating or evicting the incremented-over slot(s). To reduce contention at
+// the cost of possibly evicting too much, each thread increments the clock
+// pointer by 4, so commits to updating at least 4 slots per batch. As
+// described above, a CLOCK update will decrement the "countdown" of
+// unreferenced entries, or evict unreferenced entries with zero countdown.
+// Referenced entries are not updated, because we (presumably) don't want
+// long-referenced entries to age while referenced. Note however that we
+// cannot distinguish transiently referenced entries from cache user
+// references, so some CLOCK updates might be somewhat arbitrarily skipped.
+// This is OK as long as it is rare enough that eviction order is still
+// pretty good.
+//
+// There is no synchronization on the completion of the CLOCK updates, so it
+// is theoretically possible for another thread to cycle back around and have
+// two threads racing on CLOCK updates to the same slot. Thus, we cannot rely
+// on any implied exclusivity to make the updates or eviction more efficient.
+// These updates use an opportunistic compare-exchange (no loop), where a
+// racing thread might cause the update to be skipped without retry, but in
+// such case the update is likely not needed because the most likely update
+// to an entry is that it has become referenced. (TODO: test efficiency of
+// avoiding compare-exchange loop)
+//
+// Release
+// -------
+// In the common case, Release is a simple atomic increment of the release
+// counter. There is a simple overflow check that only does another atomic
+// update in extremely rare cases, so costs almost nothing.
+//
+// If the Release specifies "not useful", we can instead decrement the
+// acquire counter, which returns to the same CLOCK state as before Lookup
+// or Ref.
+//
+// Adding a check for over-full cache on every release to zero-refs would
+// likely be somewhat expensive, increasing read contention on cache shard
+// metadata. Instead we are less aggressive about deleting entries right
+// away in those cases.
+//
+// However Release tries to immediately delete entries reaching zero refs
+// if (a) erase_if_last_ref is set by the caller, or (b) the entry is already
+// marked invisible. Both of these are checks on values already in CPU
+// registers so do not increase cross-CPU contention when not applicable.
+// When applicable, they use a compare-exchange loop to take exclusive
+// ownership of the slot for freeing the entry. These are rare cases
+// that should not usually affect performance.
+//
+// Erase
+// -----
+// Searches for an entry like Lookup but moves it to Invisible state if found.
+// This state transition is with bit operations so is idempotent and safely
+// done while only holding a shared "read" reference. Like Release, it makes
+// a best effort to immediately release an Invisible entry that reaches zero
+// refs, but there are some corner cases where it will only be freed by the
+// clock eviction process.
+
+// ----------------------------------------------------------------------- //
+
+// The load factor p is a real number in (0, 1) such that at all
+// times at most a fraction p of all slots, without counting tombstones,
+// are occupied by elements. This means that the probability that a random
+// probe hits an occupied slot is at most p, and thus at most 1/p probes
+// are required on average. For example, p = 70% implies that between 1 and 2
+// probes are needed on average (bear in mind that this reasoning doesn't
+// consider the effects of clustering over time, which should be negligible
+// with double hashing).
+// NOTE(review): the textbook open-addressing analysis under uniform hashing
+// gives about 1/(1-p) expected probes for an unsuccessful search and
+// (1/p)*ln(1/(1-p)) for a successful one (roughly 3.3 and 1.7 at p = 0.7), so
+// the "1/p" figure above appears to describe successful lookups only --
+// confirm which bound is intended.
+// Because the size of the hash table is always rounded up to the next
+// power of 2, p is really an upper bound on the actual load factor---the
+// actual load factor is anywhere between p/2 and p. This is a bit wasteful,
+// but bear in mind that slots only hold metadata, not actual values.
+// Since space cost is dominated by the values (the LSM blocks),
+// overprovisioning the table with metadata only increases the total cache space
+// usage by a tiny fraction.
+constexpr double kLoadFactor = 0.7;
+
+// The user can exceed kLoadFactor if the sizes of the inserted values don't
+// match the estimated entry charge (see the estimated_entry_charge option),
+// or in some rare cases with strict_capacity_limit == false. To avoid
+// degenerate performance, we set a strict upper bound on the load factor.
+constexpr double kStrictLoadFactor = 0.84;
+
+// The plain (non-atomic) data of a cache entry: the cached object, its
+// helper, the hashed key and the total charge. ClockHandle derives from this
+// struct and adds the atomic metadata word.
+struct ClockHandleBasicData {
+  // The cached object.
+  Cache::ObjectPtr value = nullptr;
+  // Helper associated with `value`.
+  const Cache::CacheItemHelper* helper = nullptr;
+  // A lossless, reversible hash of the fixed-size (16 byte) cache key. This
+  // eliminates the need to store a hash separately.
+  UniqueId64x2 hashed_key = kNullUniqueId64x2;
+  // Charge recorded for this entry.
+  // NOTE(review): presumably includes any metadata charge on top of the
+  // value's own charge -- confirm against the code that fills it in.
+  size_t total_charge = 0;
+
+  inline size_t GetTotalCharge() const { return total_charge; }
+
+  // Calls deleter (if non-null) on cache key and value
+  void FreeData(MemoryAllocator* allocator) const;
+
+  // Required by concept HandleImpl
+  const UniqueId64x2& GetHash() const { return hashed_key; }
+};
+
+// A cache entry's basic data plus the atomic `meta` word that packs the
+// acquire counter, the release counter and the slot state into one 64-bit
+// value, and a flag marking standalone (separately heap-allocated) handles.
+struct ClockHandle : public ClockHandleBasicData {
+  // Constants for handling the atomic `meta` word, which tracks most of the
+  // state of the handle. The meta word looks like this:
+  // low bits                                                     high bits
+  // -----------------------------------------------------------------------
+  // | acquire counter          | release counter           | state marker |
+  // -----------------------------------------------------------------------
+  // With kCounterNumBits = 30 the acquire counter occupies bits [0, 30), the
+  // release counter bits [30, 60), and the state marker starts at bit 60
+  // (kStateShift).
+
+  // For reading or updating counters in meta word.
+  static constexpr uint8_t kCounterNumBits = 30;
+  static constexpr uint64_t kCounterMask = (uint64_t{1} << kCounterNumBits) - 1;
+
+  static constexpr uint8_t kAcquireCounterShift = 0;
+  static constexpr uint64_t kAcquireIncrement = uint64_t{1}
+                                                << kAcquireCounterShift;
+  static constexpr uint8_t kReleaseCounterShift = kCounterNumBits;
+  static constexpr uint64_t kReleaseIncrement = uint64_t{1}
+                                                << kReleaseCounterShift;
+
+  // For reading or updating the state marker in meta word
+  static constexpr uint8_t kStateShift = 2U * kCounterNumBits;
+
+  // Bits contribution to state marker.
+  // Occupied means any state other than empty
+  static constexpr uint8_t kStateOccupiedBit = 0b100;
+  // Shareable means the entry is reference counted (visible or invisible)
+  // (only set if also occupied)
+  static constexpr uint8_t kStateShareableBit = 0b010;
+  // Visible is only set if also shareable
+  static constexpr uint8_t kStateVisibleBit = 0b001;
+
+  // Complete state markers (not shifted into full word)
+  static constexpr uint8_t kStateEmpty = 0b000;
+  static constexpr uint8_t kStateConstruction = kStateOccupiedBit;
+  static constexpr uint8_t kStateInvisible =
+      kStateOccupiedBit | kStateShareableBit;
+  static constexpr uint8_t kStateVisible =
+      kStateOccupiedBit | kStateShareableBit | kStateVisibleBit;
+
+  // Constants for initializing the countdown clock. (Countdown clock is only
+  // in effect with zero refs, acquire counter == release counter, and in that
+  // case the countdown clock == both of those counters.)
+  static constexpr uint8_t kHighCountdown = 3;
+  static constexpr uint8_t kLowCountdown = 2;
+  static constexpr uint8_t kBottomCountdown = 1;
+  // During clock update, treat any countdown clock value greater than this
+  // value the same as this value.
+  static constexpr uint8_t kMaxCountdown = kHighCountdown;
+  // TODO: make these countdown values tuning parameters for eviction?
+
+  // See above. Mutable for read reference counting.
+  mutable std::atomic<uint64_t> meta{};
+
+  // Whether this is a "standalone" (detached) handle that is independently
+  // allocated with `new` (so must be deleted with `delete`).
+  // TODO: ideally this would be packed into some other data field, such
+  // as upper bits of total_charge, but that incurs a measurable performance
+  // regression.
+  bool standalone = false;
+
+  inline bool IsStandalone() const { return standalone; }
+
+  inline void SetStandalone() { standalone = true; }
+};  // struct ClockHandle
+
+class BaseClockTable {  // Base for clock tables: shared usage/occupancy state
+ public:
+  BaseClockTable(CacheMetadataChargePolicy metadata_charge_policy,
+                 MemoryAllocator* allocator,
+                 const Cache::EvictionCallback* eviction_callback,
+                 const uint32_t* hash_seed)
+      : metadata_charge_policy_(metadata_charge_policy),
+        allocator_(allocator),
+        eviction_callback_(*eviction_callback),  // kept as reference: non-null
+        hash_seed_(*hash_seed) {}  // kept as reference: must be non-null
+
+  template <class Table>  // Table supplies HandleImpl
+  typename Table::HandleImpl* CreateStandalone(ClockHandleBasicData& proto,
+                                               size_t capacity,
+                                               bool strict_capacity_limit,
+                                               bool allow_uncharged);
+
+  template <class Table>  // Table supplies HandleImpl
+  Status Insert(const ClockHandleBasicData& proto,
+                typename Table::HandleImpl** handle, Cache::Priority priority,
+                size_t capacity, bool strict_capacity_limit);
+
+  void Ref(ClockHandle& handle);
+
+  size_t GetOccupancy() const {
+    return occupancy_.load(std::memory_order_relaxed);
+  }
+
+  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
+
+  size_t GetStandaloneUsage() const {
+    return standalone_usage_.load(std::memory_order_relaxed);
+  }
+
+  uint32_t GetHashSeed() const { return hash_seed_; }  // reads via reference
+
+  struct EvictionData {  // eviction totals; both fields start at zero
+    size_t freed_charge = 0;
+    size_t freed_count = 0;
+  };
+
+  void TrackAndReleaseEvictedEntry(ClockHandle* h, EvictionData* data);
+
+#ifndef NDEBUG  // test-only helpers, compiled out when NDEBUG is defined
+  // Acquire N references
+  void TEST_RefN(ClockHandle& handle, size_t n);
+  // Helper for TEST_ReleaseN
+  void TEST_ReleaseNMinus1(ClockHandle* handle, size_t n);
+#endif
+
+ private:  // fns
+  // Creates a "standalone" handle for returning from an Insert operation that
+  // cannot be completed by actually inserting into the table.
+  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
+  template <class HandleImpl>
+  HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=true rules. This
+  // means the operation might fail with Status::MemoryLimit. If
+  // `need_evict_for_occupancy`, then eviction of at least one entry is
+  // required, and the operation should fail if not possible.
+  // NOTE: Otherwise, occupancy_ is not managed in this function
+  template <class Table>  // Table supplies InsertState
+  Status ChargeUsageMaybeEvictStrict(size_t total_charge, size_t capacity,
+                                     bool need_evict_for_occupancy,
+                                     typename Table::InsertState& state);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=false rules. This
+  // means that updating `usage_` always succeeds even if forced to exceed
+  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
+  // entry is required, and the operation should return false if such eviction
+  // is not possible. `usage_` is not updated in that case. Otherwise, returns
+  // true, indicating success.
+  // NOTE: occupancy_ is not managed in this function
+  template <class Table>  // Table supplies InsertState
+  bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, size_t capacity,
+                                      bool need_evict_for_occupancy,
+                                      typename Table::InsertState& state);
+
+ protected:  // data
+  // We partition the following members into different cache lines
+  // to avoid false sharing among Lookup, Release, Erase and Insert
+  // operations in ClockCacheShard.
+
+  // Clock algorithm sweep pointer.
+  std::atomic<uint64_t> clock_pointer_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  // Number of elements in the table.
+  std::atomic<size_t> occupancy_{};
+
+  // Memory usage by entries tracked by the cache (including standalone)
+  std::atomic<size_t> usage_{};
+
+  // Part of usage by standalone entries (not in table)
+  std::atomic<size_t> standalone_usage_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  const CacheMetadataChargePolicy metadata_charge_policy_;
+
+  // From Cache, for deleter
+  MemoryAllocator* const allocator_;
+
+  // A reference to Cache::eviction_callback_
+  const Cache::EvictionCallback& eviction_callback_;
+
+  // A reference to ShardedCacheBase::hash_seed_
+  const uint32_t& hash_seed_;
+};
+
+class HyperClockTable : public BaseClockTable {  // table over a fixed array
+ public:
+  // Target size to be exactly a common cache line size (see static_assert in
+  // clock_cache.cc)
+  struct ALIGN_AS(64U) HandleImpl : public ClockHandle {
+    // The number of elements that hash to this slot or a lower one, but wind
+    // up in this slot or a higher one.
+    std::atomic<uint32_t> displacements{};
+
+  };  // struct HandleImpl
+
+  struct Opts {  // table-specific construction options
+    size_t estimated_value_size;
+  };
+
+  HyperClockTable(size_t capacity, bool strict_capacity_limit,
+                  CacheMetadataChargePolicy metadata_charge_policy,
+                  MemoryAllocator* allocator,
+                  const Cache::EvictionCallback* eviction_callback,
+                  const uint32_t* hash_seed, const Opts& opts);
+  ~HyperClockTable();
+
+  // For BaseClockTable::Insert
+  struct InsertState {};  // empty: this table carries no per-insert state
+
+  void StartInsert(InsertState& state);
+
+  // Returns true iff there is room for the proposed number of entries.
+  bool GrowIfNeeded(size_t new_occupancy, InsertState& state);
+
+  HandleImpl* DoInsert(const ClockHandleBasicData& proto,
+                       uint64_t initial_countdown, bool take_ref,
+                       InsertState& state);
+
+  // Runs the clock eviction algorithm trying to reclaim at least
+  // requested_charge. The amount evicted is (presumably) reported via `data`;
+  // it could be less if evicting that much appears impossible without blocking.
+  void Evict(size_t requested_charge, InsertState& state, EvictionData* data);
+
+  HandleImpl* Lookup(const UniqueId64x2& hashed_key);
+
+  bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref);
+
+  void Erase(const UniqueId64x2& hashed_key);
+
+  void EraseUnRefEntries();
+
+  size_t GetTableSize() const { return size_t{1} << length_bits_; }
+
+  size_t GetOccupancyLimit() const { return occupancy_limit_; }
+
+  const HandleImpl* HandlePtr(size_t idx) const { return &array_[idx]; }
+
+#ifndef NDEBUG  // test-only helpers, compiled out when NDEBUG is defined
+  size_t& TEST_MutableOccupancyLimit() const {
+    return const_cast<size_t&>(occupancy_limit_);  // casts away member const
+  }
+
+  // Release N references
+  void TEST_ReleaseN(HandleImpl* handle, size_t n);
+#endif
+
+ private:  // functions
+  // Returns x mod 2^{length_bits_}.
+  inline size_t ModTableSize(uint64_t x) {
+    return BitwiseAnd(x, length_bits_mask_);
+  }
+
+  // Returns the first slot in the probe sequence with a handle e such that
+  // match_fn(e) is true. At every step, the function first tests whether
+  // match_fn(e) holds. If this is false, it evaluates abort_fn(e) to decide
+  // whether the search should be aborted, and if so, FindSlot immediately
+  // returns nullptr. For every handle e that is not a match and not aborted,
+  // FindSlot runs update_fn(e, is_last) where is_last is set to true iff that
+  // slot will be the last probed because the next would cycle back to the first
+  // slot probed. This function uses templates instead of std::function to
+  // minimize the risk of heap-allocated closures being created.
+  template <typename MatchFn, typename AbortFn, typename UpdateFn>
+  inline HandleImpl* FindSlot(const UniqueId64x2& hashed_key,
+                              const MatchFn& match_fn, const AbortFn& abort_fn,
+                              const UpdateFn& update_fn);
+
+  // Re-decrement all displacements in probe path starting from beginning
+  // until (not including) the given handle
+  inline void Rollback(const UniqueId64x2& hashed_key, const HandleImpl* h);
+
+  // Subtracts `total_charge` from `usage_` and 1 from `occupancy_`.
+  // Ideally this comes after releasing the entry itself so that we
+  // actually have the available occupancy/usage that is claimed.
+  // However, that means total_charge has to be saved from the handle
+  // before releasing it so that it can be provided to this function.
+  inline void ReclaimEntryUsage(size_t total_charge);
+
+  MemoryAllocator* GetAllocator() const { return allocator_; }
+
+  // Returns the number of bits used to hash an element in the hash
+  // table.
+  static int CalcHashBits(size_t capacity, size_t estimated_value_size,
+                          CacheMetadataChargePolicy metadata_charge_policy);
+
+ private:  // data
+  // Number of hash bits used for table index.
+  // The size of the table is 1 << length_bits_.
+  const int length_bits_;
+
+  // For faster computation of ModTableSize.
+  const size_t length_bits_mask_;
+
+  // Maximum number of elements the user can store in the table.
+  const size_t occupancy_limit_;
+
+  // Array of slots comprising the hash table.
+  const std::unique_ptr<HandleImpl[]> array_;
+};  // class HyperClockTable
+
+// A single shard of sharded cache.
+template <class Table>  // Table provides HandleImpl and Opts
+class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase {
+ public:
+  ClockCacheShard(size_t capacity, bool strict_capacity_limit,
+                  CacheMetadataChargePolicy metadata_charge_policy,
+                  MemoryAllocator* allocator,
+                  const Cache::EvictionCallback* eviction_callback,
+                  const uint32_t* hash_seed, const typename Table::Opts& opts);
+
+  // For CacheShard concept
+  using HandleImpl = typename Table::HandleImpl;
+  // Hash is lossless hash of 128-bit key
+  using HashVal = UniqueId64x2;
+  using HashCref = const HashVal&;
+  static inline uint32_t HashPieceForSharding(HashCref hash) {
+    return Upper32of64(hash[0]);
+  }
+  static inline HashVal ComputeHash(const Slice& key, uint32_t seed) {
+    assert(key.size() == kCacheKeySize);  // only fixed-size keys are accepted
+    HashVal in;
+    HashVal out;
+    // NOTE: endian dependence
+    // TODO: use GetUnaligned?
+    std::memcpy(&in, key.data(), kCacheKeySize);
+    BijectiveHash2x64(in[1], in[0] ^ seed, &out[1], &out[0]);  // seed into in[0]
+    return out;
+  }
+
+  // For reconstructing key from hashed_key. Requires the caller to provide
+  // backing storage for the Slice in `unhashed`
+  static inline Slice ReverseHash(const UniqueId64x2& hashed,
+                                  UniqueId64x2* unhashed, uint32_t seed) {
+    BijectiveUnhash2x64(hashed[1], hashed[0], &(*unhashed)[1], &(*unhashed)[0]);
+    (*unhashed)[0] ^= seed;  // undo the seed XOR applied in ComputeHash
+    // NOTE: endian dependence
+    return Slice(reinterpret_cast<const char*>(unhashed), kCacheKeySize);
+  }
+
+  // Although capacity is dynamically changeable, the number of table slots is
+  // not, so growing capacity substantially could lead to hitting occupancy
+  // limit.
+  void SetCapacity(size_t capacity);
+
+  void SetStrictCapacityLimit(bool strict_capacity_limit);
+
+  Status Insert(const Slice& key, const UniqueId64x2& hashed_key,
+                Cache::ObjectPtr value, const Cache::CacheItemHelper* helper,
+                size_t charge, HandleImpl** handle, Cache::Priority priority);
+
+  HandleImpl* CreateStandalone(const Slice& key, const UniqueId64x2& hashed_key,
+                               Cache::ObjectPtr obj,
+                               const Cache::CacheItemHelper* helper,
+                               size_t charge, bool allow_uncharged);
+
+  HandleImpl* Lookup(const Slice& key, const UniqueId64x2& hashed_key);
+
+  bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref);
+
+  bool Release(HandleImpl* handle, bool erase_if_last_ref = false);
+
+  bool Ref(HandleImpl* handle);
+
+  void Erase(const Slice& key, const UniqueId64x2& hashed_key);
+
+  size_t GetCapacity() const;
+
+  size_t GetUsage() const;
+
+  size_t GetStandaloneUsage() const;
+
+  size_t GetPinnedUsage() const;
+
+  size_t GetOccupancyCount() const;
+
+  size_t GetOccupancyLimit() const;
+
+  size_t GetTableAddressCount() const;
+
+  void ApplyToSomeEntries(
+      const std::function<void(const Slice& key, Cache::ObjectPtr obj,
+                               size_t charge,
+                               const Cache::CacheItemHelper* helper)>& callback,
+      size_t average_entries_per_lock, size_t* state);
+
+  void EraseUnRefEntries();
+
+  std::string GetPrintableOptions() const { return std::string{}; }  // none
+
+  HandleImpl* Lookup(const Slice& key, const UniqueId64x2& hashed_key,
+                     const Cache::CacheItemHelper* /*helper*/,
+                     Cache::CreateContext* /*create_context*/,
+                     Cache::Priority /*priority*/, Statistics* /*stats*/) {
+    return Lookup(key, hashed_key);  // the extra arguments are ignored
+  }
+
+#ifndef NDEBUG  // test-only helpers, compiled out when NDEBUG is defined
+  size_t& TEST_MutableOccupancyLimit() const {
+    return table_.TEST_MutableOccupancyLimit();
+  }
+  // Acquire/release N references
+  void TEST_RefN(HandleImpl* handle, size_t n);
+  void TEST_ReleaseN(HandleImpl* handle, size_t n);
+#endif
+
+ private:  // data
+  Table table_;
+
+  // Maximum total charge of all elements stored in the table.
+  std::atomic<size_t> capacity_;
+
+  // Whether to reject insertion if cache reaches its full capacity.
+  std::atomic<bool> strict_capacity_limit_;
+};  // class ClockCacheShard
+
+class HyperClockCache
+#ifdef NDEBUG  // the class is `final` only when NDEBUG is defined
+    final
+#endif
+    : public ShardedCache<ClockCacheShard<HyperClockTable>> {
+ public:
+  using Shard = ClockCacheShard<HyperClockTable>;  // same type as the base's arg
+
+  explicit HyperClockCache(const HyperClockCacheOptions& opts);
+
+  const char* Name() const override { return "HyperClockCache"; }
+
+  Cache::ObjectPtr Value(Handle* handle) override;
+
+  size_t GetCharge(Handle* handle) const override;
+
+  const CacheItemHelper* GetCacheItemHelper(Handle* handle) const override;
+
+  void ReportProblems(
+      const std::shared_ptr<Logger>& /*info_log*/) const override;
+};  // class HyperClockCache
+
+}  // namespace clock_cache
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/compressed_secondary_cache.cc
+++ b/cache/compressed_secondary_cache.cc
@ -0,0 +1,318 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/compressed_secondary_cache.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+
+#include "memory/memory_allocator_impl.h"
+#include "monitoring/perf_context_imp.h"
+#include "util/compression.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+CompressedSecondaryCache::CompressedSecondaryCache(
+    const CompressedSecondaryCacheOptions& opts)
+    : cache_(opts.LRUCacheOptions::MakeSharedCache()),  // base-class factory
+      cache_options_(opts),  // private copy of the options
+      cache_res_mgr_(std::make_shared<ConcurrentCacheReservationManager>(
+          std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
+              cache_))) {}  // reservations are made against cache_
+
+CompressedSecondaryCache::~CompressedSecondaryCache() {
+  assert(cache_res_mgr_->GetTotalReservedCacheSize() == 0);  // debug-only check
+}
+
+// Looks `key` up in the underlying cache and, on a hit with a payload,
+// rebuilds the object through `helper->create_cb` (uncompressing first unless
+// the entry was stored uncompressed).
+//
+// Returns nullptr when the key is absent, when only a payload-less dummy
+// entry exists, or when uncompression / object creation fails (in the failure
+// cases the entry is released with erase_if_last_ref=true).
+//
+// `advise_erase`: release the entry with erase_if_last_ref=true and leave a
+// zero-charge dummy in its place. Otherwise the entry is kept and
+// `kept_in_sec_cache` is set to true.
+std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
+    const Slice& key, const Cache::CacheItemHelper* helper,
+    Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase,
+    bool& kept_in_sec_cache) {
+  assert(helper);
+  kept_in_sec_cache = false;
+
+  Cache::Handle* entry = cache_->Lookup(key);
+  if (entry == nullptr) {
+    return nullptr;
+  }
+
+  void* stored = cache_->Value(entry);
+  if (stored == nullptr) {
+    // Entry without a payload (a dummy): leave it in place, report a miss.
+    cache_->Release(entry, /*erase_if_last_ref=*/false);
+    return nullptr;
+  }
+
+  // Locate the stored bytes and their length. With split/merge enabled the
+  // chunks are first merged into a buffer owned by `merged`.
+  CacheAllocationPtr merged;
+  CacheAllocationPtr* payload{nullptr};
+  size_t payload_size{0};
+  if (cache_options_.enable_custom_split_merge) {
+    merged = MergeChunksIntoValue(reinterpret_cast<CacheValueChunk*>(stored),
+                                  payload_size);
+    payload = &merged;
+  } else {
+    payload = reinterpret_cast<CacheAllocationPtr*>(stored);
+    payload_size = cache_->GetCharge(entry);
+  }
+  MemoryAllocator* allocator = cache_options_.memory_allocator.get();
+
+  const bool stored_uncompressed =
+      cache_options_.compression_type == kNoCompression ||
+      cache_options_.do_not_compress_roles.Contains(helper->role);
+
+  Status s;
+  Cache::ObjectPtr value{nullptr};
+  size_t charge{0};
+  if (stored_uncompressed) {
+    s = helper->create_cb(Slice(payload->get(), payload_size), create_context,
+                          allocator, &value, &charge);
+  } else {
+    UncompressionContext uncompression_context(cache_options_.compression_type);
+    UncompressionInfo uncompression_info(uncompression_context,
+                                         UncompressionDict::GetEmptyDict(),
+                                         cache_options_.compression_type);
+
+    size_t uncompressed_size{0};
+    CacheAllocationPtr uncompressed = UncompressData(
+        uncompression_info, (char*)payload->get(), payload_size,
+        &uncompressed_size, cache_options_.compress_format_version, allocator);
+
+    if (!uncompressed) {
+      // Unusable payload: drop the entry if nobody else holds it.
+      cache_->Release(entry, /*erase_if_last_ref=*/true);
+      return nullptr;
+    }
+    s = helper->create_cb(Slice(uncompressed.get(), uncompressed_size),
+                          create_context, allocator, &value, &charge);
+  }
+
+  if (!s.ok()) {
+    cache_->Release(entry, /*erase_if_last_ref=*/true);
+    return nullptr;
+  }
+
+  if (!advise_erase) {
+    kept_in_sec_cache = true;
+    cache_->Release(entry, /*erase_if_last_ref=*/false);
+  } else {
+    cache_->Release(entry, /*erase_if_last_ref=*/true);
+    // Insert a dummy handle.
+    cache_
+        ->Insert(key, /*obj=*/nullptr,
+                 GetHelper(cache_options_.enable_custom_split_merge),
+                 /*charge=*/0)
+        .PermitUncheckedError();
+  }
+
+  return std::unique_ptr<SecondaryCacheResultHandle>(
+      new CompressedSecondaryCacheResultHandle(value, charge));
+}
+
+// Admits `value` under `key`.
+//
+// The first Insert for a key only records a zero-charge dummy entry; the
+// value itself is stored on a later Insert that finds an entry for the key.
+// The stored bytes come from `helper->saveto_cb`, are compressed unless
+// compression is off or the helper's role is listed in
+// `do_not_compress_roles`, and are kept either as one allocation or as a
+// chunk list when `enable_custom_split_merge` is set.
+//
+// Returns InvalidArgument for a null `value`, the `saveto_cb` status on
+// serialization failure, Corruption when compression fails, and otherwise
+// the status of the underlying cache insert.
+Status CompressedSecondaryCache::Insert(const Slice& key,
+                                        Cache::ObjectPtr value,
+                                        const Cache::CacheItemHelper* helper) {
+  if (value == nullptr) {
+    return Status::InvalidArgument();
+  }
+
+  Cache::Handle* existing = cache_->Lookup(key);
+  auto internal_helper = GetHelper(cache_options_.enable_custom_split_merge);
+  if (existing == nullptr) {
+    PERF_COUNTER_ADD(compressed_sec_cache_insert_dummy_count, 1);
+    // Insert a dummy handle if the handle is evicted for the first time.
+    return cache_->Insert(key, /*obj=*/nullptr, internal_helper,
+                          /*charge=*/0);
+  }
+  cache_->Release(existing, /*erase_if_last_ref=*/false);
+
+  // Serialize the object into a freshly allocated block.
+  size_t size = (*helper->size_cb)(value);
+  CacheAllocationPtr ptr =
+      AllocateBlock(size, cache_options_.memory_allocator.get());
+
+  Status s = (*helper->saveto_cb)(value, 0, size, ptr.get());
+  if (!s.ok()) {
+    return s;
+  }
+  Slice val(ptr.get(), size);
+
+  std::string compressed_val;
+  const bool should_compress =
+      cache_options_.compression_type != kNoCompression &&
+      !cache_options_.do_not_compress_roles.Contains(helper->role);
+  if (should_compress) {
+    PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, size);
+    CompressionOptions compression_opts;
+    CompressionContext compression_context(cache_options_.compression_type);
+    uint64_t sample_for_compression{0};
+    CompressionInfo compression_info(
+        compression_opts, compression_context, CompressionDict::GetEmptyDict(),
+        cache_options_.compression_type, sample_for_compression);
+
+    const bool compressed_ok =
+        CompressData(val, compression_info,
+                     cache_options_.compress_format_version, &compressed_val);
+    if (!compressed_ok) {
+      return Status::Corruption("Error compressing value.");
+    }
+
+    val = Slice(compressed_val);
+    size = compressed_val.size();
+    PERF_COUNTER_ADD(compressed_sec_cache_compressed_bytes, size);
+
+    if (!cache_options_.enable_custom_split_merge) {
+      // Single-allocation storage: copy the compressed bytes into a block
+      // of exactly the compressed size.
+      ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
+      memcpy(ptr.get(), compressed_val.data(), size);
+    }
+  }
+
+  PERF_COUNTER_ADD(compressed_sec_cache_insert_real_count, 1);
+  if (!cache_options_.enable_custom_split_merge) {
+    CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
+    return cache_->Insert(key, buf, internal_helper, size);
+  }
+
+  // Chunked storage: the charge is recomputed by the splitter.
+  size_t charge{0};
+  CacheValueChunk* value_chunks_head =
+      SplitValueIntoChunks(val, cache_options_.compression_type, charge);
+  return cache_->Insert(key, value_chunks_head, internal_helper, charge);
+}
+
+// Forwards the erase request for `key` to the underlying cache.
+void CompressedSecondaryCache::Erase(const Slice& key) {
+  cache_->Erase(key);
+}
+
+// Records `capacity` in the stored options and applies it to the underlying
+// cache, both under `capacity_mutex_`. Always returns OK.
+Status CompressedSecondaryCache::SetCapacity(size_t capacity) {
+  {
+    MutexLock guard(&capacity_mutex_);
+    cache_options_.capacity = capacity;
+    cache_->SetCapacity(capacity);
+  }
+  return Status::OK();
+}
+
+// Writes the capacity recorded in the stored options to `capacity`, reading
+// it under `capacity_mutex_`. Always returns OK.
+Status CompressedSecondaryCache::GetCapacity(size_t& capacity) {
+  {
+    MutexLock guard(&capacity_mutex_);
+    capacity = cache_options_.capacity;
+  }
+  return Status::OK();
+}
+
+// Returns the underlying cache's printable options followed by one line each
+// for `compression_type` and `compress_format_version`.
+//
+// The text is built with std::string appends rather than snprintf into a
+// fixed-size buffer: nothing can be truncated, and the numeric field is
+// formatted by std::to_string according to its declared type, so no printf
+// conversion specifier has to match it.
+std::string CompressedSecondaryCache::GetPrintableOptions() const {
+  std::string ret = cache_->GetPrintableOptions();
+
+  ret.append("    compression_type : ");
+  // c_str() keeps the original "%s" semantics (text up to the first NUL).
+  ret.append(CompressionTypeToString(cache_options_.compression_type).c_str());
+  ret.append("\n");
+
+  ret.append("    compress_format_version : ");
+  ret.append(std::to_string(cache_options_.compress_format_version));
+  ret.append("\n");
+
+  return ret;
+}
+
+// Copies `value` into a singly linked list of heap-allocated chunks and
+// returns the head of the list. Each chunk is a `new char[]` block holding a
+// CacheValueChunk header followed by its share of the data. The size of every
+// allocation is added to `charge` (which is not reset here).
+CompressedSecondaryCache::CacheValueChunk*
+CompressedSecondaryCache::SplitValueIntoChunks(const Slice& value,
+                                               CompressionType compression_type,
+                                               size_t& charge) {
+  assert(!value.empty());
+  const char* cursor = value.data();
+  size_t remaining{value.size()};
+
+  // Stack-allocated list head so the first chunk needs no special casing.
+  CacheValueChunk sentinel = CacheValueChunk();
+  CacheValueChunk* tail = &sentinel;
+
+  while (remaining > 0) {
+    // Allocation size if everything left were stored in a single chunk.
+    const size_t whole_size = sizeof(CacheValueChunk) - 1 + remaining;
+    auto bin = std::upper_bound(malloc_bin_sizes_.begin(),
+                                malloc_bin_sizes_.end(), whole_size);
+
+    // Do not split when value size is too small, too large, close to a bin
+    // size, or there is no compression.
+    const bool keep_whole =
+        bin == malloc_bin_sizes_.begin() || bin == malloc_bin_sizes_.end() ||
+        *bin - whole_size < malloc_bin_sizes_.front() ||
+        compression_type == kNoCompression;
+
+    // Otherwise fill exactly the largest bin that is not above `whole_size`.
+    size_t alloc_size{whole_size};
+    if (!keep_whole) {
+      alloc_size = *(--bin);
+    }
+
+    CacheValueChunk* chunk =
+        reinterpret_cast<CacheValueChunk*>(new char[alloc_size]);
+    const size_t data_size = alloc_size - sizeof(CacheValueChunk) + 1;
+    memcpy(chunk->data, cursor, data_size);
+    chunk->size = data_size;
+
+    tail->next = chunk;
+    tail = chunk;
+
+    cursor += data_size;
+    remaining -= data_size;
+    charge += alloc_size;
+  }
+  tail->next = nullptr;
+
+  return sentinel.next;
+}
+
+// Concatenates the data of every chunk in the list starting at `chunks_head`
+// into one newly allocated block and returns it. `charge` is overwritten with
+// the total number of data bytes (chunk headers are not counted).
+CacheAllocationPtr CompressedSecondaryCache::MergeChunksIntoValue(
+    const void* chunks_head, size_t& charge) {
+  const CacheValueChunk* const first =
+      reinterpret_cast<const CacheValueChunk*>(chunks_head);
+
+  // Pass 1: total payload size.
+  charge = 0;
+  for (const CacheValueChunk* c = first; c != nullptr; c = c->next) {
+    charge += c->size;
+  }
+
+  CacheAllocationPtr merged =
+      AllocateBlock(charge, cache_options_.memory_allocator.get());
+
+  // Pass 2: copy each chunk's data back to back.
+  size_t offset{0};
+  for (const CacheValueChunk* c = first; c != nullptr; c = c->next) {
+    memcpy(merged.get() + offset, c->data, c->size);
+    offset += c->size;
+  }
+
+  return merged;
+}
+
+// Returns the CacheItemHelper (role kMisc) whose deleter matches how values
+// are stored: a chunk list when `enable_custom_split_merge` is true, a single
+// heap-allocated CacheAllocationPtr otherwise. Each helper is a function-local
+// static, so the returned pointer stays valid.
+const Cache::CacheItemHelper* CompressedSecondaryCache::GetHelper(
+    bool enable_custom_split_merge) const {
+  if (!enable_custom_split_merge) {
+    static const Cache::CacheItemHelper kWholeValueHelper{
+        CacheEntryRole::kMisc,
+        [](Cache::ObjectPtr obj, MemoryAllocator* /*alloc*/) {
+          delete static_cast<CacheAllocationPtr*>(obj);
+        }};
+    return &kWholeValueHelper;
+  }
+
+  static const Cache::CacheItemHelper kChunkedHelper{
+      CacheEntryRole::kMisc,
+      [](Cache::ObjectPtr obj, MemoryAllocator* /*alloc*/) {
+        // Walk the list, reading `next` before the chunk is freed.
+        CacheValueChunk* chunk = static_cast<CacheValueChunk*>(obj);
+        while (chunk != nullptr) {
+          CacheValueChunk* const following = chunk->next;
+          chunk->Free();
+          chunk = following;
+        }
+      }};
+  return &kChunkedHelper;
+}
+
+// Builds a CompressedSecondaryCache configured from these options and returns
+// it as a shared SecondaryCache pointer.
+std::shared_ptr<SecondaryCache>
+CompressedSecondaryCacheOptions::MakeSharedSecondaryCache() const {
+  const CompressedSecondaryCacheOptions& options = *this;
+  return std::make_shared<CompressedSecondaryCache>(options);
+}
+
+// Deflating by `decrease` bytes is done by asking the reservation manager to
+// increase its reservation by that amount.
+Status CompressedSecondaryCache::Deflate(size_t decrease) {
+  constexpr bool kIncreaseReservation = true;
+  return cache_res_mgr_->UpdateCacheReservation(decrease,
+                                                kIncreaseReservation);
+}
+
+// Inflating by `increase` bytes is done by asking the reservation manager to
+// decrease its reservation by that amount.
+Status CompressedSecondaryCache::Inflate(size_t increase) {
+  constexpr bool kIncreaseReservation = false;
+  return cache_res_mgr_->UpdateCacheReservation(increase,
+                                                kIncreaseReservation);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/compressed_secondary_cache.h
+++ b/cache/compressed_secondary_cache.h
@ -0,0 +1,140 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "cache/cache_reservation_manager.h"
+#include "cache/lru_cache.h"
+#include "memory/memory_allocator_impl.h"
+#include "rocksdb/secondary_cache.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "util/compression.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Result handle for a value that is already available when the handle is
+// constructed: it just stores the object pointer and size it was given, is
+// always ready, and never has to wait. Copying is disabled.
+class CompressedSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
+ public:
+  CompressedSecondaryCacheResultHandle(Cache::ObjectPtr value, size_t size)
+      : value_{value}, size_{size} {}
+  ~CompressedSecondaryCacheResultHandle() override = default;
+
+  // Non-copyable.
+  CompressedSecondaryCacheResultHandle(
+      const CompressedSecondaryCacheResultHandle&) = delete;
+  CompressedSecondaryCacheResultHandle& operator=(
+      const CompressedSecondaryCacheResultHandle&) = delete;
+
+  // Always true: there is nothing pending.
+  bool IsReady() override { return true; }
+
+  // No-op for the same reason.
+  void Wait() override {}
+
+  // The object pointer passed to the constructor.
+  Cache::ObjectPtr Value() override { return value_; }
+
+  // The size passed to the constructor.
+  size_t Size() override { return size_; }
+
+ private:
+  Cache::ObjectPtr value_;
+  size_t size_;
+};
+
+// The CompressedSecondaryCache is a concrete implementation of
+// rocksdb::SecondaryCache.
+//
+// When a block is found from CompressedSecondaryCache::Lookup, we check whether
+// there is a dummy block with the same key in the primary cache.
+// 1. If the dummy block exists, we erase the block from
+//    CompressedSecondaryCache and insert it into the primary cache.
+// 2. If not, we just insert a dummy block into the primary cache
+//    (charging the actual size of the block) and do not erase the block from
+//    CompressedSecondaryCache. A standalone handle is returned to the caller.
+//
+// When a block is evicted from the primary cache, we check whether
+// there is a dummy block with the same key in CompressedSecondaryCache.
+// 1. If the dummy block exists, the block is inserted into
+//    CompressedSecondaryCache.
+// 2. If not, we just insert a dummy block (size 0) in CompressedSecondaryCache.
+//
+// Users can also cast a pointer to CompressedSecondaryCache and call methods on
+// it directly, especially custom methods that may be added
+// in the future.  For example -
+// std::unique_ptr<rocksdb::SecondaryCache> cache =
+//      NewCompressedSecondaryCache(opts);
+// static_cast<CompressedSecondaryCache*>(cache.get())->Erase(key);
+
+class CompressedSecondaryCache : public SecondaryCache {
+ public:
+  explicit CompressedSecondaryCache(
+      const CompressedSecondaryCacheOptions& opts);
+  ~CompressedSecondaryCache() override;
+
+  const char* Name() const override { return "CompressedSecondaryCache"; }
+
+  Status Insert(const Slice& key, Cache::ObjectPtr value,
+                const Cache::CacheItemHelper* helper) override;
+
+  std::unique_ptr<SecondaryCacheResultHandle> Lookup(
+      const Slice& key, const Cache::CacheItemHelper* helper,
+      Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase,
+      bool& kept_in_sec_cache) override;
+
+  bool SupportForceErase() const override { return true; }
+
+  void Erase(const Slice& key) override;
+
+  void WaitAll(std::vector<SecondaryCacheResultHandle*> /*handles*/) override {}
+
+  Status SetCapacity(size_t capacity) override;
+
+  Status GetCapacity(size_t& capacity) override;
+
+  Status Deflate(size_t decrease) override;
+
+  Status Inflate(size_t increase) override;
+
+  std::string GetPrintableOptions() const override;
+
+  size_t TEST_GetUsage() { return cache_->GetUsage(); }  // usage of cache_
+
+ private:
+  friend class CompressedSecondaryCacheTestBase;
+  static constexpr std::array<uint16_t, 8> malloc_bin_sizes_{
+      128, 256, 512, 1024, 2048, 4096, 8192, 16384};  // ascending order
+
+  struct CacheValueChunk {  // header placed at the start of a char[] block
+    // TODO try "CacheAllocationPtr next;".
+    CacheValueChunk* next;
+    size_t size;
+    // Beginning of the chunk data (MUST BE THE LAST FIELD IN THIS STRUCT!)
+    char data[1];
+
+    void Free() { delete[] reinterpret_cast<char*>(this); }  // frees as char[]
+  };
+
+  // Split value into chunks to better fit into jemalloc bins. The chunks
+  // are stored in CacheValueChunk and extra charge is needed for each chunk,
+  // so the cache charge is recalculated here.
+  CacheValueChunk* SplitValueIntoChunks(const Slice& value,
+                                        CompressionType compression_type,
+                                        size_t& charge);
+
+  // After merging chunks, the extra charge for each chunk is removed, so
+  // the charge is recalculated.
+  CacheAllocationPtr MergeChunksIntoValue(const void* chunks_head,
+                                          size_t& charge);
+
+  // TODO: clean up to use cleaner interfaces in typed_cache.h
+  const Cache::CacheItemHelper* GetHelper(bool enable_custom_split_merge) const;
+  std::shared_ptr<Cache> cache_;
+  CompressedSecondaryCacheOptions cache_options_;  // copy of the ctor options
+  mutable port::Mutex capacity_mutex_;  // held by SetCapacity and GetCapacity
+  std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/compressed_secondary_cache_test.cc
+++ b/cache/compressed_secondary_cache_test.cc
--- a/cache/lru_cache.cc
+++ b/cache/lru_cache.cc
@ -0,0 +1,723 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "cache/lru_cache.h"
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+
+#include "cache/secondary_cache_adapter.h"
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/statistics_impl.h"
+#include "port/lang.h"
+#include "util/distributed_mutex.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace lru_cache {
+
+// Creates an empty table with 2^4 buckets, all initialized to nullptr.
+// max_upper_hash_bits caps how far Resize() may grow the bucket array, and
+// allocator is kept so that the destructor can free remaining entries.
+LRUHandleTable::LRUHandleTable(int max_upper_hash_bits,
+                               MemoryAllocator* allocator)
+    : length_bits_(/* historical starting size*/ 4),
+      list_(new LRUHandle* [size_t{1} << length_bits_] {}),
+      elems_(0),
+      max_length_bits_(max_upper_hash_bits),
+      allocator_(allocator) {}
+
+// Frees every entry still linked in the table that has no external
+// references; entries that are still referenced are left untouched.
+LRUHandleTable::~LRUHandleTable() {
+  MemoryAllocator* const allocator = allocator_;
+  const size_t bucket_count = size_t{1} << length_bits_;
+  ApplyToEntriesRange(
+      [allocator](LRUHandle* entry) {
+        if (entry->HasRefs()) {
+          return;
+        }
+        entry->Free(allocator);
+      },
+      0, bucket_count);
+}
+
+// Returns the entry matching key/hash, or nullptr when there is none.
+LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
+  LRUHandle** slot = FindPointer(key, hash);
+  return *slot;
+}
+
+// Links h into the table. If an entry with the same key/hash exists, h takes
+// over its position in the bucket chain and the old entry is returned.
+// Otherwise h is appended to its chain, the element count grows (possibly
+// triggering Resize()), and nullptr is returned.
+LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
+  LRUHandle** slot = FindPointer(h->key(), h->hash);
+  LRUHandle* const replaced = *slot;
+  if (replaced != nullptr) {
+    h->next_hash = replaced->next_hash;
+    *slot = h;
+    return replaced;
+  }
+
+  h->next_hash = nullptr;
+  *slot = h;
+  ++elems_;
+  // Since each cache entry is fairly large, we aim for a small
+  // average linked list length (<= 1).
+  if ((elems_ >> length_bits_) > 0) {  // elems_ >= length
+    Resize();
+  }
+  return nullptr;
+}
+
+// Unlinks and returns the entry matching key/hash. Returns nullptr and
+// leaves the table unchanged when no such entry exists.
+LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
+  LRUHandle** slot = FindPointer(key, hash);
+  LRUHandle* const found = *slot;
+  if (found == nullptr) {
+    return nullptr;
+  }
+  *slot = found->next_hash;
+  --elems_;
+  return found;
+}
+
+// Walks the bucket chain selected by the upper length_bits_ bits of hash and
+// returns the slot that points at the matching entry, or the chain's
+// trailing (nullptr) slot when nothing matches.
+LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
+  LRUHandle** slot = &list_[hash >> (32 - length_bits_)];
+  for (; *slot != nullptr; slot = &(*slot)->next_hash) {
+    // Compare the cheap hash first; only equal hashes compare the keys.
+    if ((*slot)->hash == hash && !(key != (*slot)->key())) {
+      break;
+    }
+  }
+  return slot;
+}
+
+// Doubles the number of buckets and relinks every entry into the new bucket
+// array, unless the table is not allowed to grow any further.
+void LRUHandleTable::Resize() {
+  // Two reasons not to grow:
+  //  * length_bits_ reached max_length_bits_: due to reaching the limit of
+  //    hash information, a bigger table would allocate more addresses but
+  //    only the same number would be used.
+  //  * length_bits_ reached 31: growing further would require shifting a
+  //    uint32_t by 32, which is undefined behavior.
+  if (length_bits_ >= max_length_bits_ || length_bits_ >= 31) {
+    return;
+  }
+
+  const int grown_bits = length_bits_ + 1;
+  const uint32_t old_bucket_count = uint32_t{1} << length_bits_;
+  std::unique_ptr<LRUHandle*[]> grown_list{
+      new LRUHandle* [size_t{1} << grown_bits] {}};
+  [[maybe_unused]] uint32_t moved = 0;
+  for (uint32_t bucket = 0; bucket < old_bucket_count; ++bucket) {
+    LRUHandle* entry = list_[bucket];
+    while (entry != nullptr) {
+      LRUHandle* const following = entry->next_hash;
+      // Push the entry onto the front of its new bucket chain.
+      LRUHandle** head = &grown_list[entry->hash >> (32 - grown_bits)];
+      entry->next_hash = *head;
+      *head = entry;
+      ++moved;
+      entry = following;
+    }
+  }
+  assert(elems_ == moved);
+  list_ = std::move(grown_list);
+  length_bits_ = grown_bits;
+}
+
+// Constructs an empty shard: all usage counters start at zero, the LRU list
+// is an empty circular list anchored at lru_, and both pool boundary
+// pointers refer to that anchor. The requested capacity is applied last via
+// SetCapacity(), which also derives the per-pool capacities.
+// eviction_callback is dereferenced here, so it must not be null.
+LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
+                             double high_pri_pool_ratio,
+                             double low_pri_pool_ratio, bool use_adaptive_mutex,
+                             CacheMetadataChargePolicy metadata_charge_policy,
+                             int max_upper_hash_bits,
+                             MemoryAllocator* allocator,
+                             const Cache::EvictionCallback* eviction_callback)
+    : CacheShardBase(metadata_charge_policy),
+      capacity_(0),
+      high_pri_pool_usage_(0),
+      low_pri_pool_usage_(0),
+      strict_capacity_limit_(strict_capacity_limit),
+      high_pri_pool_ratio_(high_pri_pool_ratio),
+      high_pri_pool_capacity_(0),
+      low_pri_pool_ratio_(low_pri_pool_ratio),
+      low_pri_pool_capacity_(0),
+      table_(max_upper_hash_bits, allocator),
+      usage_(0),
+      lru_usage_(0),
+      mutex_(use_adaptive_mutex),
+      eviction_callback_(*eviction_callback) {
+  // Make empty circular linked list.
+  lru_.next = &lru_;
+  lru_.prev = &lru_;
+  lru_low_pri_ = &lru_;
+  lru_bottom_pri_ = &lru_;
+  SetCapacity(capacity);
+}
+
+// Removes every entry currently on the LRU list (i.e. every cached entry
+// without external references) from the shard and frees it.
+void LRUCacheShard::EraseUnRefEntries() {
+  autovector<LRUHandle*> to_free;
+  {
+    DMutexLock guard(mutex_);
+    for (LRUHandle* victim = lru_.next; victim != &lru_; victim = lru_.next) {
+      // LRU list contains only elements which can be evicted.
+      assert(victim->InCache() && !victim->HasRefs());
+      LRU_Remove(victim);
+      table_.Remove(victim->key(), victim->hash);
+      victim->SetInCache(false);
+      assert(usage_ >= victim->total_charge);
+      usage_ -= victim->total_charge;
+      to_free.push_back(victim);
+    }
+  }
+
+  // The entries are freed after the mutex has been released.
+  for (LRUHandle* victim : to_free) {
+    victim->Free(table_.GetAllocator());
+  }
+}
+
+// Invokes callback on the entries of the next batch of hash buckets and
+// advances *state so that a later call continues where this one stopped.
+// *state is set to SIZE_MAX once the last bucket has been covered.
+void LRUCacheShard::ApplyToSomeEntries(
+    const std::function<void(const Slice& key, Cache::ObjectPtr value,
+                             size_t charge,
+                             const Cache::CacheItemHelper* helper)>& callback,
+    size_t average_entries_per_lock, size_t* state) {
+  // The state is essentially going to be the starting hash, which works
+  // nicely even if we resize between calls because we use upper-most
+  // hash bits for table indexes.
+  DMutexLock guard(mutex_);
+  const int length_bits = table_.GetLengthBits();
+  const size_t length = size_t{1} << length_bits;
+  // Distance between a bucket index and its position in the state word.
+  const size_t state_shift = sizeof(size_t) * 8u - length_bits;
+
+  assert(average_entries_per_lock > 0);
+  // Assuming we are called with same average_entries_per_lock repeatedly,
+  // this simplifies some logic (index_end will not overflow).
+  assert(average_entries_per_lock < length || *state == 0);
+
+  const size_t index_begin = *state >> state_shift;
+  size_t index_end = index_begin + average_entries_per_lock;
+  if (index_end < length) {
+    *state = index_end << state_shift;
+  } else {
+    // Going to end
+    index_end = length;
+    *state = SIZE_MAX;
+  }
+
+  table_.ApplyToEntriesRange(
+      [callback,
+       metadata_charge_policy = metadata_charge_policy_](LRUHandle* entry) {
+        callback(entry->key(), entry->value,
+                 entry->GetCharge(metadata_charge_policy), entry->helper);
+      },
+      index_begin, index_end);
+}
+
+// Test-only: reports the LRU list anchor and the two pool boundary pointers
+// through the output parameters.
+void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri,
+                                    LRUHandle** lru_bottom_pri) {
+  DMutexLock guard(mutex_);
+  *lru_bottom_pri = lru_bottom_pri_;
+  *lru_low_pri = lru_low_pri_;
+  *lru = &lru_;
+}
+
+// Test-only: counts the entries currently linked into the LRU list.
+size_t LRUCacheShard::TEST_GetLRUSize() {
+  DMutexLock guard(mutex_);
+  size_t count = 0;
+  for (const LRUHandle* node = lru_.next; node != &lru_; node = node->next) {
+    ++count;
+  }
+  return count;
+}
+
+// Returns the configured high-pri pool ratio, read under the shard mutex.
+double LRUCacheShard::GetHighPriPoolRatio() {
+  DMutexLock guard(mutex_);
+  const double ratio = high_pri_pool_ratio_;
+  return ratio;
+}
+
+// Returns the configured low-pri pool ratio, read under the shard mutex.
+double LRUCacheShard::GetLowPriPoolRatio() {
+  DMutexLock guard(mutex_);
+  const double ratio = low_pri_pool_ratio_;
+  return ratio;
+}
+
+// Unlinks e from the LRU list, moves any pool boundary pointer that referred
+// to e onto its predecessor, and subtracts e's charge from lru_usage_ and
+// from the usage of the pool e was in (if any).
+void LRUCacheShard::LRU_Remove(LRUHandle* e) {
+  assert(e->next != nullptr);
+  assert(e->prev != nullptr);
+  LRUHandle* const before = e->prev;
+  LRUHandle* const after = e->next;
+
+  if (lru_low_pri_ == e) {
+    lru_low_pri_ = before;
+  }
+  if (lru_bottom_pri_ == e) {
+    lru_bottom_pri_ = before;
+  }
+
+  after->prev = before;
+  before->next = after;
+  e->next = nullptr;
+  e->prev = nullptr;
+
+  const size_t charge = e->total_charge;
+  assert(lru_usage_ >= charge);
+  lru_usage_ -= charge;
+
+  // An entry is never in both pools at once.
+  assert(!e->InHighPriPool() || !e->InLowPriPool());
+  if (e->InHighPriPool()) {
+    assert(high_pri_pool_usage_ >= charge);
+    high_pri_pool_usage_ -= charge;
+  } else if (e->InLowPriPool()) {
+    assert(low_pri_pool_usage_ >= charge);
+    low_pri_pool_usage_ -= charge;
+  }
+}
+
+// Links e into the LRU list at the position implied by its priority and hit
+// state, records which pool (if any) it now belongs to, and adds its charge
+// to lru_usage_.
+void LRUCacheShard::LRU_Insert(LRUHandle* e) {
+  assert(e->next == nullptr);
+  assert(e->prev == nullptr);
+
+  // Splices e into the list between two adjacent nodes.
+  const auto link_between = [e](LRUHandle* before, LRUHandle* after) {
+    e->next = after;
+    e->prev = before;
+    before->next = e;
+    after->prev = e;
+  };
+
+  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
+    // Insert "e" to head of LRU list.
+    link_between(lru_.prev, &lru_);
+    e->SetInHighPriPool(true);
+    e->SetInLowPriPool(false);
+    high_pri_pool_usage_ += e->total_charge;
+    MaintainPoolSize();
+  } else if (low_pri_pool_ratio_ > 0 &&
+             (e->IsHighPri() || e->IsLowPri() || e->HasHit())) {
+    // Insert "e" to the head of low-pri pool.
+    link_between(lru_low_pri_, lru_low_pri_->next);
+    e->SetInHighPriPool(false);
+    e->SetInLowPriPool(true);
+    low_pri_pool_usage_ += e->total_charge;
+    MaintainPoolSize();
+    lru_low_pri_ = e;
+  } else {
+    // Insert "e" to the head of bottom-pri pool.
+    const bool low_pri_pool_empty = (lru_bottom_pri_ == lru_low_pri_);
+    link_between(lru_bottom_pri_, lru_bottom_pri_->next);
+    e->SetInHighPriPool(false);
+    e->SetInLowPriPool(false);
+    // if the low-pri pool is empty, lru_low_pri_ also needs to be updated.
+    if (low_pri_pool_empty) {
+      lru_low_pri_ = e;
+    }
+    lru_bottom_pri_ = e;
+  }
+  lru_usage_ += e->total_charge;
+}
+
+// Moves the pool boundary pointers forward until neither the high-pri pool
+// nor the low-pri pool exceeds its capacity, demoting the entries that are
+// passed over to the next lower pool.
+void LRUCacheShard::MaintainPoolSize() {
+  while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
+    // Overflow last entry in high-pri pool to low-pri pool.
+    LRUHandle* const demoted = lru_low_pri_->next;
+    lru_low_pri_ = demoted;
+    assert(demoted != &lru_);
+    demoted->SetInHighPriPool(false);
+    demoted->SetInLowPriPool(true);
+    assert(high_pri_pool_usage_ >= demoted->total_charge);
+    high_pri_pool_usage_ -= demoted->total_charge;
+    low_pri_pool_usage_ += demoted->total_charge;
+  }
+
+  while (low_pri_pool_usage_ > low_pri_pool_capacity_) {
+    // Overflow last entry in low-pri pool to bottom-pri pool.
+    LRUHandle* const demoted = lru_bottom_pri_->next;
+    lru_bottom_pri_ = demoted;
+    assert(demoted != &lru_);
+    demoted->SetInHighPriPool(false);
+    demoted->SetInLowPriPool(false);
+    assert(low_pri_pool_usage_ >= demoted->total_charge);
+    low_pri_pool_usage_ -= demoted->total_charge;
+  }
+}
+
+// Evicts entries from the front of the LRU list until an additional `charge`
+// would fit within capacity_ or the list is empty. Evicted entries are
+// removed from the hash table and appended to *deleted; they are not freed
+// here.
+void LRUCacheShard::EvictFromLRU(size_t charge,
+                                 autovector<LRUHandle*>* deleted) {
+  for (LRUHandle* victim = lru_.next;
+       (usage_ + charge) > capacity_ && victim != &lru_;
+       victim = lru_.next) {
+    // LRU list contains only elements which can be evicted.
+    assert(victim->InCache() && !victim->HasRefs());
+    LRU_Remove(victim);
+    table_.Remove(victim->key(), victim->hash);
+    victim->SetInCache(false);
+    assert(usage_ >= victim->total_charge);
+    usage_ -= victim->total_charge;
+    deleted->push_back(victim);
+  }
+}
+
+// Offers each evicted handle to the eviction callback (when one is set). If
+// the callback reports that it took ownership of the object, only the handle
+// memory is released; otherwise the entry is freed in full.
+void LRUCacheShard::NotifyEvicted(
+    const autovector<LRUHandle*>& evicted_handles) {
+  MemoryAllocator* const allocator = table_.GetAllocator();
+  for (LRUHandle* evicted : evicted_handles) {
+    const bool ownership_taken =
+        eviction_callback_ &&
+        eviction_callback_(evicted->key(),
+                           reinterpret_cast<Cache::Handle*>(evicted));
+    if (!ownership_taken) {
+      // Free the entries here outside of mutex for performance reasons.
+      evicted->Free(allocator);
+      continue;
+    }
+    // Callback took ownership of obj; just free handle
+    free(evicted);
+  }
+}
+
+// Applies a new capacity, recomputes both pool capacities from their ratios
+// and evicts from the LRU list if usage now exceeds the capacity. Evicted
+// entries are handed to NotifyEvicted() after the mutex is released.
+void LRUCacheShard::SetCapacity(size_t capacity) {
+  autovector<LRUHandle*> evicted;
+  {
+    DMutexLock guard(mutex_);
+    capacity_ = capacity;
+    low_pri_pool_capacity_ = capacity_ * low_pri_pool_ratio_;
+    high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
+    EvictFromLRU(0, &evicted);
+  }
+
+  NotifyEvicted(evicted);
+}
+
+// Updates the strict-capacity flag under the shard mutex.
+void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
+  DMutexLock guard(mutex_);
+  strict_capacity_limit_ = strict_capacity_limit;
+}
+
+// Inserts the prepared handle e into this shard.
+//  * Returns OK when e was inserted (or, with handle == nullptr, when it did
+//    not fit and was discarded as if evicted immediately).
+//  * Returns OkOverwritten when an entry with the same key was replaced.
+//  * Returns MemoryLimit when e does not fit, the capacity limit is strict
+//    and the caller asked for a handle; e is released and *handle is nulled.
+// When handle is non-null and the insert succeeds, *handle receives e with a
+// reference held for the caller.
+Status LRUCacheShard::InsertItem(LRUHandle* e, LRUHandle** handle) {
+  Status s = Status::OK();
+  autovector<LRUHandle*> to_notify;
+
+  {
+    DMutexLock guard(mutex_);
+
+    // Free the space following strict LRU policy until enough space
+    // is freed or the lru list is empty.
+    EvictFromLRU(e->total_charge, &to_notify);
+
+    const bool over_capacity = (usage_ + e->total_charge) > capacity_;
+    if (over_capacity && handle == nullptr) {
+      // Don't insert the entry but still return ok, as if the entry inserted
+      // into cache and get evicted immediately.
+      e->SetInCache(false);
+      to_notify.push_back(e);
+    } else if (over_capacity && strict_capacity_limit_) {
+      e->SetInCache(false);
+      free(e);
+      e = nullptr;
+      *handle = nullptr;
+      s = Status::MemoryLimit("Insert failed due to LRU cache being full.");
+    } else {
+      // Insert into the cache. Note that the cache might get larger than its
+      // capacity if not enough space was freed up.
+      LRUHandle* const replaced = table_.Insert(e);
+      usage_ += e->total_charge;
+      if (replaced != nullptr) {
+        s = Status::OkOverwritten();
+        assert(replaced->InCache());
+        replaced->SetInCache(false);
+        if (!replaced->HasRefs()) {
+          // The replaced entry is on the LRU list because it was in cache
+          // and its reference count is 0.
+          LRU_Remove(replaced);
+          assert(usage_ >= replaced->total_charge);
+          usage_ -= replaced->total_charge;
+          to_notify.push_back(replaced);
+        }
+      }
+      if (handle != nullptr) {
+        // If caller already holds a ref, no need to take one here.
+        if (!e->HasRefs()) {
+          e->Ref();
+        }
+        *handle = e;
+      } else {
+        LRU_Insert(e);
+      }
+    }
+  }
+
+  NotifyEvicted(to_notify);
+
+  return s;
+}
+
+// Looks up key in this shard. On a hit the entry is taken off the LRU list
+// if it had no external references, gains a reference, is marked as hit and
+// is returned; on a miss nullptr is returned. The helper, create_context,
+// priority and stats arguments are unused here.
+LRUHandle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash,
+                                 const Cache::CacheItemHelper* /*helper*/,
+                                 Cache::CreateContext* /*create_context*/,
+                                 Cache::Priority /*priority*/,
+                                 Statistics* /*stats*/) {
+  DMutexLock guard(mutex_);
+  LRUHandle* const found = table_.Lookup(key, hash);
+  if (found == nullptr) {
+    return nullptr;
+  }
+  assert(found->InCache());
+  if (!found->HasRefs()) {
+    // The entry is in LRU since it's in hash and has no external
+    // references.
+    LRU_Remove(found);
+  }
+  found->Ref();
+  found->SetHit();
+  return found;
+}
+
+// Adds one more external reference to e and returns true.
+bool LRUCacheShard::Ref(LRUHandle* e) {
+  DMutexLock guard(mutex_);
+  // To create another reference - entry must be already externally referenced.
+  assert(e->HasRefs());
+  e->Ref();
+  return true;
+}
+
+// Stores the new high-pri pool ratio, recomputes that pool's capacity from
+// the current shard capacity and rebalances the pools.
+void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
+  DMutexLock guard(mutex_);
+  high_pri_pool_ratio_ = high_pri_pool_ratio;
+  high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
+  MaintainPoolSize();
+}
+
+// Stores the new low-pri pool ratio, recomputes that pool's capacity from
+// the current shard capacity and rebalances the pools.
+void LRUCacheShard::SetLowPriorityPoolRatio(double low_pri_pool_ratio) {
+  DMutexLock guard(mutex_);
+  low_pri_pool_ratio_ = low_pri_pool_ratio;
+  low_pri_pool_capacity_ = capacity_ * low_pri_pool_ratio_;
+  MaintainPoolSize();
+}
+
+// Drops one external reference to e. Returns true when the entry was freed
+// as a result, false otherwise (including when e is nullptr).
+// If this was the last reference and the entry is still in the cache, it is
+// either put back on the LRU list or, when the shard is over capacity or
+// erase_if_last_ref is set, removed from the table and freed.
+bool LRUCacheShard::Release(LRUHandle* e, bool /*useful*/,
+                            bool erase_if_last_ref) {
+  if (e == nullptr) {
+    return false;
+  }
+  bool free_entry = false;
+  bool was_in_cache = false;
+  {
+    DMutexLock guard(mutex_);
+    free_entry = e->Unref();
+    was_in_cache = e->InCache();
+    if (free_entry && was_in_cache) {
+      // The item is still in cache, and nobody else holds a reference to it.
+      const bool keep_cached = !(usage_ > capacity_ || erase_if_last_ref);
+      if (keep_cached) {
+        // Put the item back on the LRU list, and don't free it.
+        LRU_Insert(e);
+        free_entry = false;
+      } else {
+        // The LRU list must be empty since the cache is full.
+        assert(lru_.next == &lru_ || erase_if_last_ref);
+        // Take this opportunity and remove the item.
+        table_.Remove(e->key(), e->hash);
+        e->SetInCache(false);
+      }
+    }
+    // If about to be freed, then decrement the cache usage.
+    if (free_entry) {
+      assert(usage_ >= e->total_charge);
+      usage_ -= e->total_charge;
+    }
+  }
+
+  if (!free_entry) {
+    return false;
+  }
+
+  // Free the entry here outside of mutex for performance reasons.
+  // Only call eviction callback if we're sure no one requested erasure
+  // FIXME: disabled because of test churn
+  if (false && was_in_cache && !erase_if_last_ref && eviction_callback_ &&
+      eviction_callback_(e->key(), reinterpret_cast<Cache::Handle*>(e))) {
+    // Callback took ownership of obj; just free handle
+    free(e);
+  } else {
+    e->Free(table_.GetAllocator());
+  }
+  return true;
+}
+
+// Allocates and fills in a new handle for key/value. The handle starts with
+// no references, no flags set and is not linked into the LRU list; its total
+// charge is computed from charge and the shard's metadata charge policy.
+LRUHandle* LRUCacheShard::CreateHandle(const Slice& key, uint32_t hash,
+                                       Cache::ObjectPtr value,
+                                       const Cache::CacheItemHelper* helper,
+                                       size_t charge) {
+  assert(helper);
+  // value == nullptr is reserved for indicating failure in SecondaryCache
+  assert(!(helper->IsSecondaryCacheCompatible() && value == nullptr));
+
+  // Allocate the memory here outside of the mutex.
+  // If the cache is full, we'll have to release it.
+  // It shouldn't happen very often though.
+  // key_data already provides one byte of key storage, hence the "- 1".
+  const size_t key_size = key.size();
+  const size_t alloc_size = sizeof(LRUHandle) - 1 + key_size;
+  auto* handle = static_cast<LRUHandle*>(malloc(alloc_size));
+
+  handle->value = value;
+  handle->helper = helper;
+  handle->next = nullptr;
+  handle->prev = nullptr;
+  handle->key_length = key_size;
+  handle->hash = hash;
+  handle->refs = 0;
+  handle->m_flags = 0;
+  handle->im_flags = 0;
+  memcpy(handle->key_data, key.data(), key_size);
+  handle->CalcTotalCharge(charge, metadata_charge_policy_);
+
+  return handle;
+}
+
+// Creates a handle for key/value, marks it as cached with the requested
+// priority and hands it to InsertItem(), whose status is returned.
+Status LRUCacheShard::Insert(const Slice& key, uint32_t hash,
+                             Cache::ObjectPtr value,
+                             const Cache::CacheItemHelper* helper,
+                             size_t charge, LRUHandle** handle,
+                             Cache::Priority priority) {
+  LRUHandle* const entry = CreateHandle(key, hash, value, helper, charge);
+  entry->SetInCache(true);
+  entry->SetPriority(priority);
+  return InsertItem(entry, handle);
+}
+
+// Creates a referenced handle that is charged to the shard but never added
+// to the hash table. Space is made by evicting from the LRU list first. If
+// the handle still does not fit under a strict capacity limit, it is either
+// kept with a zero charge (allow_uncharged) or released, in which case
+// nullptr is returned.
+LRUHandle* LRUCacheShard::CreateStandalone(const Slice& key, uint32_t hash,
+                                           Cache::ObjectPtr value,
+                                           const Cache::CacheItemHelper* helper,
+                                           size_t charge,
+                                           bool allow_uncharged) {
+  LRUHandle* standalone = CreateHandle(key, hash, value, helper, charge);
+  standalone->Ref();
+  standalone->SetIsStandalone(true);
+
+  autovector<LRUHandle*> evicted;
+
+  {
+    DMutexLock guard(mutex_);
+
+    EvictFromLRU(standalone->total_charge, &evicted);
+
+    const bool over_strict_limit =
+        strict_capacity_limit_ &&
+        (usage_ + standalone->total_charge) > capacity_;
+    if (!over_strict_limit) {
+      usage_ += standalone->total_charge;
+    } else if (allow_uncharged) {
+      standalone->total_charge = 0;
+    } else {
+      free(standalone);
+      standalone = nullptr;
+    }
+  }
+
+  NotifyEvicted(evicted);
+  return standalone;
+}
+
+// Removes the entry for key from the hash table, if present. An entry
+// without external references is also taken off the LRU list and freed; a
+// referenced entry is only marked as no longer cached.
+void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
+  LRUHandle* erased = nullptr;
+  bool free_now = false;
+  {
+    DMutexLock guard(mutex_);
+    erased = table_.Remove(key, hash);
+    if (erased != nullptr) {
+      assert(erased->InCache());
+      erased->SetInCache(false);
+      free_now = !erased->HasRefs();
+      if (free_now) {
+        // The entry is in LRU since it's in hash and has no external references
+        LRU_Remove(erased);
+        assert(usage_ >= erased->total_charge);
+        usage_ -= erased->total_charge;
+      }
+    }
+  }
+
+  // Free the entry here outside of mutex for performance reasons.
+  // free_now will only be true if erased != nullptr.
+  if (free_now) {
+    erased->Free(table_.GetAllocator());
+  }
+}
+
+// Returns the total charge currently accounted to this shard.
+size_t LRUCacheShard::GetUsage() const {
+  DMutexLock guard(mutex_);
+  const size_t usage = usage_;
+  return usage;
+}
+
+// Returns the part of the shard's usage that is not on the LRU list.
+size_t LRUCacheShard::GetPinnedUsage() const {
+  DMutexLock guard(mutex_);
+  assert(usage_ >= lru_usage_);
+  const size_t pinned = usage_ - lru_usage_;
+  return pinned;
+}
+
+// Returns the number of entries currently in the hash table.
+size_t LRUCacheShard::GetOccupancyCount() const {
+  DMutexLock guard(mutex_);
+  const size_t occupancy = table_.GetOccupancyCount();
+  return occupancy;
+}
+
+// Returns the current number of hash buckets (2^length_bits).
+size_t LRUCacheShard::GetTableAddressCount() const {
+  DMutexLock guard(mutex_);
+  const int length_bits = table_.GetLengthBits();
+  return size_t{1} << length_bits;
+}
+
+// Appends one line each for the high-pri and low-pri pool ratios to str.
+// The ratios are read under the shard mutex; the append happens afterwards.
+void LRUCacheShard::AppendPrintableOptions(std::string& str) const {
+  constexpr int kBufferSize = 200;
+  char buffer[kBufferSize];
+  {
+    DMutexLock guard(mutex_);
+    // Both lines are formatted by a single call into the same buffer.
+    snprintf(buffer, kBufferSize,
+             "    high_pri_pool_ratio: %.3lf\n"
+             "    low_pri_pool_ratio: %.3lf\n",
+             high_pri_pool_ratio_, low_pri_pool_ratio_);
+  }
+  str.append(buffer);
+}
+
+// Constructs every shard in place with the per-shard capacity and the
+// relevant options. Each shard's table may use the hash bits that are not
+// consumed by sharding (32 - num_shard_bits).
+LRUCache::LRUCache(const LRUCacheOptions& opts) : ShardedCache(opts) {
+  const size_t shard_capacity = GetPerShardCapacity();
+  MemoryAllocator* const allocator = memory_allocator();
+  InitShards([&](LRUCacheShard* shard) {
+    new (shard) LRUCacheShard(
+        shard_capacity, opts.strict_capacity_limit, opts.high_pri_pool_ratio,
+        opts.low_pri_pool_ratio, opts.use_adaptive_mutex,
+        opts.metadata_charge_policy,
+        /* max_upper_hash_bits */ 32 - opts.num_shard_bits, allocator,
+        &eviction_callback_);
+  });
+}
+
+// Returns the object stored in the entry behind handle.
+Cache::ObjectPtr LRUCache::Value(Handle* handle) {
+  return reinterpret_cast<const LRUHandle*>(handle)->value;
+}
+
+// Returns the charge of the entry behind handle without the metadata part,
+// using the metadata charge policy of shard 0.
+size_t LRUCache::GetCharge(Handle* handle) const {
+  const auto* entry = reinterpret_cast<const LRUHandle*>(handle);
+  return entry->GetCharge(GetShard(0).metadata_charge_policy_);
+}
+
+// Returns the helper that was stored with the entry behind handle.
+const Cache::CacheItemHelper* LRUCache::GetCacheItemHelper(
+    Handle* handle) const {
+  return reinterpret_cast<const LRUHandle*>(handle)->helper;
+}
+
+// Test-only: total number of entries on the LRU lists of all shards.
+size_t LRUCache::TEST_GetLRUSize() {
+  return SumOverShards(
+      [](LRUCacheShard& shard) { return shard.TEST_GetLRUSize(); });
+}
+
+// Returns the high-pri pool ratio as reported by shard 0.
+double LRUCache::GetHighPriPoolRatio() {
+  const double ratio = GetShard(0).GetHighPriPoolRatio();
+  return ratio;
+}
+
+}  // namespace lru_cache
+
+// Validates these options and builds an LRUCache from them.
+//
+// Returns nullptr when the options are invalid:
+//  * num_shard_bits >= 20,
+//  * high_pri_pool_ratio or low_pri_pool_ratio outside [0, 1] (NaN included),
+//  * the two ratios adding up to more than 1.
+// A negative num_shard_bits is replaced by a default derived from capacity.
+// When secondary_cache is set, the LRUCache is wrapped in a
+// CacheWithSecondaryAdapter.
+std::shared_ptr<Cache> LRUCacheOptions::MakeSharedCache() const {
+  if (num_shard_bits >= 20) {
+    return nullptr;  // The cache cannot be sharded into too many fine pieces.
+  }
+  // The range checks are written in positive form on purpose: every
+  // comparison involving NaN is false, so "x < 0.0 || x > 1.0" would let a
+  // NaN ratio through, whereas "!(x >= 0.0 && x <= 1.0)" rejects it.
+  if (!(high_pri_pool_ratio >= 0.0 && high_pri_pool_ratio <= 1.0)) {
+    // Invalid high_pri_pool_ratio
+    return nullptr;
+  }
+  if (!(low_pri_pool_ratio >= 0.0 && low_pri_pool_ratio <= 1.0)) {
+    // Invalid low_pri_pool_ratio
+    return nullptr;
+  }
+  if (low_pri_pool_ratio + high_pri_pool_ratio > 1.0) {
+    // Invalid high_pri_pool_ratio and low_pri_pool_ratio combination
+    return nullptr;
+  }
+  // For sanitized options
+  LRUCacheOptions opts = *this;
+  if (opts.num_shard_bits < 0) {
+    opts.num_shard_bits = GetDefaultCacheShardBits(capacity);
+  }
+  std::shared_ptr<Cache> cache = std::make_shared<LRUCache>(opts);
+  if (secondary_cache) {
+    cache = std::make_shared<CacheWithSecondaryAdapter>(cache, secondary_cache);
+  }
+  return cache;
+}
+
+// Builds a row cache from these options. A secondary cache is not allowed
+// for a RowCache, so nullptr is returned when one is configured.
+std::shared_ptr<RowCache> LRUCacheOptions::MakeSharedRowCache() const {
+  if (!secondary_cache) {
+    // Works while RowCache is an alias for Cache
+    return MakeSharedCache();
+  }
+  // Not allowed for a RowCache
+  return nullptr;
+}
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/lru_cache.h
+++ b/cache/lru_cache.h
@ -0,0 +1,467 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "cache/sharded_cache.h"
+#include "port/lang.h"
+#include "port/likely.h"
+#include "port/malloc.h"
+#include "port/port.h"
+#include "util/autovector.h"
+#include "util/distributed_mutex.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace lru_cache {
+
+// LRU cache implementation. This class is not thread-safe.
+
+// An entry is a variable length heap-allocated structure.
+// Entries are referenced by cache and/or by any external entity.
+// The cache keeps all its entries in a hash table. Some elements
+// are also stored on LRU list.
+//
+// LRUHandle can be in these states:
+// 1. Referenced externally AND in hash table.
+//    In that case the entry is *not* in the LRU list
+//    (refs >= 1 && in_cache == true)
+// 2. Not referenced externally AND in hash table.
+//    In that case the entry is in the LRU list and can be freed.
+//    (refs == 0 && in_cache == true)
+// 3. Referenced externally AND not in hash table.
+//    In that case the entry is not in the LRU list and not in hash table.
+//    The entry must be freed if refs becomes 0 in this state.
+//    (refs >= 1 && in_cache == false)
+// If you call LRUCacheShard::Release enough times on an entry in state 1, it
+// will go into state 2. To move from state 1 to state 3, either call
+// LRUCacheShard::Erase or LRUCacheShard::Insert with the same key (but
+// possibly different value). To move from state 2 to state 1, use
+// LRUCacheShard::Lookup.
+// While refs > 0, public properties like value and deleter must not change.
+
+struct LRUHandle {
+  // The cached object and the helper used to dispose of it (see Free()).
+  Cache::ObjectPtr value;
+  const Cache::CacheItemHelper* helper;
+  // Link to the next entry in the same hash bucket.
+  LRUHandle* next_hash;
+  // Links within the LRU list.
+  LRUHandle* next;
+  LRUHandle* prev;
+  size_t total_charge;  // TODO(opt): Only allow uint32_t?
+  size_t key_length;
+  // The hash of key(). Used for fast sharding and comparisons.
+  uint32_t hash;
+  // The number of external refs to this entry. The cache itself is not counted.
+  uint32_t refs;
+
+  // Mutable flags - access controlled by mutex
+  // The m_ and M_ prefixes (and im_ and IM_ later) are to hopefully avoid
+  // checking an M_ flag on im_flags or an IM_ flag on m_flags.
+  uint8_t m_flags;
+  enum MFlags : uint8_t {
+    // Whether this entry is referenced by the hash table.
+    M_IN_CACHE = (1 << 0),
+    // Whether this entry has had any lookups (hits).
+    M_HAS_HIT = (1 << 1),
+    // Whether this entry is in high-pri pool.
+    M_IN_HIGH_PRI_POOL = (1 << 2),
+    // Whether this entry is in low-pri pool.
+    M_IN_LOW_PRI_POOL = (1 << 3),
+  };
+
+  // "Immutable" flags - only set in single-threaded context and then
+  // can be accessed without mutex
+  uint8_t im_flags;
+  enum ImFlags : uint8_t {
+    // Whether this entry is high priority entry.
+    IM_IS_HIGH_PRI = (1 << 0),
+    // Whether this entry is low priority entry.
+    IM_IS_LOW_PRI = (1 << 1),
+    // Marks result handles that should not be inserted into cache
+    IM_IS_STANDALONE = (1 << 2),
+  };
+
+  // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
+  char key_data[1];
+
+  // View of the key bytes stored inline after the fixed-size fields.
+  Slice key() const { return Slice(key_data, key_length); }
+
+  // For HandleImpl concept
+  uint32_t GetHash() const { return hash; }
+
+  // Adds one external reference.
+  void Ref() { ++refs; }
+
+  // Drops one external reference. Returns true if it was the last one.
+  bool Unref() {
+    assert(refs > 0);
+    --refs;
+    return refs == 0;
+  }
+
+  // True while at least one external reference exists.
+  bool HasRefs() const { return refs != 0; }
+
+  // Flag queries.
+  bool InCache() const { return (m_flags & M_IN_CACHE) != 0; }
+  bool IsHighPri() const { return (im_flags & IM_IS_HIGH_PRI) != 0; }
+  bool InHighPriPool() const { return (m_flags & M_IN_HIGH_PRI_POOL) != 0; }
+  bool IsLowPri() const { return (im_flags & IM_IS_LOW_PRI) != 0; }
+  bool InLowPriPool() const { return (m_flags & M_IN_LOW_PRI_POOL) != 0; }
+  bool HasHit() const { return (m_flags & M_HAS_HIT) != 0; }
+  bool IsStandalone() const { return (im_flags & IM_IS_STANDALONE) != 0; }
+
+  void SetInCache(bool in_cache) {
+    if (!in_cache) {
+      m_flags &= ~M_IN_CACHE;
+      return;
+    }
+    m_flags |= M_IN_CACHE;
+  }
+
+  // Records the priority in the two priority bits: HIGH sets only the
+  // high-pri bit, LOW sets only the low-pri bit, anything else clears both.
+  void SetPriority(Cache::Priority priority) {
+    switch (priority) {
+      case Cache::Priority::HIGH:
+        im_flags |= IM_IS_HIGH_PRI;
+        im_flags &= ~IM_IS_LOW_PRI;
+        break;
+      case Cache::Priority::LOW:
+        im_flags &= ~IM_IS_HIGH_PRI;
+        im_flags |= IM_IS_LOW_PRI;
+        break;
+      default:
+        im_flags &= ~IM_IS_HIGH_PRI;
+        im_flags &= ~IM_IS_LOW_PRI;
+        break;
+    }
+  }
+
+  void SetInHighPriPool(bool in_high_pri_pool) {
+    if (!in_high_pri_pool) {
+      m_flags &= ~M_IN_HIGH_PRI_POOL;
+      return;
+    }
+    m_flags |= M_IN_HIGH_PRI_POOL;
+  }
+
+  void SetInLowPriPool(bool in_low_pri_pool) {
+    if (!in_low_pri_pool) {
+      m_flags &= ~M_IN_LOW_PRI_POOL;
+      return;
+    }
+    m_flags |= M_IN_LOW_PRI_POOL;
+  }
+
+  void SetHit() { m_flags |= M_HAS_HIT; }
+
+  void SetIsStandalone(bool is_standalone) {
+    if (!is_standalone) {
+      im_flags &= ~IM_IS_STANDALONE;
+      return;
+    }
+    im_flags |= IM_IS_STANDALONE;
+  }
+
+  // Disposes of the value through the helper's delete callback (if any) and
+  // then releases the handle memory itself. Requires refs == 0.
+  void Free(MemoryAllocator* allocator) {
+    assert(refs == 0);
+    assert(helper);
+    if (helper->del_cb) {
+      helper->del_cb(value, allocator);
+    }
+
+    free(this);
+  }
+
+  // Metadata overhead charged for this handle: zero unless the policy is
+  // kFullChargeCacheMetadata.
+  inline size_t CalcuMetaCharge(
+      CacheMetadataChargePolicy metadata_charge_policy) const {
+    if (metadata_charge_policy == kFullChargeCacheMetadata) {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      return malloc_usable_size(
+          const_cast<void*>(static_cast<const void*>(this)));
+#else
+      // This is the size that is used when a new handle is created.
+      return sizeof(LRUHandle) - 1 + key_length;
+#endif
+    }
+    return 0;
+  }
+
+  // Sets total_charge to the given charge plus the metadata overhead.
+  inline void CalcTotalCharge(
+      size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
+    const size_t meta_charge = CalcuMetaCharge(metadata_charge_policy);
+    total_charge = charge + meta_charge;
+  }
+
+  // Returns total_charge without the metadata overhead.
+  inline size_t GetCharge(
+      CacheMetadataChargePolicy metadata_charge_policy) const {
+    const size_t meta_charge = CalcuMetaCharge(metadata_charge_policy);
+    assert(total_charge >= meta_charge);
+    return total_charge - meta_charge;
+  }
+};
+
+// We provide our own simple hash table since it removes a whole bunch
+// of porting hacks and is also faster than some of the built-in hash
+// table implementations in some of the compiler/runtime combinations
+// we have tested.  E.g., readrandom speeds up by ~5% over the g++
+// 4.4.3's builtin hashtable.
+class LRUHandleTable {
+ public:
+  explicit LRUHandleTable(int max_upper_hash_bits, MemoryAllocator* allocator);
+  ~LRUHandleTable();
+
+  LRUHandle* Lookup(const Slice& key, uint32_t hash);
+  LRUHandle* Insert(LRUHandle* h);
+  LRUHandle* Remove(const Slice& key, uint32_t hash);
+
+  // Calls func on every entry in the buckets [index_begin, index_end).
+  // The successor is read before func runs, so func may free the entry it
+  // is given.
+  template <typename T>
+  void ApplyToEntriesRange(T func, size_t index_begin, size_t index_end) {
+    for (size_t bucket = index_begin; bucket < index_end; ++bucket) {
+      LRUHandle* entry = list_[bucket];
+      while (entry != nullptr) {
+        LRUHandle* const following = entry->next_hash;
+        assert(entry->InCache());
+        func(entry);
+        entry = following;
+      }
+    }
+  }
+
+  int GetLengthBits() const { return length_bits_; }
+
+  size_t GetOccupancyCount() const { return elems_; }
+
+  MemoryAllocator* GetAllocator() const { return allocator_; }
+
+ private:
+  // Return a pointer to slot that points to a cache entry that
+  // matches key/hash.  If there is no such cache entry, return a
+  // pointer to the trailing slot in the corresponding linked list.
+  LRUHandle** FindPointer(const Slice& key, uint32_t hash);
+
+  void Resize();
+
+  // Number of hash bits (upper because lower bits used for sharding)
+  // used for table index. Length == 1 << length_bits_
+  int length_bits_;
+
+  // The table consists of an array of buckets where each bucket is
+  // a linked list of cache entries that hash into the bucket.
+  std::unique_ptr<LRUHandle*[]> list_;
+
+  // Number of elements currently in the table.
+  uint32_t elems_;
+
+  // Set from max_upper_hash_bits (see constructor).
+  const int max_length_bits_;
+
+  // From Cache, needed for delete
+  MemoryAllocator* const allocator_;
+};
+
+// A single shard of sharded cache.
+class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShardBase {
+ public:
+  // NOTE: the eviction_callback ptr is saved, as it is assumed to be kept
+  // alive in Cache.
+  LRUCacheShard(size_t capacity, bool strict_capacity_limit,
+                double high_pri_pool_ratio, double low_pri_pool_ratio,
+                bool use_adaptive_mutex,
+                CacheMetadataChargePolicy metadata_charge_policy,
+                int max_upper_hash_bits, MemoryAllocator* allocator,
+                const Cache::EvictionCallback* eviction_callback);
+
+ public:  // Type definitions expected as parameter to ShardedCache
+  using HandleImpl = LRUHandle;
+  using HashVal = uint32_t;
+  using HashCref = uint32_t;
+
+ public:  // Function definitions expected as parameter to ShardedCache
+  // Hashes `key` with `seed` and keeps the lower 32 bits of the 64-bit
+  // result.
+  static inline HashVal ComputeHash(const Slice& key, uint32_t seed) {
+    return Lower32of64(GetSliceNPHash64(key, seed));
+  }
+
+  // Separate from constructor so caller can easily make an array of LRUCache
+  // if current usage is more than new capacity, the function will attempt to
+  // free the needed space.
+  void SetCapacity(size_t capacity);
+
+  // Set the flag to reject insertion if cache is full.
+  void SetStrictCapacityLimit(bool strict_capacity_limit);
+
+  // Set percentage of capacity reserved for high-pri cache entries.
+  void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
+
+  // Set percentage of capacity reserved for low-pri cache entries.
+  void SetLowPriorityPoolRatio(double low_pri_pool_ratio);
+
+  // Like Cache methods, but with an extra "hash" parameter.
+  Status Insert(const Slice& key, uint32_t hash, Cache::ObjectPtr value,
+                const Cache::CacheItemHelper* helper, size_t charge,
+                LRUHandle** handle, Cache::Priority priority);
+
+  LRUHandle* CreateStandalone(const Slice& key, uint32_t hash,
+                              Cache::ObjectPtr obj,
+                              const Cache::CacheItemHelper* helper,
+                              size_t charge, bool allow_uncharged);
+
+  LRUHandle* Lookup(const Slice& key, uint32_t hash,
+                    const Cache::CacheItemHelper* helper,
+                    Cache::CreateContext* create_context,
+                    Cache::Priority priority, Statistics* stats);
+
+  bool Release(LRUHandle* handle, bool useful, bool erase_if_last_ref);
+  bool Ref(LRUHandle* handle);
+  void Erase(const Slice& key, uint32_t hash);
+
+  // Although in some platforms the update of size_t is atomic, to make sure
+  // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
+  // protect them with mutex_.
+
+  size_t GetUsage() const;
+  size_t GetPinnedUsage() const;
+  size_t GetOccupancyCount() const;
+  size_t GetTableAddressCount() const;
+
+  void ApplyToSomeEntries(
+      const std::function<void(const Slice& key, Cache::ObjectPtr value,
+                               size_t charge,
+                               const Cache::CacheItemHelper* helper)>& callback,
+      size_t average_entries_per_lock, size_t* state);
+
+  void EraseUnRefEntries();
+
+ public:  // other function definitions
+  void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri,
+                       LRUHandle** lru_bottom_pri);
+
+  // Retrieves number of elements in LRU, for unit test purpose only.
+  // Not threadsafe.
+  size_t TEST_GetLRUSize();
+
+  // Retrieves high pri pool ratio
+  double GetHighPriPoolRatio();
+
+  // Retrieves low pri pool ratio
+  double GetLowPriPoolRatio();
+
+  void AppendPrintableOptions(std::string& /*str*/) const;
+
+ private:
+  friend class LRUCache;
+  // Insert an item into the hash table and, if handle is null, insert into
+  // the LRU list. Older items are evicted as necessary. Frees `item` on
+  // non-OK status.
+  Status InsertItem(LRUHandle* item, LRUHandle** handle);
+
+  void LRU_Remove(LRUHandle* e);
+  void LRU_Insert(LRUHandle* e);
+
+  // Overflow the last entry in high-pri pool to low-pri pool until size of
+  // high-pri pool is no larger than the size specified by high_pri_pool_pct.
+  void MaintainPoolSize();
+
+  // Free some space following strict LRU policy until enough space
+  // to hold (usage_ + charge) is freed or the lru list is empty
+  // This function is not thread safe - it needs to be executed while
+  // holding the mutex_.
+  void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
+
+  void NotifyEvicted(const autovector<LRUHandle*>& evicted_handles);
+
+  LRUHandle* CreateHandle(const Slice& key, uint32_t hash,
+                          Cache::ObjectPtr value,
+                          const Cache::CacheItemHelper* helper, size_t charge);
+
+  // Initialized before use.
+  size_t capacity_;
+
+  // Memory size for entries in high-pri pool.
+  size_t high_pri_pool_usage_;
+
+  // Memory size for entries in low-pri pool.
+  size_t low_pri_pool_usage_;
+
+  // Whether to reject insertion if cache reaches its full capacity.
+  bool strict_capacity_limit_;
+
+  // Ratio of capacity reserved for high priority cache entries.
+  double high_pri_pool_ratio_;
+
+  // High-pri pool size, equals to capacity * high_pri_pool_ratio.
+  // Remember the value to avoid recomputing each time.
+  double high_pri_pool_capacity_;
+
+  // Ratio of capacity reserved for low priority cache entries.
+  double low_pri_pool_ratio_;
+
+  // Low-pri pool size, equals to capacity * low_pri_pool_ratio.
+  // Remember the value to avoid recomputing each time.
+  double low_pri_pool_capacity_;
+
+  // Dummy head of LRU list.
+  // lru.prev is newest entry, lru.next is oldest entry.
+  // LRU contains items which can be evicted, i.e. referenced only by the
+  // cache.
+  LRUHandle lru_;
+
+  // Pointer to head of low-pri pool in LRU list.
+  LRUHandle* lru_low_pri_;
+
+  // Pointer to head of bottom-pri pool in LRU list.
+  LRUHandle* lru_bottom_pri_;
+
+  // ------------^^^^^^^^^^^^^-----------
+  // Not frequently modified data members
+  // ------------------------------------
+  //
+  // We separate data members that are updated frequently from the ones that
+  // are not frequently updated so that they don't share the same cache line,
+  // which would lead to false sharing.
+  //
+  // ------------------------------------
+  // Frequently modified data members
+  // ------------vvvvvvvvvvvvv-----------
+  LRUHandleTable table_;
+
+  // Memory size for entries residing in the cache.
+  size_t usage_;
+
+  // Memory size for entries residing only in the LRU list.
+  size_t lru_usage_;
+
+  // mutex_ protects the following state.
+  // We don't count mutex_ as the cache's internal state so semantically we
+  // don't mind mutex_ invoking the non-const actions.
+  mutable DMutex mutex_;
+
+  // A reference to Cache::eviction_callback_
+  const Cache::EvictionCallback& eviction_callback_;
+};
+
+// Sharded cache whose shards are LRUCacheShard instances.
+// The class is marked `final` only when NDEBUG is defined.
+// NOTE(review): presumably this lets debug/test builds derive from it -
+// confirm against the tests.
+class LRUCache
+#ifdef NDEBUG
+    final
+#endif
+    : public ShardedCache<LRUCacheShard> {
+ public:
+  explicit LRUCache(const LRUCacheOptions& opts);
+  // Fixed, human-readable name of this cache implementation.
+  const char* Name() const override { return "LRUCache"; }
+  ObjectPtr Value(Handle* handle) override;
+  size_t GetCharge(Handle* handle) const override;
+  const CacheItemHelper* GetCacheItemHelper(Handle* handle) const override;
+
+  // Retrieves number of elements in LRU, for unit test purpose only.
+  size_t TEST_GetLRUSize();
+  // Retrieves high pri pool ratio.
+  double GetHighPriPoolRatio();
+};
+
+}  // namespace lru_cache
+
+// Expose the lru_cache implementation types under their unqualified names in
+// the enclosing namespace.
+using LRUCache = lru_cache::LRUCache;
+using LRUHandle = lru_cache::LRUHandle;
+using LRUCacheShard = lru_cache::LRUCacheShard;
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/lru_cache_test.cc
+++ b/cache/lru_cache_test.cc
--- a/cache/secondary_cache.cc
+++ b/cache/secondary_cache.cc
@ -0,0 +1,44 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/secondary_cache.h"
+
+#include "cache/cache_entry_roles.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+// Deleter callback that deliberately leaves the object untouched.
+void NoopDelete(Cache::ObjectPtr /*obj*/, MemoryAllocator* /*allocator*/) {}
+
+// Size callback: `obj` is treated as a Slice and its length is reported.
+size_t SliceSize(Cache::ObjectPtr obj) {
+  const Slice* as_slice = static_cast<Slice*>(obj);
+  return as_slice->size();
+}
+
+// Save callback: copies `length` bytes, starting `from_offset` bytes into
+// the Slice behind `from_obj`, to `out`. Always reports OK.
+Status SliceSaveTo(Cache::ObjectPtr from_obj, size_t from_offset, size_t length,
+                   char* out) {
+  const Slice* source = static_cast<Slice*>(from_obj);
+  const char* first_byte = source->data() + from_offset;
+  std::memcpy(out, first_byte, length);
+  return Status::OK();
+}
+
+// Create callback that always fails with NotSupported.
+Status FailCreate(const Slice& /*data*/, Cache::CreateContext* /*context*/,
+                  MemoryAllocator* /*allocator*/, Cache::ObjectPtr* /*out_obj*/,
+                  size_t* /*out_charge*/) {
+  return Status::NotSupported("Only for dumping data into SecondaryCache");
+}
+
+}  // namespace
+
+// Inserts already-saved data for `key` by handing a pointer to `saved` to
+// Insert() together with a helper whose size/save callbacks read a Slice.
+Status SecondaryCache::InsertSaved(const Slice& key, const Slice& saved) {
+  static Cache::CacheItemHelper basic_helper{CacheEntryRole::kMisc,
+                                             &NoopDelete};
+  static Cache::CacheItemHelper saving_helper{
+      CacheEntryRole::kMisc, &NoopDelete, &SliceSize,
+      &SliceSaveTo,          &FailCreate, &basic_helper};
+  // NOTE: this relies on Insert() being synchronous, i.e. not holding on to
+  // the pointer to `saved` after it returns.
+  Slice* const saved_ptr = const_cast<Slice*>(&saved);
+  return Insert(key, saved_ptr, &saving_helper);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/secondary_cache_adapter.cc
+++ b/cache/secondary_cache_adapter.cc
@ -0,0 +1,433 @@
+//  Copyright (c) Meta Platforms, Inc. and affiliates.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "cache/secondary_cache_adapter.h"
+
+#include "monitoring/perf_context_imp.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+// A distinct pointer value for marking "dummy" cache entries.
+// Only the address of the single kDummy instance is used: code in this file
+// compares cache values against kDummyObj to recognize such entries.
+struct Dummy {
+  char val[7] = "kDummy";
+};
+const Dummy kDummy{};
+// kDummy's address as a Cache::ObjectPtr (constness is cast away so that it
+// fits the pointer type).
+Cache::ObjectPtr const kDummyObj = const_cast<Dummy*>(&kDummy);
+}  // namespace
+
+// When CacheWithSecondaryAdapter is constructed with the distribute_cache_res
+// parameter set to true, it manages the entire memory budget across the
+// primary and secondary cache. The secondary cache is assumed to be in
+// memory, such as the CompressedSecondaryCache. When a placeholder entry
+// is inserted by a CacheReservationManager instance to reserve memory,
+// the CacheWithSecondaryAdapter ensures that the reservation is distributed
+// proportionally across the primary/secondary caches.
+//
+// The primary block cache is initially sized to the sum of the primary cache
+// budget + the secondary cache budget, as follows -
+//   |---------    Primary Cache Configured Capacity  -----------|
+//   |---Secondary Cache Budget----|----Primary Cache Budget-----|
+//
+// A ConcurrentCacheReservationManager member in the CacheWithSecondaryAdapter,
+// pri_cache_res_,
+// is used to help with tracking the distribution of memory reservations.
+// Initially, it accounts for the entire secondary cache budget as a
+// reservation against the primary cache. This shrinks the usable capacity of
+// the primary cache to the budget that the user originally desired.
+//
+//   |--Reservation for Sec Cache--|-Pri Cache Usable Capacity---|
+//
+// When a reservation placeholder is inserted into the adapter, it is inserted
+// directly into the primary cache. This means the entire charge of the
+// placeholder is counted against the primary cache. To compensate and count
+// a portion of it against the secondary cache, the secondary cache Deflate()
+// method is called to shrink it. Since the Deflate() causes the secondary
+// actual usage to shrink, it is reflected here by releasing an equal amount
+// from the pri_cache_res_ reservation. The Deflate() in the secondary cache
+// can be, but is not required to be, implemented using its own cache
+// reservation manager.
+//
+// For example, if the pri/sec ratio is 70/30, and the combined capacity is
+// 100MB, the intermediate and final  state after inserting a reservation
+// placeholder for 10MB would be as follows -
+//
+//   |-Reservation for Sec Cache-|-Pri Cache Usable Capacity-|---R---|
+// 1. After inserting the placeholder in primary
+//   |-------  30MB -------------|------- 60MB -------------|-10MB--|
+// 2. After deflating the secondary and adjusting the reservation for
+//    secondary against the primary
+//   |-------  27MB -------------|------- 63MB -------------|-10MB--|
+//
+// Likewise, when the user inserted placeholder is released, the secondary
+// cache Inflate() method is called to grow it, and the pri_cache_res_
+// reservation is increased by an equal amount.
+//
+// Another way of implementing this would have been to simply split the user
+// reservation into primary and secondary components. However, this would
+// require allocating a structure to track the associated secondary cache
+// reservation, which adds some complexity and overhead.
+//
+CacheWithSecondaryAdapter::CacheWithSecondaryAdapter(
+    std::shared_ptr<Cache> target,
+    std::shared_ptr<SecondaryCache> secondary_cache, bool distribute_cache_res)
+    : CacheWrapper(std::move(target)),
+      secondary_cache_(std::move(secondary_cache)),
+      distribute_cache_res_(distribute_cache_res),
+      // Give the ratio a defined value even when reservations are not
+      // distributed; it is only assigned a real value below.
+      sec_cache_res_ratio_(0.0) {
+  // Route evictions from the target cache through EvictionHandler().
+  target_->SetEvictionCallback([this](const Slice& key, Handle* handle) {
+    return EvictionHandler(key, handle);
+  });
+  if (distribute_cache_res_) {
+    size_t sec_capacity = 0;
+    pri_cache_res_ = std::make_shared<ConcurrentCacheReservationManager>(
+        std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
+            target_));
+    Status s = secondary_cache_->GetCapacity(sec_capacity);
+    assert(s.ok());
+    // Initially, the primary cache is sized to uncompressed cache budget plus
+    // compressed secondary cache budget. The secondary cache budget is then
+    // taken away from the primary cache through cache reservations. Later,
+    // when a placeholder entry is inserted by the caller, it's inserted
+    // into the primary cache and the portion that should be assigned to the
+    // secondary cache is freed from the reservation.
+    s = pri_cache_res_->UpdateCacheReservation(sec_capacity);
+    assert(s.ok());
+    // Fraction of the target cache's capacity that belongs to the secondary
+    // cache.
+    sec_cache_res_ratio_ =
+        static_cast<double>(sec_capacity) / target_->GetCapacity();
+  }
+}
+
+CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() {
+  // `*target_` outlives `*this`, so drop the callback that captured `this`
+  // to prevent a use after free.
+  target_->SetEvictionCallback({});
+#ifndef NDEBUG
+  // Debug-only consistency check: the reservation held against the primary
+  // cache must equal the secondary cache's capacity.
+  if (!distribute_cache_res_) {
+    return;
+  }
+  size_t expected_reserved = 0;
+  Status capacity_status = secondary_cache_->GetCapacity(expected_reserved);
+  assert(capacity_status.ok());
+  assert(pri_cache_res_->GetTotalReservedCacheSize() == expected_reserved);
+#endif  // NDEBUG
+}
+
+// Eviction callback for the target cache. Entries whose helper is secondary
+// cache compatible (and that are not dummy entries) are offered to the
+// secondary cache. Always returns false: ownership of the object is never
+// taken.
+bool CacheWithSecondaryAdapter::EvictionHandler(const Slice& key,
+                                                Handle* handle) {
+  auto item_helper = GetCacheItemHelper(handle);
+  if (!item_helper->IsSecondaryCacheCompatible()) {
+    return false;
+  }
+  auto evicted_obj = target_->Value(handle);
+  if (evicted_obj == kDummyObj) {
+    // Dummy entries are not spilled.
+    return false;
+  }
+  // Spill into secondary cache; a failure here is deliberately ignored.
+  secondary_cache_->Insert(key, evicted_obj, item_helper)
+      .PermitUncheckedError();
+  return false;
+}
+
+// If `*handle` refers to a dummy entry, releases it (passing `erase` through
+// to Release), nulls `*handle`, and returns true. Otherwise leaves `*handle`
+// untouched and returns false.
+bool CacheWithSecondaryAdapter::ProcessDummyResult(Cache::Handle** handle,
+                                                   bool erase) {
+  const bool is_dummy =
+      (*handle != nullptr) && (target_->Value(*handle) == kDummyObj);
+  if (!is_dummy) {
+    return false;
+  }
+  target_->Release(*handle, erase);
+  *handle = nullptr;
+  return true;
+}
+
+// Runs the helper's deleter on `obj` with this cache's memory allocator; does
+// nothing when the helper has no deleter.
+void CacheWithSecondaryAdapter::CleanupCacheObject(
+    ObjectPtr obj, const CacheItemHelper* helper) {
+  if (!helper->del_cb) {
+    return;
+  }
+  helper->del_cb(obj, memory_allocator());
+}
+
+// Turns a ready secondary cache lookup result into a handle on the primary
+// cache.
+//
+// Returns nullptr when the secondary handle carries no object. Otherwise
+// records secondary-cache hit statistics and returns either a regular primary
+// cache entry or a standalone handle, as described on the branches below.
+// `found_dummy_entry` and `kept_in_sec_cache` are the values obtained during
+// the lookup that produced `secondary_handle`.
+Cache::Handle* CacheWithSecondaryAdapter::Promote(
+    std::unique_ptr<SecondaryCacheResultHandle>&& secondary_handle,
+    const Slice& key, const CacheItemHelper* helper, Priority priority,
+    Statistics* stats, bool found_dummy_entry, bool kept_in_sec_cache) {
+  assert(secondary_handle->IsReady());
+
+  ObjectPtr obj = secondary_handle->Value();
+  if (!obj) {
+    // Nothing found.
+    return nullptr;
+  }
+  // Found something.
+  // Per-role hit counters; other roles only count toward the overall hit
+  // ticker below.
+  switch (helper->role) {
+    case CacheEntryRole::kFilterBlock:
+      RecordTick(stats, SECONDARY_CACHE_FILTER_HITS);
+      break;
+    case CacheEntryRole::kIndexBlock:
+      RecordTick(stats, SECONDARY_CACHE_INDEX_HITS);
+      break;
+    case CacheEntryRole::kDataBlock:
+      RecordTick(stats, SECONDARY_CACHE_DATA_HITS);
+      break;
+    default:
+      break;
+  }
+  PERF_COUNTER_ADD(secondary_cache_hit_count, 1);
+  RecordTick(stats, SECONDARY_CACHE_HITS);
+
+  // Note: SecondaryCache::Size() is really charge (from the CreateCallback)
+  size_t charge = secondary_handle->Size();
+  Handle* result = nullptr;
+  // Insert into primary cache, possibly as a standalone+dummy entries.
+  if (secondary_cache_->SupportForceErase() && !found_dummy_entry) {
+    // Create standalone and insert dummy
+    // Allow standalone to be created even if cache is full, to avoid
+    // reading the entry from storage.
+    result =
+        CreateStandalone(key, obj, helper, charge, /*allow_uncharged*/ true);
+    assert(result);
+    PERF_COUNTER_ADD(block_cache_standalone_handle_count, 1);
+
+    // Insert dummy to record recent use
+    // TODO: try to avoid case where inserting this dummy could overwrite a
+    // regular entry
+    Status s = Insert(key, kDummyObj, &kNoopCacheItemHelper, /*charge=*/0,
+                      /*handle=*/nullptr, priority);
+    s.PermitUncheckedError();
+    // Nothing to do or clean up on dummy insertion failure
+  } else {
+    // Insert regular entry into primary cache.
+    // Don't allow it to spill into secondary cache again if it was kept there.
+    Status s = Insert(
+        key, obj, kept_in_sec_cache ? helper->without_secondary_compat : helper,
+        charge, &result, priority);
+    if (s.ok()) {
+      assert(result);
+      PERF_COUNTER_ADD(block_cache_real_handle_count, 1);
+    } else {
+      // Create standalone result instead, even if cache is full, to avoid
+      // reading the entry from storage.
+      result =
+          CreateStandalone(key, obj, helper, charge, /*allow_uncharged*/ true);
+      assert(result);
+      PERF_COUNTER_ADD(block_cache_standalone_handle_count, 1);
+    }
+  }
+  return result;
+}
+
+// Inserts into the target cache. When reservations are distributed and the
+// inserted entry is a placeholder (null value), the secondary cache is
+// deflated by its share of the charge and the same amount is released from
+// the reservation held against the primary cache.
+Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value,
+                                         const CacheItemHelper* helper,
+                                         size_t charge, Handle** handle,
+                                         Priority priority) {
+  Status s = target_->Insert(key, value, helper, charge, handle, priority);
+  const bool inserted_placeholder =
+      s.ok() && value == nullptr && distribute_cache_res_;
+  if (!inserted_placeholder) {
+    return s;
+  }
+
+  // Portion of the placeholder's charge attributed to the secondary cache.
+  const size_t sec_charge =
+      static_cast<size_t>(charge * (sec_cache_res_ratio_));
+  s = secondary_cache_->Deflate(sec_charge);
+  assert(s.ok());
+  s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/false);
+  assert(s.ok());
+  return s;
+}
+
+// Looks `key` up in the target cache first and, on a miss (or a dummy hit)
+// with a secondary-compatible helper, in the secondary cache; a secondary
+// hit is promoted into the primary cache.
+Cache::Handle* CacheWithSecondaryAdapter::Lookup(const Slice& key,
+                                                 const CacheItemHelper* helper,
+                                                 CreateContext* create_context,
+                                                 Priority priority,
+                                                 Statistics* stats) {
+  // NOTE: StartAsyncLookup() followed by Wait() would also work, but the
+  // direct path below should be a bit more efficient.
+  Handle* result =
+      target_->Lookup(key, helper, create_context, priority, stats);
+  const bool secondary_compatible =
+      helper && helper->IsSecondaryCacheCompatible();
+  const bool found_dummy_entry =
+      ProcessDummyResult(&result, /*erase=*/secondary_compatible);
+  if (result != nullptr || !secondary_compatible) {
+    return result;
+  }
+
+  // Primary miss: consult our secondary cache.
+  bool kept_in_sec_cache = false;
+  std::unique_ptr<SecondaryCacheResultHandle> secondary_handle =
+      secondary_cache_->Lookup(key, helper, create_context, /*wait*/ true,
+                               found_dummy_entry, /*out*/ kept_in_sec_cache);
+  if (!secondary_handle) {
+    return nullptr;
+  }
+  return Promote(std::move(secondary_handle), key, helper, priority, stats,
+                 found_dummy_entry, kept_in_sec_cache);
+}
+
+// Releases `handle` on the target cache. When the release may erase a
+// placeholder (null value) and reservations are distributed, the secondary
+// cache is first inflated by its share of the charge and the reservation
+// against the primary cache grows by the same amount.
+bool CacheWithSecondaryAdapter::Release(Handle* handle,
+                                        bool erase_if_last_ref) {
+  const bool releasing_placeholder = erase_if_last_ref &&
+                                     target_->Value(handle) == nullptr &&
+                                     distribute_cache_res_;
+  if (releasing_placeholder) {
+    const size_t charge = target_->GetCharge(handle);
+    const size_t sec_charge =
+        static_cast<size_t>(charge * (sec_cache_res_ratio_));
+    Status s = secondary_cache_->Inflate(sec_charge);
+    assert(s.ok());
+    s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/true);
+    assert(s.ok());
+  }
+  return target_->Release(handle, erase_if_last_ref);
+}
+
+// Returns the target cache's value for `handle`; callers are not expected to
+// see dummy entries here (checked by assertion).
+Cache::ObjectPtr CacheWithSecondaryAdapter::Value(Handle* handle) {
+  ObjectPtr const stored = target_->Value(handle);
+  // TODO with stacked secondaries: might fail in EvictionHandler
+  assert(stored != kDummyObj);
+  return stored;
+}
+
+// Starts a non-waiting lookup on this adapter's secondary cache for a handle
+// that is neither pending nor resolved. If the secondary cache returns a
+// handle, it is recorded on `async_handle` as pending on our secondary cache.
+void CacheWithSecondaryAdapter::StartAsyncLookupOnMySecondary(
+    AsyncLookupHandle& async_handle) {
+  assert(!async_handle.IsPending());
+  assert(async_handle.result_handle == nullptr);
+
+  std::unique_ptr<SecondaryCacheResultHandle> started =
+      secondary_cache_->Lookup(async_handle.key, async_handle.helper,
+                               async_handle.create_context, /*wait*/ false,
+                               async_handle.found_dummy_entry,
+                               /*out*/ async_handle.kept_in_sec_cache);
+  if (!started) {
+    return;
+  }
+  // TODO with stacked secondaries: Check & process if already ready?
+  async_handle.pending_handle = started.release();
+  async_handle.pending_cache = secondary_cache_.get();
+}
+
+// Starts the lookup on the target cache; if that leaves the handle neither
+// pending nor resolved and the helper is secondary-compatible, continues the
+// lookup on this adapter's secondary cache.
+void CacheWithSecondaryAdapter::StartAsyncLookup(
+    AsyncLookupHandle& async_handle) {
+  target_->StartAsyncLookup(async_handle);
+  if (async_handle.IsPending()) {
+    // Already waiting on some inner cache; nothing more to do for now.
+    return;
+  }
+
+  const bool secondary_compatible =
+      async_handle.helper &&
+      async_handle.helper->IsSecondaryCacheCompatible();
+  const bool was_dummy = ProcessDummyResult(&async_handle.result_handle,
+                                            /*erase=*/secondary_compatible);
+  async_handle.found_dummy_entry |= was_dummy;
+
+  if (async_handle.Result() == nullptr && secondary_compatible) {
+    // Not found and not pending on another secondary cache
+    StartAsyncLookupOnMySecondary(async_handle);
+  }
+}
+
+// Completes the `count` async lookups in `async_handles`.
+//
+// Handles pending on this adapter's secondary cache are waited on here.
+// Handles pending on some other cache are first passed to the target's
+// WaitAll(); those that still have no result are then looked up in this
+// adapter's secondary cache. Every handle waited on here ends with its
+// result_handle set from Promote() (possibly nullptr).
+void CacheWithSecondaryAdapter::WaitAll(AsyncLookupHandle* async_handles,
+                                        size_t count) {
+  if (count == 0) {
+    // Nothing to do
+    return;
+  }
+  // Requests that are pending on *my* secondary cache, at the start of this
+  // function
+  std::vector<AsyncLookupHandle*> my_pending;
+  // Requests that are pending on an "inner" secondary cache (managed somewhere
+  // under target_), as of the start of this function
+  std::vector<AsyncLookupHandle*> inner_pending;
+
+  // Initial accounting of pending handles, excluding those already handled
+  // by "outer" secondary caches. (See cur->pending_cache = nullptr.)
+  for (size_t i = 0; i < count; ++i) {
+    AsyncLookupHandle* cur = async_handles + i;
+    if (cur->pending_cache) {
+      assert(cur->IsPending());
+      assert(cur->helper);
+      assert(cur->helper->IsSecondaryCacheCompatible());
+      if (cur->pending_cache == secondary_cache_.get()) {
+        my_pending.push_back(cur);
+        // Mark as "to be handled by this caller"
+        cur->pending_cache = nullptr;
+      } else {
+        // Remember as potentially needing a lookup in my secondary
+        inner_pending.push_back(cur);
+      }
+    }
+  }
+
+  // Wait on inner-most cache lookups first
+  // TODO with stacked secondaries: because we are not using proper
+  // async/await constructs here yet, there is a false synchronization point
+  // here where all the results at one level are needed before initiating
+  // any lookups at the next level. Probably not a big deal, but worth noting.
+  if (!inner_pending.empty()) {
+    target_->WaitAll(async_handles, count);
+  }
+
+  // For those that failed to find something, convert to lookup in my
+  // secondary cache.
+  for (AsyncLookupHandle* cur : inner_pending) {
+    if (cur->Result() == nullptr) {
+      // Not found, try my secondary
+      StartAsyncLookupOnMySecondary(*cur);
+      if (cur->IsPending()) {
+        assert(cur->pending_cache == secondary_cache_.get());
+        my_pending.push_back(cur);
+        // Mark as "to be handled by this caller"
+        cur->pending_cache = nullptr;
+      }
+    }
+  }
+
+  // Wait on all lookups on my secondary cache
+  {
+    std::vector<SecondaryCacheResultHandle*> my_secondary_handles;
+    // The final size is known up front, so allocate once.
+    my_secondary_handles.reserve(my_pending.size());
+    for (AsyncLookupHandle* cur : my_pending) {
+      my_secondary_handles.push_back(cur->pending_handle);
+    }
+    secondary_cache_->WaitAll(std::move(my_secondary_handles));
+  }
+
+  // Process results
+  for (AsyncLookupHandle* cur : my_pending) {
+    // Take ownership of the raw pending handle so that it is destroyed once
+    // Promote() is done with it.
+    std::unique_ptr<SecondaryCacheResultHandle> secondary_handle(
+        cur->pending_handle);
+    cur->pending_handle = nullptr;
+    cur->result_handle = Promote(
+        std::move(secondary_handle), cur->key, cur->helper, cur->priority,
+        cur->stats, cur->found_dummy_entry, cur->kept_in_sec_cache);
+    assert(cur->pending_cache == nullptr);
+  }
+}
+
+// Printable options of the target cache followed by a "secondary_cache"
+// section holding the secondary cache's printable options.
+std::string CacheWithSecondaryAdapter::GetPrintableOptions() const {
+  std::string printable = target_->GetPrintableOptions();
+  printable += "  secondary_cache:\n";
+  printable += secondary_cache_->GetPrintableOptions();
+  return printable;
+}
+
+const char* CacheWithSecondaryAdapter::Name() const {
+  // From the user's point of view (at least for now) this is the underlying
+  // cache configured with a secondary cache, so report that cache's name.
+  const char* underlying_name = target_->Name();
+  return underlying_name;
+}
+
+// Builds a primary cache (LRU or HyperClockCache, per opts.cache_type) sized
+// to its configured capacity plus the compressed secondary cache's capacity,
+// and wraps it, together with a new compressed secondary cache, in a
+// CacheWithSecondaryAdapter that distributes cache reservations.
+//
+// Returns nullptr when opts.cache_opts is null, the cache type is not
+// recognized, or no primary cache could be created.
+std::shared_ptr<Cache> NewTieredVolatileCache(
+    TieredVolatileCacheOptions& opts) {
+  if (!opts.cache_opts) {
+    return nullptr;
+  }
+
+  std::shared_ptr<Cache> cache;
+  if (opts.cache_type == PrimaryCacheType::kCacheTypeLRU) {
+    LRUCacheOptions cache_opts =
+        *(static_cast_with_check<LRUCacheOptions, ShardedCacheOptions>(
+            opts.cache_opts));
+    cache_opts.capacity += opts.comp_cache_opts.capacity;
+    cache = cache_opts.MakeSharedCache();
+  } else if (opts.cache_type == PrimaryCacheType::kCacheTypeHCC) {
+    HyperClockCacheOptions cache_opts =
+        *(static_cast_with_check<HyperClockCacheOptions, ShardedCacheOptions>(
+            opts.cache_opts));
+    // As in the LRU case, the primary cache must include the secondary
+    // cache's budget: the adapter reserves that amount against the primary
+    // cache, which would otherwise shrink the usable primary capacity.
+    cache_opts.capacity += opts.comp_cache_opts.capacity;
+    cache = cache_opts.MakeSharedCache();
+  } else {
+    return nullptr;
+  }
+  if (!cache) {
+    // The adapter dereferences its target unconditionally, so do not build
+    // one around a missing primary cache.
+    return nullptr;
+  }
+  std::shared_ptr<SecondaryCache> sec_cache =
+      NewCompressedSecondaryCache(opts.comp_cache_opts);
+
+  return std::make_shared<CacheWithSecondaryAdapter>(
+      cache, sec_cache, /*distribute_cache_res=*/true);
+}
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/secondary_cache_adapter.h
+++ b/cache/secondary_cache_adapter.h
@ -0,0 +1,76 @@
+//  Copyright (c) Meta Platforms, Inc. and affiliates.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "cache/cache_reservation_manager.h"
+#include "rocksdb/secondary_cache.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A CacheWrapper that pairs a target (primary) cache with a SecondaryCache.
+class CacheWithSecondaryAdapter : public CacheWrapper {
+ public:
+  // `distribute_cache_res` selects whether cache memory reservations are
+  // distributed across the primary and secondary caches (see
+  // distribute_cache_res_ below).
+  explicit CacheWithSecondaryAdapter(
+      std::shared_ptr<Cache> target,
+      std::shared_ptr<SecondaryCache> secondary_cache,
+      bool distribute_cache_res = false);
+
+  ~CacheWithSecondaryAdapter() override;
+
+  Status Insert(const Slice& key, ObjectPtr value,
+                const CacheItemHelper* helper, size_t charge,
+                Handle** handle = nullptr,
+                Priority priority = Priority::LOW) override;
+
+  Handle* Lookup(const Slice& key, const CacheItemHelper* helper,
+                 CreateContext* create_context,
+                 Priority priority = Priority::LOW,
+                 Statistics* stats = nullptr) override;
+
+  using Cache::Release;
+  bool Release(Handle* handle, bool erase_if_last_ref = false) override;
+
+  ObjectPtr Value(Handle* handle) override;
+
+  void StartAsyncLookup(AsyncLookupHandle& async_handle) override;
+
+  void WaitAll(AsyncLookupHandle* async_handles, size_t count) override;
+
+  std::string GetPrintableOptions() const override;
+
+  const char* Name() const override;
+
+  // Accessors for the wrapped target cache and the secondary cache
+  // (TEST_ prefix: presumably intended for tests only).
+  Cache* TEST_GetCache() { return target_.get(); }
+
+  SecondaryCache* TEST_GetSecondaryCache() { return secondary_cache_.get(); }
+
+ private:
+  // Callback installed on the target cache for evicted entries.
+  bool EvictionHandler(const Slice& key, Handle* handle);
+
+  // Starts a non-waiting lookup on secondary_cache_ for `async_handle`.
+  void StartAsyncLookupOnMySecondary(AsyncLookupHandle& async_handle);
+
+  // Converts a ready secondary cache result into a primary cache handle
+  // (nullptr when the secondary result holds no object).
+  Handle* Promote(
+      std::unique_ptr<SecondaryCacheResultHandle>&& secondary_handle,
+      const Slice& key, const CacheItemHelper* helper, Priority priority,
+      Statistics* stats, bool found_dummy_entry, bool kept_in_sec_cache);
+
+  // Releases `*handle` and nulls it if it refers to a dummy entry; returns
+  // whether it did.
+  bool ProcessDummyResult(Cache::Handle** handle, bool erase);
+
+  // Invokes the helper's deleter (if any) on `obj`.
+  void CleanupCacheObject(ObjectPtr obj, const CacheItemHelper* helper);
+
+  std::shared_ptr<SecondaryCache> secondary_cache_;
+  // Whether to proportionally distribute cache memory reservations, i.e.
+  // placeholder entries with null value and a non-zero charge, across
+  // the primary and secondary caches.
+  bool distribute_cache_res_;
+  // A cache reservation manager to keep track of secondary cache memory
+  // usage by reserving equivalent capacity against the primary cache
+  std::shared_ptr<ConcurrentCacheReservationManager> pri_cache_res_;
+  // Fraction of a cache memory reservation to be assigned to the secondary
+  // cache
+  double sec_cache_res_ratio_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/sharded_cache.cc
+++ b/cache/sharded_cache.cc
@ -0,0 +1,137 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "cache/sharded_cache.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+
+#include "env/unique_id_gen.h"
+#include "rocksdb/env.h"
+#include "util/hash.h"
+#include "util/math.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+// Seeds produced here are restricted to 31 bits. That way any generated seed
+// can later be passed back explicitly through ShardedCacheOptions::hash_seed
+// for diagnostic/debugging purposes.
+constexpr uint32_t kSeedMask = 0x7fffffff;
+
+// Maps the hash_seed option onto the seed actually used for hashing keys:
+//  * a non-negative option is an exact, user-specified seed;
+//  * kHostHashSeed derives the seed from the host name, falling back on a
+//    value that is stable within the process if the host name is unavailable;
+//  * anything else (kQuasiRandomHashSeed and fallback) scrambles the next
+//    value of a function-local static generator.
+uint32_t DetermineSeed(int32_t hash_seed_option) {
+  if (hash_seed_option >= 0) {
+    // Exact seed chosen by the user.
+    return static_cast<uint32_t>(hash_seed_option);
+  }
+  static SemiStructuredUniqueIdGen gen;
+  if (hash_seed_option != ShardedCacheOptions::kHostHashSeed) {
+    // kQuasiRandomHashSeed and fallback.
+    uint32_t seed = gen.GenerateNext<uint32_t>() & kSeedMask;
+    // Run the value through some 31-bit bijective transformations so that
+    // the result is quasirandom rather than merely incrementing. (An
+    // incrementing seed from a random starting point would be fine, but hard
+    // to describe in a name.) The bijection in the lower 31 bits is
+    // murmur-like.
+    // See https://en.wikipedia.org/wiki/Quasirandom
+    // See https://en.wikipedia.org/wiki/MurmurHash
+    seed *= /*31-bit prime*/ 1150630961;
+    seed ^= (seed & kSeedMask) >> 17;
+    seed *= /*31-bit prime*/ 1320603883;
+    return seed & kSeedMask;
+  }
+  // kHostHashSeed
+  std::string hostname;
+  Status s = Env::Default()->GetHostNameString(&hostname);
+  if (!s.ok()) {
+    // Fall back on something stable within the process.
+    return BitwiseAnd(gen.GetBaseUpper(), kSeedMask);
+  }
+  return GetSliceHash(hostname) & kSeedMask;
+}
+}  // namespace
+
+ShardedCacheBase::ShardedCacheBase(const ShardedCacheOptions& opts)
+    : Cache(opts.memory_allocator),
+      last_id_(1),
+      shard_mask_((uint32_t{1} << opts.num_shard_bits) - 1),
+      hash_seed_(DetermineSeed(opts.hash_seed)),
+      strict_capacity_limit_(opts.strict_capacity_limit),
+      capacity_(opts.capacity) {}
+
+// Returns the capacity each shard receives when `capacity` is divided across
+// all shards, rounded up so that the shards together cover at least
+// `capacity`.
+size_t ShardedCacheBase::ComputePerShardCapacity(size_t capacity) const {
+  const size_t num_shards = GetNumShards();
+  // Ceiling division written as quotient + (remainder != 0). The more common
+  // (capacity + num_shards - 1) / num_shards wraps around when `capacity` is
+  // within num_shards of SIZE_MAX, which would turn an effectively unlimited
+  // cache into one with a near-zero per-shard capacity.
+  return capacity / num_shards + (capacity % num_shards != 0 ? 1 : 0);
+}
+
+// Per-shard share of the currently configured total capacity.
+size_t ShardedCacheBase::GetPerShardCapacity() const {
+  const size_t total_capacity = GetCapacity();
+  return ComputePerShardCapacity(total_capacity);
+}
+
+// Hands out the current value of the id counter and advances the counter by
+// one, using a relaxed atomic increment.
+uint64_t ShardedCacheBase::NewId() {
+  const uint64_t id = last_id_.fetch_add(1, std::memory_order_relaxed);
+  return id;
+}
+
+// Reads the configured total capacity while holding config_mutex_.
+size_t ShardedCacheBase::GetCapacity() const {
+  MutexLock l(&config_mutex_);
+  const size_t current_capacity = capacity_;
+  return current_capacity;
+}
+
+// Reads the strict-capacity-limit flag while holding config_mutex_.
+bool ShardedCacheBase::HasStrictCapacityLimit() const {
+  MutexLock l(&config_mutex_);
+  const bool is_strict = strict_capacity_limit_;
+  return is_strict;
+}
+
+// The usage attributed to a single entry is whatever GetCharge() reports for
+// its handle.
+size_t ShardedCacheBase::GetUsage(Handle* handle) const {
+  const size_t entry_charge = GetCharge(handle);
+  return entry_charge;
+}
+
+// Builds a human-readable, one-option-per-line description of the cache:
+// capacity, num_shard_bits, strict_capacity_limit and memory_allocator,
+// followed by whatever AppendPrintableOptions() contributes.
+std::string ShardedCacheBase::GetPrintableOptions() const {
+  constexpr int kLineCapacity = 200;
+  char line[kLineCapacity];
+  std::string out;
+  out.reserve(20000);
+  {
+    // capacity_ and strict_capacity_limit_ are guarded by config_mutex_.
+    MutexLock l(&config_mutex_);
+    snprintf(line, kLineCapacity, "    capacity : %" ROCKSDB_PRIszt "\n",
+             capacity_);
+    out += line;
+    snprintf(line, kLineCapacity, "    num_shard_bits : %d\n",
+             GetNumShardBits());
+    out += line;
+    snprintf(line, kLineCapacity, "    strict_capacity_limit : %d\n",
+             strict_capacity_limit_);
+    out += line;
+  }
+  const char* allocator_name =
+      memory_allocator() ? memory_allocator()->Name() : "None";
+  snprintf(line, kLineCapacity, "    memory_allocator : %s\n", allocator_name);
+  out += line;
+  AppendPrintableOptions(out);
+  return out;
+}
+
+// Picks a default number of shard bits for a cache of the given capacity:
+// floor(log2(capacity / min_shard_size)), clamped to the range [0, 6].
+int GetDefaultCacheShardBits(size_t capacity, size_t min_shard_size) {
+  constexpr int kMaxShardBits = 6;
+  size_t remaining = capacity / min_shard_size;
+  int bits = 0;
+  // Each halving that still leaves a non-zero count earns one more bit, up
+  // to the maximum.
+  while (bits < kMaxShardBits && (remaining >>= 1) != 0) {
+    ++bits;
+  }
+  return bits;
+}
+
+// The number of shard bits equals the number of one-bits in the shard mask.
+int ShardedCacheBase::GetNumShardBits() const {
+  const int shard_bits = BitsSetToOne(shard_mask_);
+  return shard_bits;
+}
+
+// The shard count is one more than the shard mask.
+uint32_t ShardedCacheBase::GetNumShards() const {
+  const uint32_t shard_count = shard_mask_ + 1;
+  return shard_count;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/sharded_cache.h
+++ b/cache/sharded_cache.h
@ -0,0 +1,309 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <string>
+
+#include "port/lang.h"
+#include "port/port.h"
+#include "rocksdb/advanced_cache.h"
+#include "util/hash.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Optional convenience base for classes implementing the CacheShard concept
+// consumed by ShardedCache<CacheShard>. It supplies default hash types and
+// helpers and stores the metadata charge policy.
+class CacheShardBase {
+ public:
+  using DeleterFn = Cache::DeleterFn;
+
+  // Expected by concept CacheShard (TODO with C++20 support)
+  // Some Defaults
+  using HashVal = uint64_t;
+  using HashCref = uint64_t;
+
+  explicit CacheShardBase(CacheMetadataChargePolicy metadata_charge_policy)
+      : metadata_charge_policy_(metadata_charge_policy) {}
+
+  // Default key hash: GetSliceNPHash64 of the key with the given seed.
+  static HashVal ComputeHash(const Slice& key, uint32_t seed) {
+    return GetSliceNPHash64(key, seed);
+  }
+  // Default portion of the hash handed to the shard selector.
+  static uint32_t HashPieceForSharding(HashCref hash) {
+    return Lower32of64(hash);
+  }
+  // By default a shard reports no options of its own.
+  std::string GetPrintableOptions() const { return std::string(); }
+  void AppendPrintableOptions(std::string& /*str*/) const {}
+
+  // Must be provided for concept CacheShard (TODO with C++20 support)
+  // NOTE(review): ShardedCache::Insert calls Insert() without the trailing
+  // `standalone` argument listed here - confirm whether it still exists.
+  /*
+  struct HandleImpl {  // for concept HandleImpl
+    HashVal hash;
+    HashCref GetHash() const;
+    ...
+  };
+  Status Insert(const Slice& key, HashCref hash, Cache::ObjectPtr value,
+                const Cache::CacheItemHelper* helper, size_t charge,
+                HandleImpl** handle, Cache::Priority priority,
+                bool standalone) = 0;
+  HandleImpl* CreateStandalone(const Slice& key, HashCref hash,
+                               Cache::ObjectPtr obj,
+                               const Cache::CacheItemHelper* helper,
+                               size_t charge, bool allow_uncharged) = 0;
+  HandleImpl* Lookup(const Slice& key, HashCref hash,
+                        const Cache::CacheItemHelper* helper,
+                        Cache::CreateContext* create_context,
+                        Cache::Priority priority,
+                        Statistics* stats) = 0;
+  bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref) = 0;
+  bool Ref(HandleImpl* handle) = 0;
+  void Erase(const Slice& key, HashCref hash) = 0;
+  void SetCapacity(size_t capacity) = 0;
+  void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
+  size_t GetUsage() const = 0;
+  size_t GetPinnedUsage() const = 0;
+  size_t GetOccupancyCount() const = 0;
+  size_t GetTableAddressCount() const = 0;
+  // Handles iterating over roughly `average_entries_per_lock` entries, using
+  // `state` to somehow record where it last ended up. Caller initially uses
+  // *state == 0 and implementation sets *state = SIZE_MAX to indicate
+  // completion.
+  void ApplyToSomeEntries(
+      const std::function<void(const Slice& key, ObjectPtr value,
+                               size_t charge,
+                               const Cache::CacheItemHelper* helper)>& callback,
+      size_t average_entries_per_lock, size_t* state) = 0;
+  void EraseUnRefEntries() = 0;
+  */
+
+ protected:
+  const CacheMetadataChargePolicy metadata_charge_policy_;
+};
+
+// Portions of ShardedCache that do not depend on the template parameter:
+// shard count bookkeeping, the hash seed, id generation, and the dynamic
+// capacity / strict-capacity-limit settings with the mutex guarding them.
+class ShardedCacheBase : public Cache {
+ public:
+  explicit ShardedCacheBase(const ShardedCacheOptions& opts);
+  virtual ~ShardedCacheBase() = default;
+
+  // Number of one-bits in the shard mask.
+  int GetNumShardBits() const;
+  // Shard mask plus one.
+  uint32_t GetNumShards() const;
+
+  // Returns the current id counter value and advances the counter.
+  uint64_t NewId() override;
+
+  // Both accessors read their value under config_mutex_.
+  bool HasStrictCapacityLimit() const override;
+  size_t GetCapacity() const override;
+
+  using Cache::GetUsage;
+  // Usage of a single entry; implemented as GetCharge(handle).
+  size_t GetUsage(Handle* handle) const override;
+  // Multi-line description of the cache options, ending with the output of
+  // AppendPrintableOptions().
+  std::string GetPrintableOptions() const override;
+
+  uint32_t GetHashSeed() const override { return hash_seed_; }
+
+ protected:  // fns
+  // Hook for derived classes to append their own lines to
+  // GetPrintableOptions().
+  virtual void AppendPrintableOptions(std::string& str) const = 0;
+  // ComputePerShardCapacity() applied to the current capacity.
+  size_t GetPerShardCapacity() const;
+  // `capacity` divided by the number of shards, rounded up.
+  size_t ComputePerShardCapacity(size_t capacity) const;
+
+ protected:                        // data
+  std::atomic<uint64_t> last_id_;  // For NewId
+  // Initialized from opts.num_shard_bits as (1 << num_shard_bits) - 1.
+  const uint32_t shard_mask_;
+  // Resolved once at construction from opts.hash_seed.
+  const uint32_t hash_seed_;
+
+  // Dynamic configuration parameters, guarded by config_mutex_
+  bool strict_capacity_limit_;
+  size_t capacity_;
+  mutable port::Mutex config_mutex_;
+};
+
+// Generic cache interface that shards cache by hash of keys. 2^num_shard_bits
+// shards will be created, with capacity split evenly to each of the shards.
+// Keys are typically sharded by the lowest num_shard_bits bits of hash value
+// so that the upper bits of the hash value can keep a stable ordering of
+// table entries even as the table grows (using more upper hash bits).
+// See CacheShardBase above for what is expected of the CacheShard parameter.
+//
+// The constructor only allocates raw storage for the shards; the derived
+// class constructor must construct them in place by calling InitShards().
+template <class CacheShard>
+class ShardedCache : public ShardedCacheBase {
+ public:
+  using HashVal = typename CacheShard::HashVal;
+  using HashCref = typename CacheShard::HashCref;
+  using HandleImpl = typename CacheShard::HandleImpl;
+
+  // Allocates cacheline-aligned, uninitialized storage for GetNumShards()
+  // shards. No CacheShard object is constructed here (see InitShards()).
+  explicit ShardedCache(const ShardedCacheOptions& opts)
+      : ShardedCacheBase(opts),
+        shards_(reinterpret_cast<CacheShard*>(port::cacheline_aligned_alloc(
+            sizeof(CacheShard) * GetNumShards()))),
+        destroy_shards_in_dtor_(false) {}
+
+  // Runs the shard destructors when destroy_shards_in_dtor_ is set, then
+  // frees the shard storage.
+  virtual ~ShardedCache() {
+    if (destroy_shards_in_dtor_) {
+      ForEachShard([](CacheShard* cs) { cs->~CacheShard(); });
+    }
+    port::cacheline_aligned_free(shards_);
+  }
+
+  // Selects the shard responsible for `hash`.
+  CacheShard& GetShard(HashCref hash) {
+    return shards_[CacheShard::HashPieceForSharding(hash) & shard_mask_];
+  }
+
+  const CacheShard& GetShard(HashCref hash) const {
+    return shards_[CacheShard::HashPieceForSharding(hash) & shard_mask_];
+  }
+
+  // Records the new total capacity and pushes the per-shard share to every
+  // shard, all under config_mutex_.
+  void SetCapacity(size_t capacity) override {
+    MutexLock l(&config_mutex_);
+    capacity_ = capacity;
+    auto per_shard = ComputePerShardCapacity(capacity);
+    ForEachShard([=](CacheShard* cs) { cs->SetCapacity(per_shard); });
+  }
+
+  // Records the flag and forwards it to every shard, under config_mutex_.
+  void SetStrictCapacityLimit(bool s_c_l) override {
+    MutexLock l(&config_mutex_);
+    strict_capacity_limit_ = s_c_l;
+    ForEachShard(
+        [s_c_l](CacheShard* cs) { cs->SetStrictCapacityLimit(s_c_l); });
+  }
+
+  // Hashes the key with the cache's seed and inserts into the owning shard.
+  // `helper` must not be null.
+  Status Insert(const Slice& key, ObjectPtr obj, const CacheItemHelper* helper,
+                size_t charge, Handle** handle = nullptr,
+                Priority priority = Priority::LOW) override {
+    assert(helper);
+    HashVal hash = CacheShard::ComputeHash(key, hash_seed_);
+    auto h_out = reinterpret_cast<HandleImpl**>(handle);
+    return GetShard(hash).Insert(key, hash, obj, helper, charge, h_out,
+                                 priority);
+  }
+
+  // Forwards to the owning shard's CreateStandalone(). `helper` must not be
+  // null.
+  Handle* CreateStandalone(const Slice& key, ObjectPtr obj,
+                           const CacheItemHelper* helper, size_t charge,
+                           bool allow_uncharged) override {
+    assert(helper);
+    HashVal hash = CacheShard::ComputeHash(key, hash_seed_);
+    HandleImpl* result = GetShard(hash).CreateStandalone(
+        key, hash, obj, helper, charge, allow_uncharged);
+    return reinterpret_cast<Handle*>(result);
+  }
+
+  // Hashes the key and looks it up in the owning shard.
+  Handle* Lookup(const Slice& key, const CacheItemHelper* helper = nullptr,
+                 CreateContext* create_context = nullptr,
+                 Priority priority = Priority::LOW,
+                 Statistics* stats = nullptr) override {
+    HashVal hash = CacheShard::ComputeHash(key, hash_seed_);
+    HandleImpl* result = GetShard(hash).Lookup(key, hash, helper,
+                                               create_context, priority, stats);
+    return reinterpret_cast<Handle*>(result);
+  }
+
+  void Erase(const Slice& key) override {
+    HashVal hash = CacheShard::ComputeHash(key, hash_seed_);
+    GetShard(hash).Erase(key, hash);
+  }
+
+  // Handle-based operations find the shard from the hash stored in the
+  // handle itself.
+  bool Release(Handle* handle, bool useful,
+               bool erase_if_last_ref = false) override {
+    auto h = reinterpret_cast<HandleImpl*>(handle);
+    return GetShard(h->GetHash()).Release(h, useful, erase_if_last_ref);
+  }
+  bool Ref(Handle* handle) override {
+    auto h = reinterpret_cast<HandleImpl*>(handle);
+    return GetShard(h->GetHash()).Ref(h);
+  }
+  bool Release(Handle* handle, bool erase_if_last_ref = false) override {
+    return Release(handle, true /*useful*/, erase_if_last_ref);
+  }
+  using ShardedCacheBase::GetUsage;
+  // The aggregate statistics below are sums of the per-shard values.
+  size_t GetUsage() const override {
+    return SumOverShards2(&CacheShard::GetUsage);
+  }
+  size_t GetPinnedUsage() const override {
+    return SumOverShards2(&CacheShard::GetPinnedUsage);
+  }
+  size_t GetOccupancyCount() const override {
+    return SumOverShards2(&CacheShard::GetOccupancyCount);
+  }
+  size_t GetTableAddressCount() const override {
+    return SumOverShards2(&CacheShard::GetTableAddressCount);
+  }
+  // Invokes `callback` on the entries of every shard. Each shard is visited
+  // in slices of roughly opts.average_entries_per_lock entries, and the
+  // per-shard progress is kept in `states` (0 = not started, SIZE_MAX =
+  // finished).
+  void ApplyToAllEntries(
+      const std::function<void(const Slice& key, ObjectPtr value, size_t charge,
+                               const CacheItemHelper* helper)>& callback,
+      const ApplyToAllEntriesOptions& opts) override {
+    uint32_t num_shards = GetNumShards();
+    // Iterate over part of each shard, rotating between shards, to
+    // minimize impact on latency of concurrent operations.
+    std::unique_ptr<size_t[]> states(new size_t[num_shards]{});
+
+    size_t aepl = opts.average_entries_per_lock;
+    // Enforce a lower bound of one entry per slice. This used std::min,
+    // which instead capped the setting at 1 (ignoring larger caller-provided
+    // values) and passed 0 through unchanged.
+    aepl = std::max(aepl, size_t{1});
+
+    bool remaining_work;
+    do {
+      remaining_work = false;
+      for (uint32_t i = 0; i < num_shards; i++) {
+        if (states[i] != SIZE_MAX) {
+          shards_[i].ApplyToSomeEntries(callback, aepl, &states[i]);
+          remaining_work |= states[i] != SIZE_MAX;
+        }
+      }
+    } while (remaining_work);
+  }
+
+  void EraseUnRefEntries() override {
+    ForEachShard([](CacheShard* cs) { cs->EraseUnRefEntries(); });
+  }
+
+  void DisownData() override {
+    // Leak data only if that won't generate an ASAN/valgrind warning.
+    if (!kMustFreeHeapAllocations) {
+      destroy_shards_in_dtor_ = false;
+    }
+  }
+
+ protected:
+  // Calls `fn` once per shard, in index order.
+  inline void ForEachShard(const std::function<void(CacheShard*)>& fn) {
+    uint32_t num_shards = GetNumShards();
+    for (uint32_t i = 0; i < num_shards; i++) {
+      fn(shards_ + i);
+    }
+  }
+
+  // Sums fn(shard) over all shards.
+  inline size_t SumOverShards(
+      const std::function<size_t(CacheShard&)>& fn) const {
+    uint32_t num_shards = GetNumShards();
+    size_t result = 0;
+    for (uint32_t i = 0; i < num_shards; i++) {
+      result += fn(shards_[i]);
+    }
+    return result;
+  }
+
+  // Convenience overload of SumOverShards for const member functions.
+  inline size_t SumOverShards2(size_t (CacheShard::*fn)() const) const {
+    return SumOverShards([fn](CacheShard& cs) { return (cs.*fn)(); });
+  }
+
+  // Must be called exactly once by derived class constructor
+  void InitShards(const std::function<void(CacheShard*)>& placement_new) {
+    ForEachShard(placement_new);
+    destroy_shards_in_dtor_ = true;
+  }
+
+  // All shards are assumed to report the same options, so only the first
+  // one is asked.
+  void AppendPrintableOptions(std::string& str) const override {
+    shards_[0].AppendPrintableOptions(str);
+  }
+
+ private:
+  // Raw array of GetNumShards() shards, allocated in the constructor.
+  CacheShard* const shards_;
+  // Set by InitShards(); cleared again by DisownData() when leaking is
+  // permitted.
+  bool destroy_shards_in_dtor_;
+};
+
+// Default number of shard bits for a cache of `capacity` bytes, given the
+// smallest capacity a single shard should have.
+// 512KB is traditional minimum shard size.
+int GetDefaultCacheShardBits(size_t capacity,
+                             size_t min_shard_size = 512U * 1024U);
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cache/typed_cache.h
+++ b/cache/typed_cache.h
@ -0,0 +1,375 @@
+//  Copyright (c) Meta Platforms, Inc. and affiliates.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+// APIs for accessing Cache in a type-safe and convenient way. Cache is kept
+// at a low, thin level of abstraction so that different implementations can
+// be plugged in, but these wrappers provide clean, convenient access to the
+// most common operations.
+//
+// A number of template classes are needed for sharing common structure. The
+// key classes are these:
+//
+// * PlaceholderCacheInterface - Used for making cache reservations, with
+// entries that have a charge but no value.
+// * BasicTypedCacheInterface<TValue> - Used for primary cache storage of
+// objects of type TValue.
+// * FullTypedCacheInterface<TValue, TCreateContext> - Used for secondary
+// cache compatible storage of objects of type TValue.
+// * For each of these, there's a "Shared" version
+// (e.g. FullTypedSharedCacheInterface) that holds a shared_ptr to the Cache,
+// rather than assuming external ownership by holding only a raw `Cache*`.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+
+#include "cache/cache_helpers.h"
+#include "rocksdb/advanced_cache.h"
+#include "rocksdb/advanced_options.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// For future consideration:
+// * Pass in value to Insert with std::unique_ptr& to simplify ownership
+//   transfer logic in callers
+// * Make key type a template parameter (e.g. useful for table cache)
+// * Closer integration with CacheHandleGuard (opt-in, so not always
+//   paying the extra overhead)
+
+// Brings the commonly used nested Cache types into the enclosing class scope
+// under their short names. The expansion deliberately omits the final ';' so
+// that each use site reads like a statement: `CACHE_TYPE_DEFS();`. The macro
+// is #undef'd at the end of this header.
+#define CACHE_TYPE_DEFS()                     \
+  using Priority = Cache::Priority;           \
+  using Handle = Cache::Handle;               \
+  using ObjectPtr = Cache::ObjectPtr;         \
+  using CreateContext = Cache::CreateContext; \
+  using CacheItemHelper = Cache::CacheItemHelper /* caller ; */
+
+// Common base of the typed cache wrappers. Stores the cache pointer (of
+// whatever pointer-like type CachePtr is) and forwards handle-release
+// operations to the cache.
+template <typename CachePtr>
+class BaseCacheInterface {
+ public:
+  CACHE_TYPE_DEFS();
+
+  /*implicit*/ BaseCacheInterface(CachePtr cache) : cache_(std::move(cache)) {}
+
+  // The wrapped cache as a raw pointer.
+  Cache* get() const { return &*cache_; }
+
+  // True when a cache is attached.
+  explicit operator bool() const noexcept { return cache_ != nullptr; }
+
+  // Releases `handle` with the cache's default Release() arguments.
+  void Release(Handle* handle) { cache_->Release(handle); }
+
+  // Releases `handle`, asking the cache to erase the entry if this was the
+  // last reference.
+  void ReleaseAndEraseIfLastRef(Handle* handle) {
+    cache_->Release(handle, /*erase_if_last_ref*/ true);
+  }
+
+  // Registers ReleaseCacheHandleCleanup on `cleanable` for this cache and
+  // `handle`.
+  void RegisterReleaseAsCleanup(Handle* handle, Cleanable& cleanable) {
+    cleanable.RegisterCleanup(&ReleaseCacheHandleCleanup, get(), handle);
+  }
+
+ protected:
+  CachePtr cache_;
+};
+
+// PlaceholderCacheInterface - Wrapper for making cache reservations: the
+// entries it inserts carry a charge but a null value. The CacheEntryRole to
+// attribute the reservations to is a required template parameter.
+template <CacheEntryRole kRole, typename CachePtr = Cache*>
+class PlaceholderCacheInterface : public BaseCacheInterface<CachePtr> {
+ public:
+  CACHE_TYPE_DEFS();
+  using BaseCacheInterface<CachePtr>::BaseCacheInterface;
+
+  // Shared helper for all placeholder entries of this role; it carries only
+  // the role.
+  static const Cache::CacheItemHelper* GetHelper() {
+    static const Cache::CacheItemHelper kPlaceholderHelper{kRole};
+    return &kPlaceholderHelper;
+  }
+
+  // Inserts a value-less entry with the given charge.
+  Status Insert(const Slice& key, size_t charge, Handle** handle) {
+    const Cache::CacheItemHelper* helper = GetHelper();
+    return this->cache_->Insert(key, /*value=*/nullptr, helper, charge,
+                                handle);
+  }
+};
+
+// PlaceholderSharedCacheInterface - Like PlaceholderCacheInterface but with a
+// shared_ptr<Cache> in place of the raw Cache*.
+template <CacheEntryRole kRole>
+using PlaceholderSharedCacheInterface =
+    PlaceholderCacheInterface<kRole, std::shared_ptr<Cache>>;
+
+// Static building blocks for helpers of cache entries holding a TValue:
+// conversions between the typed pointer and Cache::ObjectPtr, and the
+// deleter callback.
+template <class TValue>
+class BasicTypedCacheHelperFns {
+ public:
+  CACHE_TYPE_DEFS();
+  // E.g. char* for char[]
+  using TValuePtr = std::remove_extent_t<TValue>*;
+
+ protected:
+  static ObjectPtr UpCastValue(TValuePtr value) { return value; }
+  static TValuePtr DownCastValue(ObjectPtr value) {
+    return static_cast<TValuePtr>(value);
+  }
+
+  // Deleter callback for entries of this type.
+  static void Delete(ObjectPtr value, MemoryAllocator* allocator) {
+    // FIXME: Currently, no callers actually allocate the ObjectPtr objects
+    // using the custom allocator, just subobjects that keep a reference to
+    // the allocator themselves (with CacheAllocationPtr).
+    if (/*DISABLED*/ false && allocator) {
+      if constexpr (std::is_destructible_v<TValue>) {
+        DownCastValue(value)->~TValue();
+      }
+      allocator->Deallocate(value);
+      return;
+    }
+    // Like delete but properly handles TValue=char[] etc.
+    std::default_delete<TValue>{}(DownCastValue(value));
+  }
+};
+
+// Kept as a separate class, independent of CachePtr, to try to minimize the
+// number of distinct CacheItemHelper instances.
+template <class TValue, CacheEntryRole kRole>
+class BasicTypedCacheHelper : public BasicTypedCacheHelperFns<TValue> {
+ public:
+  // Helper carrying only the role and the deleter.
+  static const Cache::CacheItemHelper* GetBasicHelper() {
+    static const Cache::CacheItemHelper kBasicHelper{
+        kRole, &BasicTypedCacheHelper::Delete};
+    return &kBasicHelper;
+  }
+};
+
+// BasicTypedCacheInterface - Typed wrapper for primary cache storage of
+// objects of type TValue that can be cleaned up with
+// std::default_delete<TValue>. The role comes from TValue::kCacheEntryRole
+// unless given explicitly as a template parameter.
+template <class TValue, CacheEntryRole kRole = TValue::kCacheEntryRole,
+          typename CachePtr = Cache*>
+class BasicTypedCacheInterface : public BaseCacheInterface<CachePtr>,
+                                 public BasicTypedCacheHelper<TValue, kRole> {
+ public:
+  CACHE_TYPE_DEFS();
+  using typename BasicTypedCacheHelperFns<TValue>::TValuePtr;
+  // Distinct handle type so that handles of differently typed wrappers are
+  // not interchangeable by accident.
+  struct TypedHandle : public Handle {};
+  using BasicTypedCacheHelper<TValue, kRole>::GetBasicHelper;
+  // ctor
+  using BaseCacheInterface<CachePtr>::BaseCacheInterface;
+  // Async lookup handle whose result comes back as a TypedHandle.
+  struct TypedAsyncLookupHandle : public Cache::AsyncLookupHandle {
+    TypedHandle* Result() {
+      Handle* untyped_result = Cache::AsyncLookupHandle::Result();
+      return reinterpret_cast<TypedHandle*>(untyped_result);
+    }
+  };
+
+  // Inserts `value` using the basic helper.
+  Status Insert(const Slice& key, TValuePtr value, size_t charge,
+                TypedHandle** handle = nullptr,
+                Priority priority = Priority::LOW) {
+    ObjectPtr untyped_value =
+        BasicTypedCacheHelperFns<TValue>::UpCastValue(value);
+    Handle** untyped_handle = reinterpret_cast<Handle**>(handle);
+    return this->cache_->Insert(key, untyped_value, GetBasicHelper(), charge,
+                                untyped_handle, priority);
+  }
+
+  // Lookup through Cache::BasicLookup().
+  TypedHandle* Lookup(const Slice& key, Statistics* stats = nullptr) {
+    Handle* untyped_handle = this->cache_->BasicLookup(key, stats);
+    return reinterpret_cast<TypedHandle*>(untyped_handle);
+  }
+
+  // Starts an async lookup; the handle must not have a helper set.
+  void StartAsyncLookup(TypedAsyncLookupHandle& async_handle) {
+    assert(async_handle.helper == nullptr);
+    this->cache_->StartAsyncLookup(async_handle);
+  }
+
+  // Wraps `handle` in a CacheHandleGuard; a null handle gives an empty guard.
+  CacheHandleGuard<TValue> Guard(TypedHandle* handle) {
+    if (!handle) {
+      return {};
+    }
+    return CacheHandleGuard<TValue>(&*this->cache_, handle);
+  }
+
+  // Like Guard() but as a shared_ptr; a null handle gives an empty pointer.
+  std::shared_ptr<TValue> SharedGuard(TypedHandle* handle) {
+    if (!handle) {
+      return {};
+    }
+    return MakeSharedCacheHandleGuard<TValue>(&*this->cache_, handle);
+  }
+
+  // The value behind `handle`, as a typed pointer.
+  TValuePtr Value(TypedHandle* handle) {
+    ObjectPtr untyped_value = this->cache_->Value(handle);
+    return BasicTypedCacheHelperFns<TValue>::DownCastValue(untyped_value);
+  }
+};
+
+// BasicTypedSharedCacheInterface - Like BasicTypedCacheInterface but with a
+// shared_ptr<Cache> for keeping Cache alive. As there, the role defaults to
+// TValue::kCacheEntryRole.
+template <class TValue, CacheEntryRole kRole = TValue::kCacheEntryRole>
+using BasicTypedSharedCacheInterface =
+    BasicTypedCacheInterface<TValue, kRole, std::shared_ptr<Cache>>;
+
+// Static callbacks needed for secondary cache compatible entries.
+// TValue must implement ContentSlice() and ~TValue
+// TCreateContext must implement Create(std::unique_ptr<TValue>*, ...)
+template <class TValue, class TCreateContext>
+class FullTypedCacheHelperFns : public BasicTypedCacheHelperFns<TValue> {
+ public:
+  CACHE_TYPE_DEFS();
+
+ protected:
+  using typename BasicTypedCacheHelperFns<TValue>::TValuePtr;
+  using BasicTypedCacheHelperFns<TValue>::DownCastValue;
+  using BasicTypedCacheHelperFns<TValue>::UpCastValue;
+
+  // Size of the object's persistable content.
+  static size_t Size(ObjectPtr v) {
+    return DownCastValue(v)->ContentSlice().size();
+  }
+
+  // Copies `length` bytes of the object's content, starting at
+  // `from_offset`, into `out`. The requested range must lie inside the
+  // content (checked with assertions only).
+  static Status SaveTo(ObjectPtr v, size_t from_offset, size_t length,
+                       char* out) {
+    auto content = DownCastValue(v)->ContentSlice();
+    assert(from_offset < content.size());
+    assert(from_offset + length <= content.size());
+    const char* first = content.data() + from_offset;
+    std::copy_n(first, length, out);
+    return Status::OK();
+  }
+
+  // Builds a new TValue from `data` through TCreateContext::Create and hands
+  // ownership of it to the caller via `out_obj`. Always returns OK.
+  static Status Create(const Slice& data, CreateContext* context,
+                       MemoryAllocator* allocator, ObjectPtr* out_obj,
+                       size_t* out_charge) {
+    std::unique_ptr<TValue> created;
+    // NOTE(review): sizeof of a complete type is never 0, so the static
+    // Create() branch looks unreachable - confirm the intended condition.
+    if constexpr (sizeof(TCreateContext) > 0) {
+      TCreateContext* tcontext = static_cast<TCreateContext*>(context);
+      tcontext->Create(&created, out_charge, data, allocator);
+    } else {
+      TCreateContext::Create(&created, out_charge, data, allocator);
+    }
+    *out_obj = UpCastValue(created.release());
+    return Status::OK();
+  }
+};
+
+// Kept as a separate class, independent of CachePtr, to try to minimize the
+// number of distinct CacheItemHelper instances.
+template <class TValue, class TCreateContext, CacheEntryRole kRole>
+class FullTypedCacheHelper
+    : public FullTypedCacheHelperFns<TValue, TCreateContext> {
+ public:
+  // Helper with the full callback set (delete, size, save, create). Its last
+  // member points at the basic helper for the same TValue and role.
+  static const Cache::CacheItemHelper* GetFullHelper() {
+    static const Cache::CacheItemHelper kFullHelper{
+        kRole,
+        &FullTypedCacheHelper::Delete,
+        &FullTypedCacheHelper::Size,
+        &FullTypedCacheHelper::SaveTo,
+        &FullTypedCacheHelper::Create,
+        BasicTypedCacheHelper<TValue, kRole>::GetBasicHelper()};
+    return &kFullHelper;
+  }
+};
+
+// FullTypedCacheInterface - Used for secondary cache compatible storage of
+// objects of type TValue. In addition to BasicTypedCacheInterface constraints,
+// we require TValue::ContentSlice() to return persistable data. This
+// simplifies usage for the normal case of simple secondary cache compatibility
+// (can give you a Slice to the data already in memory). In addition to
+// TCreateContext performing the role of Cache::CreateContext, it is also
+// expected to provide a function Create(std::unique_ptr<TValue>* value,
+// size_t* out_charge, const Slice& data, MemoryAllocator* allocator) for
+// creating new TValue.
+template <class TValue, class TCreateContext,
+          CacheEntryRole kRole = TValue::kCacheEntryRole,
+          typename CachePtr = Cache*>
+class FullTypedCacheInterface
+    : public BasicTypedCacheInterface<TValue, kRole, CachePtr>,
+      public FullTypedCacheHelper<TValue, TCreateContext, kRole> {
+ public:
+  CACHE_TYPE_DEFS();
+  using typename BasicTypedCacheInterface<TValue, kRole, CachePtr>::TypedHandle;
+  using typename BasicTypedCacheInterface<TValue, kRole,
+                                          CachePtr>::TypedAsyncLookupHandle;
+  using typename BasicTypedCacheHelperFns<TValue>::TValuePtr;
+  using BasicTypedCacheHelper<TValue, kRole>::GetBasicHelper;
+  using FullTypedCacheHelper<TValue, TCreateContext, kRole>::GetFullHelper;
+  using BasicTypedCacheHelperFns<TValue>::UpCastValue;
+  using BasicTypedCacheHelperFns<TValue>::DownCastValue;
+  // ctor
+  using BasicTypedCacheInterface<TValue, kRole,
+                                 CachePtr>::BasicTypedCacheInterface;
+
+  // Insert with SecondaryCache compatibility (subject to CacheTier).
+  // (Basic Insert() also inherited.)
+  // The full helper is used only when lowest_used_cache_tier is
+  // kNonVolatileBlockTier; otherwise the basic helper is used.
+  inline Status InsertFull(
+      const Slice& key, TValuePtr value, size_t charge,
+      TypedHandle** handle = nullptr, Priority priority = Priority::LOW,
+      CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier) {
+    auto untyped_handle = reinterpret_cast<Handle**>(handle);
+    auto helper = lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier
+                      ? GetFullHelper()
+                      : GetBasicHelper();
+    return this->cache_->Insert(key, UpCastValue(value), helper, charge,
+                                untyped_handle, priority);
+  }
+
+  // Like SecondaryCache::InsertSaved, with SecondaryCache compatibility
+  // (subject to CacheTier).
+  // Creates the object from `data` with the full helper's create callback
+  // and, on success, inserts it without returning a handle. If `out_charge`
+  // is non-null it receives the charge reported by the create callback (0 if
+  // the callback did not set one).
+  inline Status InsertSaved(
+      const Slice& key, const Slice& data, TCreateContext* create_context,
+      Priority priority = Priority::LOW,
+      CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier,
+      size_t* out_charge = nullptr) {
+    // Initialized so that a create callback that fails before writing its
+    // outputs cannot make us read an indeterminate charge or hand an
+    // indeterminate pointer to the deleter below.
+    ObjectPtr value = nullptr;
+    size_t charge = 0;
+    Status st = GetFullHelper()->create_cb(data, create_context,
+                                           this->cache_->memory_allocator(),
+                                           &value, &charge);
+    if (out_charge) {
+      *out_charge = charge;
+    }
+    if (st.ok()) {
+      st = InsertFull(key, DownCastValue(value), charge, nullptr /*handle*/,
+                      priority, lowest_used_cache_tier);
+    } else {
+      // Clean up whatever the callback produced (possibly nothing).
+      GetFullHelper()->del_cb(value, this->cache_->memory_allocator());
+    }
+    return st;
+  }
+
+  // Lookup with SecondaryCache support (subject to CacheTier).
+  // (Basic Lookup() also inherited.)
+  inline TypedHandle* LookupFull(
+      const Slice& key, TCreateContext* create_context = nullptr,
+      Priority priority = Priority::LOW, Statistics* stats = nullptr,
+      CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier) {
+    if (lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier) {
+      return reinterpret_cast<TypedHandle*>(this->cache_->Lookup(
+          key, GetFullHelper(), create_context, priority, stats));
+    } else {
+      return BasicTypedCacheInterface<TValue, kRole, CachePtr>::Lookup(key,
+                                                                       stats);
+    }
+  }
+
+  // Async counterpart of LookupFull(): sets the full helper on the handle
+  // for kNonVolatileBlockTier, otherwise defers to the basic
+  // StartAsyncLookup().
+  inline void StartAsyncLookupFull(
+      TypedAsyncLookupHandle& async_handle,
+      CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier) {
+    if (lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier) {
+      async_handle.helper = GetFullHelper();
+      this->cache_->StartAsyncLookup(async_handle);
+    } else {
+      BasicTypedCacheInterface<TValue, kRole, CachePtr>::StartAsyncLookup(
+          async_handle);
+    }
+  }
+};
+
+// FullTypedSharedCacheInterface - Like FullTypedCacheInterface but with a
+// shared_ptr<Cache> for keeping Cache alive. As there, the role defaults to
+// TValue::kCacheEntryRole.
+template <class TValue, class TCreateContext,
+          CacheEntryRole kRole = TValue::kCacheEntryRole>
+using FullTypedSharedCacheInterface =
+    FullTypedCacheInterface<TValue, TCreateContext, kRole,
+                            std::shared_ptr<Cache>>;
+
+#undef CACHE_TYPE_DEFS
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/cmake/RocksDBConfig.cmake.in
+++ b/cmake/RocksDBConfig.cmake.in
@ -0,0 +1,54 @@
+@PACKAGE_INIT@
+
+# Make the Find*.cmake modules shipped next to this file available to the
+# dependency lookups below.
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/modules")
+
+include(CMakeFindDependencyMacro)
+
+set(GFLAGS_USE_TARGET_NAMESPACE @GFLAGS_USE_TARGET_NAMESPACE@)
+
+# Each optional dependency is looked up only if RocksDB was built with it.
+if(@WITH_JEMALLOC@)
+  find_dependency(JeMalloc)
+endif()
+
+if(@WITH_GFLAGS@)
+  # Probe for a gflags package config file with a plain, quiet find_package().
+  # find_dependency() returns from this file as soon as a lookup fails, so
+  # using it for the probe would make the fallback below unreachable.
+  find_package(gflags CONFIG QUIET)
+  if(NOT gflags_FOUND)
+    find_dependency(gflags)
+  endif()
+endif()
+
+if(@WITH_SNAPPY@)
+  # Same probe-then-fallback scheme as for gflags above.
+  find_package(Snappy CONFIG QUIET)
+  if(NOT Snappy_FOUND)
+    find_dependency(Snappy)
+  endif()
+endif()
+
+if(@WITH_ZLIB@)
+  find_dependency(ZLIB)
+endif()
+
+if(@WITH_BZ2@)
+  find_dependency(BZip2)
+endif()
+
+if(@WITH_LZ4@)
+  find_dependency(lz4)
+endif()
+
+if(@WITH_ZSTD@)
+  find_dependency(zstd)
+endif()
+
+if(@WITH_NUMA@)
+  find_dependency(NUMA)
+endif()
+
+if(@WITH_TBB@)
+  find_dependency(TBB)
+endif()
+
+find_dependency(Threads)
+
+include("${CMAKE_CURRENT_LIST_DIR}/RocksDBTargets.cmake")
+check_required_components(RocksDB)
--- a/cmake/modules/CxxFlags.cmake
+++ b/cmake/modules/CxxFlags.cmake
@ -0,0 +1,7 @@
+# get_cxx_std_flags(<out-var>)
+# Stores in <out-var> the compiler option for the configured
+# CMAKE_CXX_STANDARD: the CMAKE_CXX<std>_STANDARD_COMPILE_OPTION value when
+# CMAKE_CXX_STANDARD_REQUIRED is true, otherwise the
+# CMAKE_CXX<std>_EXTENSION_COMPILE_OPTION value.
+# Being a macro, the variable is set directly in the caller's scope.
+macro(get_cxx_std_flags FLAGS_VARIABLE)
+  if( CMAKE_CXX_STANDARD_REQUIRED )
+    set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_STANDARD_COMPILE_OPTION})
+  else()
+    set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_EXTENSION_COMPILE_OPTION})
+  endif()
+endmacro()
--- a/cmake/modules/FindJeMalloc.cmake
+++ b/cmake/modules/FindJeMalloc.cmake
@ -0,0 +1,29 @@
+# - Find JeMalloc library
+# Find the native JeMalloc includes and library
+#
+# JeMalloc_INCLUDE_DIRS - where to find jemalloc.h, etc.
+# JeMalloc_LIBRARIES - List of libraries when using jemalloc.
+# JeMalloc_FOUND - True if jemalloc found.
+# JeMalloc::JeMalloc - Imported target, defined when jemalloc is found.
+#
+# JEMALLOC_ROOT_DIR may be set as a hint; its include/ and lib/
+# subdirectories are searched.
+
+find_path(JeMalloc_INCLUDE_DIRS
+  NAMES jemalloc/jemalloc.h
+  HINTS ${JEMALLOC_ROOT_DIR}/include)
+
+find_library(JeMalloc_LIBRARIES
+  NAMES jemalloc
+  HINTS ${JEMALLOC_ROOT_DIR}/lib)
+
+# Sets JeMalloc_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(JeMalloc DEFAULT_MSG JeMalloc_LIBRARIES JeMalloc_INCLUDE_DIRS)
+
+mark_as_advanced(
+  JeMalloc_LIBRARIES
+  JeMalloc_INCLUDE_DIRS)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it.
+if(JeMalloc_FOUND AND NOT (TARGET JeMalloc::JeMalloc))
+  add_library (JeMalloc::JeMalloc UNKNOWN IMPORTED)
+  set_target_properties(JeMalloc::JeMalloc
+    PROPERTIES
+      IMPORTED_LOCATION ${JeMalloc_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${JeMalloc_INCLUDE_DIRS})
+endif()
--- a/cmake/modules/FindNUMA.cmake
+++ b/cmake/modules/FindNUMA.cmake
@ -0,0 +1,29 @@
+# - Find NUMA
+# Find the NUMA library and includes
+#
+# NUMA_INCLUDE_DIRS - where to find numa.h, etc.
+# NUMA_LIBRARIES - List of libraries when using NUMA.
+# NUMA_FOUND - True if NUMA found.
+# NUMA::NUMA - Imported target, defined when NUMA is found.
+#
+# NUMA_ROOT_DIR may be set as a hint; its include/ and lib/ subdirectories
+# are searched.
+
+find_path(NUMA_INCLUDE_DIRS
+  NAMES numa.h numaif.h
+  HINTS ${NUMA_ROOT_DIR}/include)
+
+find_library(NUMA_LIBRARIES
+  NAMES numa
+  HINTS ${NUMA_ROOT_DIR}/lib)
+
+# Sets NUMA_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_LIBRARIES NUMA_INCLUDE_DIRS)
+
+mark_as_advanced(
+  NUMA_LIBRARIES
+  NUMA_INCLUDE_DIRS)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it.
+if(NUMA_FOUND AND NOT (TARGET NUMA::NUMA))
+  add_library (NUMA::NUMA UNKNOWN IMPORTED)
+  set_target_properties(NUMA::NUMA
+    PROPERTIES
+      IMPORTED_LOCATION ${NUMA_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${NUMA_INCLUDE_DIRS})
+endif()
--- a/cmake/modules/FindSnappy.cmake
+++ b/cmake/modules/FindSnappy.cmake
@ -0,0 +1,29 @@
+# - Find Snappy
+# Find the snappy compression library and includes
+#
+# Snappy_INCLUDE_DIRS - where to find snappy.h, etc.
+# Snappy_LIBRARIES - List of libraries when using snappy.
+# Snappy_FOUND - True if snappy found.
+# Snappy::snappy - Imported target, defined when snappy is found.
+#
+# snappy_ROOT_DIR may be set as a hint; its include/ and lib/
+# subdirectories are searched.
+
+find_path(Snappy_INCLUDE_DIRS
+  NAMES snappy.h
+  HINTS ${snappy_ROOT_DIR}/include)
+
+find_library(Snappy_LIBRARIES
+  NAMES snappy
+  HINTS ${snappy_ROOT_DIR}/lib)
+
+# Sets Snappy_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Snappy DEFAULT_MSG Snappy_LIBRARIES Snappy_INCLUDE_DIRS)
+
+mark_as_advanced(
+  Snappy_LIBRARIES
+  Snappy_INCLUDE_DIRS)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it.
+if(Snappy_FOUND AND NOT (TARGET Snappy::snappy))
+  add_library (Snappy::snappy UNKNOWN IMPORTED)
+  set_target_properties(Snappy::snappy
+    PROPERTIES
+      IMPORTED_LOCATION ${Snappy_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${Snappy_INCLUDE_DIRS})
+endif()
--- a/cmake/modules/FindTBB.cmake
+++ b/cmake/modules/FindTBB.cmake
@ -0,0 +1,33 @@
+# - Find TBB
+# Find the Threading Building Blocks library and includes
+#
+# TBB_INCLUDE_DIRS - where to find tbb.h, etc.
+# TBB_LIBRARIES - List of libraries when using TBB.
+# TBB_FOUND - True if TBB found.
+# TBB::TBB - Imported target, defined when TBB is found.
+#
+# TBB_ROOT_DIR may be set as a hint; when it is not defined it is taken from
+# the TBBROOT environment variable.
+
+if(NOT DEFINED TBB_ROOT_DIR)
+  set(TBB_ROOT_DIR "$ENV{TBBROOT}")
+endif()
+
+find_path(TBB_INCLUDE_DIRS
+  NAMES tbb/tbb.h
+  HINTS ${TBB_ROOT_DIR}/include)
+
+# Besides the root-dir hint, the LIBRARY_PATH environment variable is also
+# consulted for the library.
+find_library(TBB_LIBRARIES
+  NAMES tbb
+  HINTS ${TBB_ROOT_DIR}/lib ENV LIBRARY_PATH)
+
+# Sets TBB_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(TBB DEFAULT_MSG TBB_LIBRARIES TBB_INCLUDE_DIRS)
+
+mark_as_advanced(
+  TBB_LIBRARIES
+  TBB_INCLUDE_DIRS)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it.
+if(TBB_FOUND AND NOT (TARGET TBB::TBB))
+  add_library (TBB::TBB UNKNOWN IMPORTED)
+  set_target_properties(TBB::TBB
+    PROPERTIES
+      IMPORTED_LOCATION ${TBB_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${TBB_INCLUDE_DIRS})
+endif()
--- a/cmake/modules/Findgflags.cmake
+++ b/cmake/modules/Findgflags.cmake
@ -0,0 +1,29 @@
+# - Find gflags library
+# Find the gflags includes and library
+#
+# GFLAGS_INCLUDE_DIR - where to find gflags.h.
+# GFLAGS_LIBRARIES - List of libraries when using gflags.
+# gflags_FOUND - True if gflags found.
+# gflags::gflags - Imported target, defined when gflags is found.
+
+# No root-dir hint is used here; only the default search locations apply.
+find_path(GFLAGS_INCLUDE_DIR
+  NAMES gflags/gflags.h)
+
+find_library(GFLAGS_LIBRARIES
+  NAMES gflags)
+
+# Sets gflags_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(gflags
+  DEFAULT_MSG GFLAGS_LIBRARIES GFLAGS_INCLUDE_DIR)
+
+mark_as_advanced(
+  GFLAGS_LIBRARIES
+  GFLAGS_INCLUDE_DIR)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it. The link language is
+# declared as CXX.
+if(gflags_FOUND AND NOT (TARGET gflags::gflags))
+  add_library(gflags::gflags UNKNOWN IMPORTED)
+  set_target_properties(gflags::gflags
+    PROPERTIES
+      IMPORTED_LOCATION ${GFLAGS_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${GFLAGS_INCLUDE_DIR}
+      IMPORTED_LINK_INTERFACE_LANGUAGES "CXX")
+endif()
--- a/cmake/modules/Findlz4.cmake
+++ b/cmake/modules/Findlz4.cmake
@ -0,0 +1,29 @@
+# - Find Lz4
+# Find the lz4 compression library and includes
+#
+# lz4_INCLUDE_DIRS - where to find lz4.h, etc.
+# lz4_LIBRARIES - List of libraries when using lz4.
+# lz4_FOUND - True if lz4 found.
+# lz4::lz4 - Imported target, defined when lz4 is found.
+#
+# lz4_ROOT_DIR may be set as a hint; its include/ and lib/ subdirectories
+# are searched.
+
+find_path(lz4_INCLUDE_DIRS
+  NAMES lz4.h
+  HINTS ${lz4_ROOT_DIR}/include)
+
+find_library(lz4_LIBRARIES
+  NAMES lz4
+  HINTS ${lz4_ROOT_DIR}/lib)
+
+# Sets lz4_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(lz4 DEFAULT_MSG lz4_LIBRARIES lz4_INCLUDE_DIRS)
+
+mark_as_advanced(
+  lz4_LIBRARIES
+  lz4_INCLUDE_DIRS)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it.
+if(lz4_FOUND AND NOT (TARGET lz4::lz4))
+  add_library(lz4::lz4 UNKNOWN IMPORTED)
+  set_target_properties(lz4::lz4
+    PROPERTIES
+      IMPORTED_LOCATION ${lz4_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${lz4_INCLUDE_DIRS})
+endif()
--- a/cmake/modules/Finduring.cmake
+++ b/cmake/modules/Finduring.cmake
@ -0,0 +1,26 @@
+# - Find liburing
+#
+# uring_INCLUDE_DIR - Where to find liburing.h
+# uring_LIBRARIES - List of libraries when using uring.
+# uring_FOUND - True if uring found.
+# uring::uring - Imported target, defined when uring is found.
+
+find_path(uring_INCLUDE_DIR
+  NAMES liburing.h)
+# liburing.a is listed ahead of the plain library name.
+find_library(uring_LIBRARIES
+  NAMES liburing.a liburing)
+
+# Sets uring_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(uring
+  DEFAULT_MSG uring_LIBRARIES uring_INCLUDE_DIR)
+
+mark_as_advanced(
+  uring_INCLUDE_DIR
+  uring_LIBRARIES)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it. The link language is
+# declared as C.
+if(uring_FOUND AND NOT TARGET uring::uring)
+  add_library(uring::uring UNKNOWN IMPORTED)
+  set_target_properties(uring::uring PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${uring_INCLUDE_DIR}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+    IMPORTED_LOCATION "${uring_LIBRARIES}")
+endif()
--- a/cmake/modules/Findzstd.cmake
+++ b/cmake/modules/Findzstd.cmake
@ -0,0 +1,29 @@
+# - Find zstd
+# Find the zstd compression library and includes
+#
+# zstd_INCLUDE_DIRS - where to find zstd.h, etc.
+# zstd_LIBRARIES - List of libraries when using zstd.
+# zstd_FOUND - True if zstd found.
+# zstd::zstd - Imported target, defined when zstd is found.
+#
+# zstd_ROOT_DIR may be set as a hint; its include/ and lib/ subdirectories
+# are searched.
+
+find_path(zstd_INCLUDE_DIRS
+  NAMES zstd.h
+  HINTS ${zstd_ROOT_DIR}/include)
+
+find_library(zstd_LIBRARIES
+  NAMES zstd
+  HINTS ${zstd_ROOT_DIR}/lib)
+
+# Sets zstd_FOUND from the two search results above.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(zstd DEFAULT_MSG zstd_LIBRARIES zstd_INCLUDE_DIRS)
+
+mark_as_advanced(
+  zstd_LIBRARIES
+  zstd_INCLUDE_DIRS)
+
+# Define the imported target once; the TARGET check keeps repeated
+# inclusion of this module from redefining it.
+if(zstd_FOUND AND NOT (TARGET zstd::zstd))
+  add_library (zstd::zstd UNKNOWN IMPORTED)
+  set_target_properties(zstd::zstd
+    PROPERTIES
+      IMPORTED_LOCATION ${zstd_LIBRARIES}
+      INTERFACE_INCLUDE_DIRECTORIES ${zstd_INCLUDE_DIRS})
+endif()
--- a/cmake/modules/ReadVersion.cmake
+++ b/cmake/modules/ReadVersion.cmake
@ -0,0 +1,10 @@
+# Read rocksdb version from version.h header file.
+
+# get_rocksdb_version(<out-var>)
+# Reads include/rocksdb/version.h under CMAKE_CURRENT_SOURCE_DIR, extracts the
+# ROCKSDB_MAJOR / ROCKSDB_MINOR / ROCKSDB_PATCH defines and stores
+# "<major>.<minor>.<patch>" in <out-var> in the caller's scope.
+function(get_rocksdb_version version_var)
+  file(READ "${CMAKE_CURRENT_SOURCE_DIR}/include/rocksdb/version.h" version_header_file)
+  foreach(component MAJOR MINOR PATCH)
+    # The header text is quoted so it reaches string() as a single argument:
+    # unquoted, it would be split on every ';' and an empty header would
+    # leave string() without any input argument at all.
+    string(REGEX MATCH "#define ROCKSDB_${component} ([0-9]+)" _ "${version_header_file}")
+    set(ROCKSDB_VERSION_${component} ${CMAKE_MATCH_1})
+  endforeach()
+  set(${version_var} "${ROCKSDB_VERSION_MAJOR}.${ROCKSDB_VERSION_MINOR}.${ROCKSDB_VERSION_PATCH}" PARENT_SCOPE)
+endfunction()
--- a/common.mk
+++ b/common.mk
@ -0,0 +1,30 @@
+# Choose a Python interpreter when PYTHON has no value yet; a non-empty
+# PYTHON supplied by the caller skips this whole section.
+ifndef PYTHON
+
+# Default to python3. Some distros like CentOS 8 do not have `python`.
+# The shell lookup tries python3, then python, and finally falls back to the
+# bare name "python3" when neither is found.
+ifeq ($(origin PYTHON), undefined)
+	PYTHON := $(shell which python3 || which python || echo python3)
+endif
+export PYTHON
+
+endif
+
+# To setup tmp directory, first recognize some old variables for setting
+# test tmp directory or base tmp directory. TEST_TMPDIR is usually read
+# by RocksDB tools though Env/FileSystem::GetTestDirectory.
+# Precedence: an existing TEST_TMPDIR wins, then TMPD; only when both are
+# empty is a fresh directory created below.
+ifeq ($(TEST_TMPDIR),)
+TEST_TMPDIR := $(TMPD)
+endif
+ifeq ($(TEST_TMPDIR),)
+# Base directory for the fallback: BASE_TMPDIR, else TMPDIR, else /tmp.
+ifeq ($(BASE_TMPDIR),)
+BASE_TMPDIR :=$(TMPDIR)
+endif
+ifeq ($(BASE_TMPDIR),)
+BASE_TMPDIR :=/tmp
+endif
+# Use /dev/shm if it has the sticky bit set (otherwise, /tmp or other
+# base dir), and create a randomly-named rocksdb.XXXX directory therein.
+# CLEANUP => 0 means the directory is not removed automatically.
+TEST_TMPDIR := $(shell f=/dev/shm; test -k $$f || f=$(BASE_TMPDIR); \
+  perl -le 'use File::Temp "tempdir";'	                            \
+    -e 'print tempdir("'$$f'/rocksdb.XXXX", CLEANUP => 0)')
+endif
+export TEST_TMPDIR
--- a/coverage/coverage_test.sh
+++ b/coverage/coverage_test.sh
@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+# Abort the script as soon as any command fails.
+set -e
+
+# The report is built from gcov data, so clang builds are rejected up front.
+if [[ -n "${USE_CLANG}" ]]; then
+  echo "Error: Coverage test is supported only for gcc."
+  exit 1
+fi
+
+ROOT=".."
+# Select gcov: when /mnt/gvfs/third-party exists and CXX is unset, take it
+# from the toolchain described by the fbcode platform config; otherwise use
+# whichever gcov is on PATH.
+if [[ -d /mnt/gvfs/third-party && -z "${CXX}" ]]; then
+  . $ROOT/build_tools/fbcode_config_platform010.sh
+  GCOV=${GCC_BASE}/bin/gcov
+else
+  GCOV=`which gcov`
+fi
+echo -e "Using $GCOV"
+
+# All reports are written below the current working directory.
+COVERAGE_DIR="${PWD}/COVERAGE_REPORT"
+mkdir -p $COVERAGE_DIR
+
+# Gather every .gcno file under the source tree and turn the raw gcov output
+# into a readable report covering all files.
+
+# The interpreter may be passed as the first argument; python3 from PATH is
+# the default.
+PYTHON=${1:-$(which python3)}
+echo -e "Using $PYTHON"
+GCNO_FILES=$(find $ROOT -name "*.gcno")
+ALL_REPORT=$COVERAGE_DIR/coverage_report_all.txt
+# gcov -> parser -> tee, so the report goes to both stdout and the file.
+$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null |
+  $PYTHON $ROOT/coverage/parse_gcov_output.py |
+  tee $ALL_REPORT &&
+echo -e "Generated coverage report for all files: $ALL_REPORT\n"
+
+# TODO: we also need to get the files of the latest commits.
+# Get the most recently committed files.
+LATEST_FILES=`
+  git show --pretty="format:" --name-only HEAD |
+  grep -v "^$" |
+  paste -s -d,`
+RECENT_REPORT=$COVERAGE_DIR/coverage_report_recent.txt
+
+echo -e "Recently updated files: $LATEST_FILES\n" > $RECENT_REPORT
+$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null |
+  $PYTHON $ROOT/coverage/parse_gcov_output.py -interested-files $LATEST_FILES |
+  tee -a $RECENT_REPORT &&
+echo -e "Generated coverage report for recently updated files: $RECENT_REPORT\n"
+
+# Unless otherwise specified, we'll not generate html report by default
+if [ -z "$HTML" ]; then
+  exit 0
+fi
+
+# Generate the html report. If we cannot find lcov in this machine, we'll simply
+# skip this step.
+echo "Generating the html coverage report..."
+
+LCOV=$(which lcov || true 2>/dev/null)
+if [ -z $LCOV ]
+then
+  echo "Skip: Cannot find lcov to generate the html report."
+  exit 0
+fi
+
+LCOV_VERSION=$(lcov -v | grep 1.1 || true)
+if [ $LCOV_VERSION ]
+then
+  echo "Not supported lcov version. Expect lcov 1.1."
+  exit 0
+fi
+
+(cd $ROOT; lcov --no-external \
+     --capture  \
+     --directory $PWD \
+     --gcov-tool $GCOV \
+     --output-file $COVERAGE_DIR/coverage.info)
+
+genhtml $COVERAGE_DIR/coverage.info -o $COVERAGE_DIR
+
+echo "HTML Coverage report is generated in $COVERAGE_DIR"
--- a/coverage/parse_gcov_output.py
+++ b/coverage/parse_gcov_output.py
@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+from __future__ import print_function
+
+import optparse
+import re
+import sys
+
+# the gcov report follows certain pattern. Each file will have two lines
+# of report, from which we can extract the file name, total lines and coverage
+# percentage.
+def parse_gcov_report(gcov_input):
+    per_file_coverage = {}
+    total_coverage = None
+
+    for line in sys.stdin:
+        line = line.strip()
+
+        # --First line of the coverage report (with file name in it)?
+        match_obj = re.match("^File '(.*)'$", line)
+        if match_obj:
+            # fetch the file name from the first line of the report.
+            current_file = match_obj.group(1)
+            continue
+
+        # -- Second line of the file report (with coverage percentage)
+        match_obj = re.match("^Lines executed:(.*)% of (.*)", line)
+
+        if match_obj:
+            coverage = float(match_obj.group(1))
+            lines = int(match_obj.group(2))
+
+            if current_file is not None:
+                per_file_coverage[current_file] = (coverage, lines)
+                current_file = None
+            else:
+                # If current_file is not set, we reach the last line of report,
+                # which contains the summarized coverage percentage.
+                total_coverage = (coverage, lines)
+            continue
+
+        # If the line's pattern doesn't fall into the above categories. We
+        # can simply ignore them since they're either empty line or doesn't
+        # find executable lines of the given file.
+        current_file = None
+
+    return per_file_coverage, total_coverage
+
+
+def get_option_parser():
+    usage = (
+        "Parse the gcov output and generate more human-readable code "
+        + "coverage report."
+    )
+    parser = optparse.OptionParser(usage)
+
+    parser.add_option(
+        "--interested-files",
+        "-i",
+        dest="filenames",
+        help="Comma separated files names. if specified, we will display "
+        + "the coverage report only for interested source files. "
+        + "Otherwise we will display the coverage report for all "
+        + "source files.",
+    )
+    return parser
+
+
+def display_file_coverage(per_file_coverage, total_coverage):
+    # To print out auto-adjustable column, we need to know the longest
+    # length of file names.
+    max_file_name_length = max(len(fname) for fname in per_file_coverage.keys())
+
+    # -- Print header
+    # size of separator is determined by 3 column sizes:
+    # file name, coverage percentage and lines.
+    header_template = "%" + str(max_file_name_length) + "s\t%s\t%s"
+    separator = "-" * (max_file_name_length + 10 + 20)
+    print(
+        header_template % ("Filename", "Coverage", "Lines")
+    )  # noqa: E999 T25377293 Grandfathered in
+    print(separator)
+
+    # -- Print body
+    # template for printing coverage report for each file.
+    record_template = "%" + str(max_file_name_length) + "s\t%5.2f%%\t%10d"
+
+    for fname, coverage_info in per_file_coverage.items():
+        coverage, lines = coverage_info
+        print(record_template % (fname, coverage, lines))
+
+    # -- Print footer
+    if total_coverage:
+        print(separator)
+        print(record_template % ("Total", total_coverage[0], total_coverage[1]))
+
+
+def report_coverage():
+    parser = get_option_parser()
+    (options, args) = parser.parse_args()
+
+    interested_files = set()
+    if options.filenames is not None:
+        interested_files = {f.strip() for f in options.filenames.split(",")}
+
+    # To make things simple, right now we only read gcov report from the input
+    per_file_coverage, total_coverage = parse_gcov_report(sys.stdin)
+
+    # Check if we need to display coverage info for interested files.
+    if len(interested_files):
+        per_file_coverage = dict(
+            (fname, per_file_coverage[fname])
+            for fname in interested_files
+            if fname in per_file_coverage
+        )
+        # If we only interested in several files, it makes no sense to report
+        # the total_coverage
+        total_coverage = None
+
+    if not len(per_file_coverage):
+        print("Cannot find coverage info for the given files.", file=sys.stderr)
+        return
+    display_file_coverage(per_file_coverage, total_coverage)
+
+
+# Produce the report only when this file is executed as a script.
+if __name__ == "__main__":
+    report_coverage()
--- a/crash_test.mk
+++ b/crash_test.mk
@ -0,0 +1,121 @@
+# This file is used by Meta-internal infrastructure as well as by Makefile
+
+# When included from Makefile, there are rules to build DB_STRESS_CMD. When
+# used directly with `make -f crash_test.mk ...` there will be no rules to
+# build DB_STRESS_CMD so it must exist prior.
+DB_STRESS_CMD?=./db_stress
+
+# common.mk provides PYTHON (and TEST_TMPDIR).
+include common.mk
+
+# CRASHTEST_MAKE re-invokes make on this file for a single target;
+# CRASHTEST_PY is the common db_crashtest.py command line shared by all
+# blackbox/whitebox targets below.
+CRASHTEST_MAKE=$(MAKE) -f crash_test.mk
+CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD) --cleanup_cmd='$(DB_CLEANUP_CMD)'
+
+# Every target in this file is a command, not a file, so all of them are
+# declared phony. The list covers each crash_test*, blackbox_* and whitebox_*
+# rule defined below.
+.PHONY: crash_test crash_test_with_atomic_flush crash_test_with_txn \
+	crash_test_with_best_efforts_recovery crash_test_with_ts \
+	crash_test_with_optimistic_txn \
+	crash_test_with_multiops_wc_txn crash_test_with_multiops_wp_txn \
+	blackbox_crash_test blackbox_crash_test_with_atomic_flush \
+	blackbox_crash_test_with_txn blackbox_crash_test_with_ts \
+	blackbox_crash_test_with_best_efforts_recovery \
+	whitebox_crash_test whitebox_crash_test_with_atomic_flush \
+	whitebox_crash_test_with_txn whitebox_crash_test_with_ts \
+	blackbox_crash_test_with_multiops_wc_txn \
+	blackbox_crash_test_with_multiops_wp_txn \
+	crash_test_with_tiered_storage blackbox_crash_test_with_tiered_storage \
+	whitebox_crash_test_with_tiered_storage \
+	whitebox_crash_test_with_optimistic_txn \
+	blackbox_crash_test_with_optimistic_txn
+
+# Aggregate targets. Each one runs its whitebox variant and then its blackbox
+# variant through separate sub-make invocations, one after the other; the
+# best-efforts-recovery and multiops targets only have a blackbox variant.
+crash_test: $(DB_STRESS_CMD)
+# Do not parallelize
+	$(CRASHTEST_MAKE) whitebox_crash_test
+	$(CRASHTEST_MAKE) blackbox_crash_test
+
+crash_test_with_atomic_flush: $(DB_STRESS_CMD)
+# Do not parallelize
+	$(CRASHTEST_MAKE) whitebox_crash_test_with_atomic_flush
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_atomic_flush
+
+crash_test_with_txn: $(DB_STRESS_CMD)
+# Do not parallelize
+	$(CRASHTEST_MAKE) whitebox_crash_test_with_txn
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_txn
+
+crash_test_with_optimistic_txn: $(DB_STRESS_CMD)
+# Do not parallelize
+	$(CRASHTEST_MAKE) whitebox_crash_test_with_optimistic_txn
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_optimistic_txn
+
+# Alias: depends directly on the blackbox rule instead of using a sub-make.
+crash_test_with_best_efforts_recovery: blackbox_crash_test_with_best_efforts_recovery
+
+crash_test_with_ts: $(DB_STRESS_CMD)
+# Do not parallelize
+	$(CRASHTEST_MAKE) whitebox_crash_test_with_ts
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_ts
+
+crash_test_with_tiered_storage: $(DB_STRESS_CMD)
+# Do not parallelize
+	$(CRASHTEST_MAKE) whitebox_crash_test_with_tiered_storage
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_tiered_storage
+
+crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn
+
+crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wp_txn
+
+# Blackbox targets. Each one runs db_crashtest.py in "blackbox" mode with the
+# flag that selects the scenario; CRASH_TEST_EXT_ARGS is appended so callers
+# can pass extra arguments.
+# The plain target runs twice: once with --simple and once without.
+blackbox_crash_test: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --simple blackbox $(CRASH_TEST_EXT_ARGS)
+	$(CRASHTEST_PY) blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_atomic_flush: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --cf_consistency blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --txn blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_best_efforts_recovery: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --test_best_efforts_recovery blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_ts: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --enable_ts blackbox $(CRASH_TEST_EXT_ARGS)
+
+# The two multiops targets differ only in the --write_policy value.
+blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --test_multiops_txn --write_policy write_committed blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --test_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS)
+
+blackbox_crash_test_with_optimistic_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --optimistic_txn blackbox $(CRASH_TEST_EXT_ARGS)
+
+# Value passed to --random_kill_odd by every whitebox target; the default
+# below applies only when CRASH_TEST_KILL_ODD is empty.
+ifeq ($(CRASH_TEST_KILL_ODD),)
+  CRASH_TEST_KILL_ODD=888887
+endif
+
+# Whitebox targets. Each one runs db_crashtest.py in "whitebox" mode with the
+# flag that selects the scenario, followed by --random_kill_odd and
+# CRASH_TEST_EXT_ARGS.
+# The plain target runs twice: once with --simple and once without.
+whitebox_crash_test: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --simple whitebox --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
+	$(CRASHTEST_PY) whitebox  --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
+
+whitebox_crash_test_with_atomic_flush: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --cf_consistency whitebox  --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
+
+whitebox_crash_test_with_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --txn whitebox --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
+
+whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
+
+whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --test_tiered_storage whitebox --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
+
+whitebox_crash_test_with_optimistic_txn: $(DB_STRESS_CMD)
+	$(CRASHTEST_PY) --optimistic_txn whitebox --random_kill_odd \
+      $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
--- a/db/arena_wrapped_db_iter.cc
+++ b/db/arena_wrapped_db_iter.cc
@ -0,0 +1,165 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/arena_wrapped_db_iter.h"
+
+#include "memory/arena.h"
+#include "rocksdb/env.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
+#include "table/internal_iterator.h"
+#include "table/iterator_wrapper.h"
+#include "util/user_comparator_wrapper.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Looks up an iterator property by name and writes its value to `*prop`.
+// All names are answered by the wrapped DBIter. The super-version-number
+// property additionally falls back to the number stored in this wrapper when
+// the inner lookup fails, so it always yields Status::OK().
+Status ArenaWrappedDBIter::GetProperty(std::string prop_name,
+                                       std::string* prop) {
+  if (prop_name != "rocksdb.iterator.super-version-number") {
+    return db_iter_->GetProperty(prop_name, prop);
+  }
+  // Prefer the inner iterator's answer when it has one.
+  const bool answered_by_inner = db_iter_->GetProperty(prop_name, prop).ok();
+  if (!answered_by_inner) {
+    *prop = std::to_string(sv_number_);
+  }
+  return Status::OK();
+}
+
+// Constructs the wrapped DBIter inside this object's arena and records the
+// bookkeeping state (super version number, read options, refresh permission).
+// The DBIter is created without an internal iterator (nullptr). async_io is
+// cleared in the stored read options when the Env's file system does not
+// report support for FSSupportedOps::kAsyncIO.
+void ArenaWrappedDBIter::Init(
+    Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options, const Version* version,
+    const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iteration,
+    uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl,
+    ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) {
+  // Placement-construct the DBIter in memory owned by the arena.
+  auto iter_storage = arena_.AllocateAligned(sizeof(DBIter));
+  db_iter_ = new (iter_storage)
+      DBIter(env, read_options, ioptions, mutable_cf_options,
+             ioptions.user_comparator, /* iter */ nullptr, version, sequence,
+             true, max_sequential_skip_in_iteration, read_callback, db_impl,
+             cfd, expose_blob_index);
+
+  memtable_range_tombstone_iter_ = nullptr;
+  allow_refresh_ = allow_refresh;
+  read_options_ = read_options;
+  sv_number_ = version_number;
+
+  const bool async_io_supported = CheckFSFeatureSupport(
+      env->GetFileSystem().get(), FSSupportedOps::kAsyncIO);
+  if (!async_io_supported) {
+    read_options_.async_io = false;
+  }
+}
+
+// Brings the iterator up to date with the latest sequence number.
+// Returns NotSupported when no refresh info was stored (cfd_ or db_impl_ is
+// null) or refreshing is disallowed; otherwise returns OK.
+// If the column family's super version number differs from the one recorded
+// in sv_number_, the DBIter and its arena are torn down and rebuilt. If it is
+// unchanged, only the memtable range tombstones and the sequence number are
+// refreshed in place, and the super version number is checked again
+// afterwards to catch a change that happened in the meantime.
+Status ArenaWrappedDBIter::Refresh() {
+  if (cfd_ == nullptr || db_impl_ == nullptr || !allow_refresh_) {
+    return Status::NotSupported("Creating renew iterator is not allowed.");
+  }
+  assert(db_iter_ != nullptr);
+  // TODO(yiwu): For last_seq_same_as_publish_seq_==false, this is not the
+  // correct behavior. Will be corrected automatically when we take a snapshot
+  // here for the case of WritePreparedTxnDB.
+  uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
+  TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
+  TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
+  // Full rebuild: destroy the DBIter and the arena in place, re-create the
+  // arena, then re-run Init() and install a new internal iterator built from
+  // a freshly referenced super version.
+  auto reinit_internal_iter = [&]() {
+    // Save the Env before the DBIter that holds it is destroyed.
+    Env* env = db_iter_->env();
+    db_iter_->~DBIter();
+    arena_.~Arena();
+    new (&arena_) Arena();
+
+    SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
+    SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
+    if (read_callback_) {
+      read_callback_->Refresh(latest_seq);
+    }
+    Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
+         sv->current, latest_seq,
+         sv->mutable_cf_options.max_sequential_skip_in_iterations,
+         cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
+         allow_refresh_);
+
+    InternalIterator* internal_iter = db_impl_->NewInternalIterator(
+        read_options_, cfd_, sv, &arena_, latest_seq,
+        /* allow_unprepared_value */ true, /* db_iter */ this);
+    SetIterUnderDBIter(internal_iter);
+  };
+  // Loops only when the super version number changes while the in-place
+  // refresh is running; the next pass then takes the rebuild branch.
+  while (true) {
+    if (sv_number_ != cur_sv_number) {
+      reinit_internal_iter();
+      break;
+    } else {
+      SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
+      // Refresh range-tombstones in MemTable
+      if (!read_options_.ignore_range_deletions) {
+        SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
+        TEST_SYNC_POINT_CALLBACK("ArenaWrappedDBIter::Refresh:SV", nullptr);
+        auto t = sv->mem->NewRangeTombstoneIterator(
+            read_options_, latest_seq, false /* immutable_memtable */);
+        if (!t || t->empty()) {
+          // If memtable_range_tombstone_iter_ points to a non-empty tombstone
+          // iterator, then it means sv->mem is not the memtable that
+          // memtable_range_tombstone_iter_ points to, so SV must have changed
+          // after the sv_number_ != cur_sv_number check above. We will fall
+          // back to re-init the InternalIterator, and the tombstone iterator
+          // will be freed during db_iter destruction there.
+          if (memtable_range_tombstone_iter_) {
+            assert(!*memtable_range_tombstone_iter_ ||
+                   sv_number_ != cfd_->GetSuperVersionNumber());
+          }
+          delete t;
+        } else {  // current mutable memtable has range tombstones
+          if (!memtable_range_tombstone_iter_) {
+            delete t;
+            // The super version is returned here because this branch leaves
+            // the loop before reaching the common return below.
+            db_impl_->ReturnAndCleanupSuperVersion(cfd_, sv);
+            // The memtable under DBIter did not have range tombstone before
+            // refresh.
+            reinit_internal_iter();
+            break;
+          } else {
+            // Swap in a new tombstone iterator; ownership of `t` passes to
+            // the unique_ptr handed to TruncatedRangeDelIterator.
+            delete *memtable_range_tombstone_iter_;
+            *memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator(
+                std::unique_ptr<FragmentedRangeTombstoneIterator>(t),
+                &cfd_->internal_comparator(), nullptr, nullptr);
+          }
+        }
+        db_impl_->ReturnAndCleanupSuperVersion(cfd_, sv);
+      }
+      // Refresh latest sequence number
+      db_iter_->set_sequence(latest_seq);
+      db_iter_->set_valid(false);
+      // Check again if the latest super version number is changed
+      uint64_t latest_sv_number = cfd_->GetSuperVersionNumber();
+      if (latest_sv_number != cur_sv_number) {
+        // If the super version number is changed after refreshing,
+        // fallback to Re-Init the InternalIterator
+        cur_sv_number = latest_sv_number;
+        continue;
+      }
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+// Allocates an ArenaWrappedDBIter, initializes it with the given arguments
+// and returns it as a raw pointer. Refresh info (db_impl, cfd, read_callback,
+// expose_blob_index) is stored on the iterator only when refreshing is
+// allowed and both db_impl and cfd are non-null.
+ArenaWrappedDBIter* NewArenaWrappedDbIterator(
+    Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
+    const MutableCFOptions& mutable_cf_options, const Version* version,
+    const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
+    uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl,
+    ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) {
+  ArenaWrappedDBIter* wrapped_iter = new ArenaWrappedDBIter();
+  wrapped_iter->Init(env, read_options, ioptions, mutable_cf_options, version,
+                     sequence, max_sequential_skip_in_iterations,
+                     version_number, read_callback, db_impl, cfd,
+                     expose_blob_index, allow_refresh);
+
+  const bool can_refresh =
+      allow_refresh && db_impl != nullptr && cfd != nullptr;
+  if (can_refresh) {
+    wrapped_iter->StoreRefreshInfo(db_impl, cfd, read_callback,
+                                   expose_blob_index);
+  }
+  return wrapped_iter;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
--- a/Show More
+++ b/Show More