From 3c17930ede36b7c23c3a6a6b0b41b43532261c4f Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 4 Apr 2023 15:33:24 -0700 Subject: [PATCH] Change default block cache from 8MB to 32MB (#11350) Summary: ... which increases default number of shards from 16 to 64. Although the default block cache size is only recommended for applications where RocksDB is not performance-critical, under stress conditions, block cache mutex contention could become a performance bottleneck. This change of default should alleviate that. Note that reducing the size of cache shards (recommended minimum 512KB) could cause thrashing, e.g. on filter blocks, so capacity needs to increase to safely increase number of shards. The 8MB default dates back to 2011 or earlier (f779e7a5), when the most simultaneous threads you could get from a single CPU socket was 20 (e.g. Intel Xeon E7-8870). Now more than 100 is available. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11350 Test Plan: unit tests updated Reviewed By: cbi42 Differential Revision: D44674873 Pulled By: pdillinger fbshipit-source-id: 91ed3070789b42679283c7e6dc97c41a6a97bdf4 --- HISTORY.md | 1 + include/rocksdb/table.h | 2 +- options/options_test.cc | 2 +- table/block_based/block_based_table_factory.cc | 7 ++----- tools/db_bench_tool.cc | 6 +++--- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 41f1e8bd1..5bac01687 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Add `multi_get_for_update` to C API. ### Behavior changes +* Changed default block cache size from an 8MB to 32MB LRUCache, which increases the default number of cache shards from 16 to 64. This change is intended to minimize cache mutex contention under stress conditions. See https://github.com/facebook/rocksdb/wiki/Block-Cache for more information. 
* For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now trivially moves levels down to fill LSM starting from bottommost level during DB open. See more in comments for option `level_compaction_dynamic_level_bytes`. ### New Features diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 9d7e3d3b8..6e8f60577 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -259,7 +259,7 @@ struct BlockBasedTableOptions { bool no_block_cache = false; // If non-NULL use the specified cache for blocks. - // If NULL, rocksdb will automatically create and use an 8MB internal cache. + // If NULL, rocksdb will automatically create and use a 32MB internal cache. std::shared_ptr block_cache = nullptr; // If non-NULL use the specified cache for pages read from device diff --git a/options/options_test.cc b/options/options_test.cc index 481259a9e..935b6b9fd 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -2161,7 +2161,7 @@ TEST_F(OptionsTest, ConvertOptionsTest) { const auto table_opt = table_factory->GetOptions(); ASSERT_NE(table_opt, nullptr); - ASSERT_EQ(table_opt->block_cache->GetCapacity(), 8UL << 20); + ASSERT_EQ(table_opt->block_cache->GetCapacity(), 32UL << 20); ASSERT_EQ(table_opt->block_size, leveldb_opt.block_size); ASSERT_EQ(table_opt->block_restart_interval, leveldb_opt.block_restart_interval); diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index 845f3a619..dc852e543 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -443,11 +443,8 @@ void BlockBasedTableFactory::InitializeOptions() { table_options_.block_cache.reset(); } else if (table_options_.block_cache == nullptr) { LRUCacheOptions co; - co.capacity = 8 << 20; - // It makes little sense to pay overhead for mid-point insertion while the - // block size is only 8MB. 
- co.high_pri_pool_ratio = 0.0; - co.low_pri_pool_ratio = 0.0; + // 32MB, the recommended minimum size for 64 shards, to reduce contention + co.capacity = 32 << 20; table_options_.block_cache = NewLRUCache(co); } if (table_options_.block_size_deviation < 0 || diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index e6afc625f..759f634b2 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -548,7 +548,7 @@ DEFINE_bool(universal_allow_trivial_move, false, DEFINE_bool(universal_incremental, false, "Enable incremental compactions in universal compaction."); -DEFINE_int64(cache_size, 8 << 20, // 8MB +DEFINE_int64(cache_size, 32 << 20, // 32MB "Number of bytes to use as a cache of uncompressed data"); DEFINE_int32(cache_numshardbits, -1, @@ -569,7 +569,7 @@ DEFINE_string(cache_type, "lru_cache", "Type of block cache."); DEFINE_bool(use_compressed_secondary_cache, false, "Use the CompressedSecondaryCache as the secondary cache."); -DEFINE_int64(compressed_secondary_cache_size, 8 << 20, // 8MB +DEFINE_int64(compressed_secondary_cache_size, 32 << 20, // 32MB "Number of bytes to use as a cache of data"); DEFINE_int32(compressed_secondary_cache_numshardbits, 6, @@ -4590,7 +4590,7 @@ class Benchmark { if (FLAGS_cache_size > 0) { // This violates this function's rules on when to set options. But we // have to do it because the case of unconfigured block cache in OPTIONS - // file is indistinguishable (it is sanitized to 8MB by this point, not + // file is indistinguishable (it is sanitized to 32MB by this point, not // nullptr), and our regression tests assume this will be the shared // block cache, even with OPTIONS file provided. table_options->block_cache = cache_;