From f8c685c4fcf6b022105f221e4269d7b17a2f0b2f Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 16 Nov 2021 11:14:02 -0800 Subject: [PATCH] Check for and disallow shared key space in block caches (#9172) Summary: We have three layers of block cache that often use the same key but map to different physical data: * BlockBasedTableOptions::block_cache * BlockBasedTableOptions::block_cache_compressed * BlockBasedTableOptions::persistent_cache If any two of these happen to share an underlying implementation and key space (insertion into one shows up in another), then memory safety is broken. The simplest case is block_cache == block_cache_compressed. (Credit mrambacher for asking about this case in a review.) With this change, we explicitly check for overlap and preemptively and safely fail with a Status code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9172 Test Plan: test added. Crashes without new check Reviewed By: anand1976 Differential Revision: D32465659 Pulled By: pdillinger fbshipit-source-id: 3876b45b6dce6167e5a7a642725ddc86b96f8e40 --- HISTORY.md | 1 + cache/cache_entry_roles.h | 12 +- db/db_block_cache_test.cc | 137 ++++++++++++++++++ include/rocksdb/persistent_cache.h | 2 +- .../block_based/block_based_table_factory.cc | 118 +++++++++++++++ 5 files changed, 265 insertions(+), 5 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 99c5b8b8d..11128c4ea 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -15,6 +15,7 @@ * Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`. * Fixed an assertion failure in CompactionIterator when write-prepared transaction is used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key). We can drop the SingleDelete. * Fixed a bug of timestamp-based GC which can cause all versions of a key under full_history_ts_low to be dropped. This bug will be triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer. +* Explicitly check for and disallow the `BlockBasedTableOptions` if insertion into one of {`block_cache`, `block_cache_compressed`, `persistent_cache`} can show up in another of these. (RocksDB expects to be able to use the same key for different physical data among tiers.) ### Behavior Changes * `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files. diff --git a/cache/cache_entry_roles.h b/cache/cache_entry_roles.h index 9a6a2ad24..65f1c464b 100644 --- a/cache/cache_entry_roles.h +++ b/cache/cache_entry_roles.h @@ -7,6 +7,8 @@ #include #include +#include +#include #include #include "rocksdb/cache.h" @@ -90,7 +92,9 @@ struct RegisteredDeleter { // These have global linkage to help ensure compiler optimizations do not // break uniqueness for each static void Delete(const Slice& /* key */, void* value) { - delete static_cast(value); + // Supports T == Something[], unlike delete operator + std::default_delete()( + static_cast::type*>(value)); } }; @@ -98,9 +102,9 @@ template struct RegisteredNoopDeleter { RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); } - static void Delete(const Slice& /* key */, void* value) { - (void)value; - assert(value == nullptr); + static void Delete(const Slice& /* key */, void* /* value */) { + // Here was `assert(value == nullptr);` but we can also put pointers + // to static data in Cache, for testing at least. } }; diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 5c43882ac..22b1003f9 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -336,6 +336,143 @@ TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { CheckCacheCounters(options, 1, 0, 1, 0); CheckCompressedCacheCounters(options, 0, 1, 0, 0); } + +namespace { +class PersistentCacheFromCache : public PersistentCache { + public: + PersistentCacheFromCache(std::shared_ptr cache, bool read_only) + : cache_(cache), read_only_(read_only) {} + + Status Insert(const Slice& key, const char* data, + const size_t size) override { + if (read_only_) { + return Status::NotSupported(); + } + std::unique_ptr copy{new char[size]}; + std::copy_n(data, size, copy.get()); + Status s = cache_->Insert( + key, copy.get(), size, + GetCacheEntryDeleterForRole()); + if (s.ok()) { + copy.release(); + } + return s; + } + + Status Lookup(const Slice& key, std::unique_ptr* data, + size_t* size) override { + auto handle = cache_->Lookup(key); + if (handle) { + char* ptr = static_cast(cache_->Value(handle)); + *size = cache_->GetCharge(handle); + data->reset(new char[*size]); + std::copy_n(ptr, *size, data->get()); + cache_->Release(handle); + return Status::OK(); + } else { + return Status::NotFound(); + } + } + + bool IsCompressed() override { return false; } + + StatsType Stats() override { return StatsType(); } + + std::string GetPrintableOptions() const override { return ""; } + + uint64_t NewId() override { return cache_->NewId(); } + + private: + std::shared_ptr cache_; + bool read_only_; +}; + +class ReadOnlyCacheWrapper : public CacheWrapper { + using CacheWrapper::CacheWrapper; + + using Cache::Insert; + Status Insert(const Slice& /*key*/, void* /*value*/, size_t /*charge*/, + void (*)(const Slice& key, void* value) /*deleter*/, + Handle** /*handle*/, Priority /*priority*/) override { + return Status::NotSupported(); + } +}; + +} // namespace + +TEST_F(DBBlockCacheTest, TestWithSameCompressed) { + auto table_options = GetTableOptions(); + auto options = GetOptions(table_options); + InitTable(options); + + std::shared_ptr rw_cache{NewLRUCache(1000000)}; + std::shared_ptr rw_pcache{ + new PersistentCacheFromCache(rw_cache, /*read_only*/ false)}; + // Exercise some obscure behavior with read-only wrappers + std::shared_ptr ro_cache{new ReadOnlyCacheWrapper(rw_cache)}; + std::shared_ptr ro_pcache{ + new PersistentCacheFromCache(rw_cache, /*read_only*/ true)}; + + // Simple same pointer + table_options.block_cache = rw_cache; + table_options.block_cache_compressed = rw_cache; + table_options.persistent_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache same as block_cache_compressed not " + "currently supported, and would be bad for performance anyway"); + + // Other cases + table_options.block_cache = ro_cache; + table_options.block_cache_compressed = rw_cache; + table_options.persistent_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache and block_cache_compressed share " + "the same key space, which is not supported"); + + table_options.block_cache = rw_cache; + table_options.block_cache_compressed = ro_cache; + table_options.persistent_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache_compressed and block_cache share " + "the same key space, which is not supported"); + + table_options.block_cache = ro_cache; + table_options.block_cache_compressed.reset(); + table_options.persistent_cache = rw_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache and persistent_cache share the same " + "key space, which is not supported"); + + table_options.block_cache = rw_cache; + table_options.block_cache_compressed.reset(); + table_options.persistent_cache = ro_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: persistent_cache and block_cache share the same " + "key space, which is not supported"); + + table_options.block_cache.reset(); + table_options.no_block_cache = true; + table_options.block_cache_compressed = ro_cache; + table_options.persistent_cache = rw_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache_compressed and persistent_cache " + "share the same key space, which is not supported"); + + table_options.block_cache.reset(); + table_options.no_block_cache = true; + table_options.block_cache_compressed = rw_cache; + table_options.persistent_cache = ro_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: persistent_cache and block_cache_compressed " + "share the same key space, which is not supported"); +} #endif // SNAPPY #ifndef ROCKSDB_LITE diff --git a/include/rocksdb/persistent_cache.h b/include/rocksdb/persistent_cache.h index cff6c7ecd..3bb4477f0 100644 --- a/include/rocksdb/persistent_cache.h +++ b/include/rocksdb/persistent_cache.h @@ -31,7 +31,7 @@ class PersistentCache { // Insert to page cache // // page_key Identifier to identify a page uniquely across restarts - // data Page data + // data Page data to copy (caller retains ownership) // size Size of the page virtual Status Insert(const Slice& key, const char* data, const size_t size) = 0; diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index c8ef7a97c..259effb87 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -15,6 +15,7 @@ #include #include +#include "cache/cache_entry_roles.h" #include "logging/logging.h" #include "options/options_helper.h" #include "port/port.h" @@ -23,6 +24,7 @@ #include "rocksdb/filter_policy.h" #include "rocksdb/flush_block_policy.h" #include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/table.h" #include "rocksdb/utilities/options_type.h" #include "table/block_based/block_based_table_builder.h" #include "table/block_based/block_based_table_reader.h" @@ -488,6 +490,116 @@ Status BlockBasedTableFactory::PrepareOptions(const ConfigOptions& opts) { return TableFactory::PrepareOptions(opts); } +namespace { +// Different cache kinds use the same keys for physically different values, so +// they must not share an underlying key space with each other. +Status CheckCacheOptionCompatibility(const BlockBasedTableOptions& bbto) { + int cache_count = (bbto.block_cache != nullptr) + + (bbto.block_cache_compressed != nullptr) + + (bbto.persistent_cache != nullptr); + if (cache_count <= 1) { + // Nothing to share / overlap + return Status::OK(); + } + + // Simple pointer equality + if (bbto.block_cache == bbto.block_cache_compressed) { + return Status::InvalidArgument( + "block_cache same as block_cache_compressed not currently supported, " + "and would be bad for performance anyway"); + } + + // More complex test of shared key space, in case the instances are wrappers + // for some shared underlying cache. + std::string sentinel_key(size_t{1}, '\0'); + static char kRegularBlockCacheMarker = 'b'; + static char kCompressedBlockCacheMarker = 'c'; + static char kPersistentCacheMarker = 'p'; + if (bbto.block_cache) { + bbto.block_cache + ->Insert(Slice(sentinel_key), &kRegularBlockCacheMarker, 1, + GetNoopDeleterForRole()) + .PermitUncheckedError(); + } + if (bbto.block_cache_compressed) { + bbto.block_cache_compressed + ->Insert(Slice(sentinel_key), &kCompressedBlockCacheMarker, 1, + GetNoopDeleterForRole()) + .PermitUncheckedError(); + } + if (bbto.persistent_cache) { + // Note: persistent cache copies the data, not keeping the pointer + bbto.persistent_cache + ->Insert(Slice(sentinel_key), &kPersistentCacheMarker, 1) + .PermitUncheckedError(); + } + // If we get something different from what we inserted, that indicates + // dangerously overlapping key spaces. + if (bbto.block_cache) { + auto handle = bbto.block_cache->Lookup(Slice(sentinel_key)); + if (handle) { + auto v = static_cast(bbto.block_cache->Value(handle)); + char c = *v; + bbto.block_cache->Release(handle); + if (v == &kCompressedBlockCacheMarker) { + return Status::InvalidArgument( + "block_cache and block_cache_compressed share the same key space, " + "which is not supported"); + } else if (c == kPersistentCacheMarker) { + return Status::InvalidArgument( + "block_cache and persistent_cache share the same key space, " + "which is not supported"); + } else if (v != &kRegularBlockCacheMarker) { + return Status::Corruption("Unexpected mutation to block_cache"); + } + } + } + if (bbto.block_cache_compressed) { + auto handle = bbto.block_cache_compressed->Lookup(Slice(sentinel_key)); + if (handle) { + auto v = static_cast(bbto.block_cache_compressed->Value(handle)); + char c = *v; + bbto.block_cache_compressed->Release(handle); + if (v == &kRegularBlockCacheMarker) { + return Status::InvalidArgument( + "block_cache_compressed and block_cache share the same key space, " + "which is not supported"); + } else if (c == kPersistentCacheMarker) { + return Status::InvalidArgument( + "block_cache_compressed and persistent_cache share the same key " + "space, " + "which is not supported"); + } else if (v != &kCompressedBlockCacheMarker) { + return Status::Corruption( + "Unexpected mutation to block_cache_compressed"); + } + } + } + if (bbto.persistent_cache) { + std::unique_ptr data; + size_t size = 0; + bbto.persistent_cache->Lookup(Slice(sentinel_key), &data, &size) + .PermitUncheckedError(); + if (data && size > 0) { + if (data[0] == kRegularBlockCacheMarker) { + return Status::InvalidArgument( + "persistent_cache and block_cache share the same key space, " + "which is not supported"); + } else if (data[0] == kCompressedBlockCacheMarker) { + return Status::InvalidArgument( + "persistent_cache and block_cache_compressed share the same key " + "space, " + "which is not supported"); + } else if (data[0] != kPersistentCacheMarker) { + return Status::Corruption("Unexpected mutation to persistent_cache"); + } + } + } + return Status::OK(); +} + +} // namespace + Status BlockBasedTableFactory::NewTableReader( const ReadOptions& ro, const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, @@ -566,6 +678,12 @@ Status BlockBasedTableFactory::ValidateOptions( "max_successive_merges larger than 0 is currently inconsistent with " "unordered_write"); } + { + Status s = CheckCacheOptionCompatibility(table_options_); + if (!s.ok()) { + return s; + } + } std::string garbage; if (!SerializeEnum(checksum_type_string_map, table_options_.checksum, &garbage)) {