From b6269b078a58155e5dfe26d6af087ab10d5eba00 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 18 Aug 2021 11:32:00 -0700 Subject: [PATCH] Stable cache keys on ingested SST files (#8669) Summary: Extends https://github.com/facebook/rocksdb/issues/8659 to work for ingested external SST files, even the same file ingested into different DBs sharing a block cache. Note: These new cache keys are currently only enabled when FileSystem does not provide GetUniqueId. For now, they are typically larger, so slightly less efficient. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8669 Test Plan: Extended unit test Reviewed By: zhichao-cao Differential Revision: D30398532 Pulled By: pdillinger fbshipit-source-id: 1f13e2af4b8bfff5741953a69466e9589fbc23c7 --- db/db_block_cache_test.cc | 51 ++++++++++++++++--- db/db_impl/db_impl.cc | 16 ++++-- db/db_impl/db_impl.h | 2 + db/repair.cc | 20 +++----- table/block_based/block_based_table_reader.cc | 2 - table/sst_file_writer.cc | 3 +- util/defer.h | 8 +++ 7 files changed, 73 insertions(+), 29 deletions(-) diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index c7c100f63..8d9a883b6 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -18,6 +18,7 @@ #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "util/compression.h" +#include "util/defer.h" #include "util/random.h" #include "utilities/fault_injection_fs.h" @@ -1310,7 +1311,7 @@ class StableCacheKeyTestFS : public FaultInjectionTestFS { SetFailGetUniqueId(true); } - virtual ~StableCacheKeyTestFS() {} + virtual ~StableCacheKeyTestFS() override {} IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&, IODebugContext*) override { @@ -1342,16 +1343,17 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) { table_options.no_block_cache = true; table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false); verify_stats = [&options] { + // One for ordinary SST file and one for external SST file ASSERT_EQ( - 1, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD)); + 2, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD)); }; } else { table_options.cache_index_and_filter_blocks = true; table_options.block_cache = NewLRUCache(1 << 25, 0, false); verify_stats = [&options] { - ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(1, + ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); + ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); + ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); }; } @@ -1360,18 +1362,41 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) { options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); + // Ordinary SST file ASSERT_OK(Put("key1", "abc")); std::string something_compressible(500U, 'x'); - ASSERT_OK(Put("key2", something_compressible)); + ASSERT_OK(Put("key1a", something_compressible)); ASSERT_OK(Flush()); +#ifndef ROCKSDB_LITE + // External SST file + std::string external = dbname_ + "/external.sst"; + { + SstFileWriter sst_file_writer(EnvOptions(), options); + ASSERT_OK(sst_file_writer.Open(external)); + ASSERT_OK(sst_file_writer.Put("key2", "abc")); + ASSERT_OK(sst_file_writer.Put("key2a", something_compressible)); + ExternalSstFileInfo external_info; + ASSERT_OK(sst_file_writer.Finish(&external_info)); + IngestExternalFileOptions ingest_opts; + ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts)); + } +#else + // Another ordinary SST file + ASSERT_OK(Put("key2", "abc")); + ASSERT_OK(Put("key2a", something_compressible)); + ASSERT_OK(Flush()); +#endif + ASSERT_EQ(Get("key1"), std::string("abc")); + ASSERT_EQ(Get("key2"), std::string("abc")); verify_stats(); // Make sure we can cache hit after re-open Reopen(options); ASSERT_EQ(Get("key1"), std::string("abc")); + ASSERT_EQ(Get("key2"), std::string("abc")); verify_stats(); // Make sure we can cache hit even on a full copy of the DB. Using @@ -1386,14 +1411,26 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) { Close(); Destroy(options); - dbname_ = db_copy_name; + SaveAndRestore save_dbname(&dbname_, db_copy_name); Reopen(options); ASSERT_EQ(Get("key1"), std::string("abc")); + ASSERT_EQ(Get("key2"), std::string("abc")); + verify_stats(); + + // And ensure that re-ingesting the same external file into a different DB + // uses same cache keys + DestroyAndReopen(options); + + IngestExternalFileOptions ingest_opts; + ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts)); + + ASSERT_EQ(Get("key2"), std::string("abc")); verify_stats(); #endif // !ROCKSDB_LITE Close(); + Destroy(options); } } diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 20c3c2bd4..d617fe3e7 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3945,10 +3945,10 @@ Status DBImpl::GetDbSessionId(std::string& session_id) const { return Status::OK(); } -void DBImpl::SetDbSessionId() { +std::string DBImpl::GenerateDbSessionId(Env* env) { // GenerateUniqueId() generates an identifier that has a negligible // probability of being duplicated, ~128 bits of entropy - std::string uuid = env_->GenerateUniqueId(); + std::string uuid = env->GenerateUniqueId(); // Hash and reformat that down to a more compact format, 20 characters // in base-36 ([0-9A-Z]), which is ~103 bits of entropy, which is enough @@ -3959,15 +3959,21 @@ void DBImpl::SetDbSessionId() { // * Visually distinct from DB id format uint64_t a = NPHash64(uuid.data(), uuid.size(), 1234U); uint64_t b = NPHash64(uuid.data(), uuid.size(), 5678U); - db_session_id_.resize(20); + std::string db_session_id; + db_session_id.resize(20); static const char* const base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; size_t i = 0; for (; i < 10U; ++i, a /= 36U) { - db_session_id_[i] = base36[a % 36]; + db_session_id[i] = base36[a % 36]; } for (; i < 20U; ++i, b /= 36U) { - db_session_id_[i] = base36[b % 36]; + db_session_id[i] = base36[b % 36]; } + return db_session_id; +} + +void DBImpl::SetDbSessionId() { + db_session_id_ = GenerateDbSessionId(env_); TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_); } diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index c4bb5b68b..68107a0e8 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1114,6 +1114,8 @@ class DBImpl : public DB { State state_; }; + static std::string GenerateDbSessionId(Env* env); + protected: const std::string dbname_; std::string db_id_; diff --git a/db/repair.cc b/db/repair.cc index 7eaf8adcc..3efe63dfc 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -93,6 +93,7 @@ class Repairer { const ColumnFamilyOptions& default_cf_opts, const ColumnFamilyOptions& unknown_cf_opts, bool create_unknown_cfs) : dbname_(dbname), + db_session_id_(DBImpl::GenerateDbSessionId(db_options.env)), env_(db_options.env), file_options_(), db_options_(SanitizeOptions(dbname_, db_options)), @@ -109,21 +110,16 @@ class Repairer { // TableCache can be small since we expect each table to be opened // once. NewLRUCache(10, db_options_.table_cache_numshardbits)), - table_cache_( - // TODO: db_session_id for TableCache should be initialized after - // db_session_id_ is set. - new TableCache(default_iopts_, &file_options_, - raw_table_cache_.get(), - /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_session_id*/ "")), + table_cache_(new TableCache(default_iopts_, &file_options_, + raw_table_cache_.get(), + /*block_cache_tracer=*/nullptr, + /*io_tracer=*/nullptr, db_session_id_)), wb_(db_options_.db_write_buffer_size), wc_(db_options_.delayed_write_rate), - // TODO: db_session_id for VersionSet should be initialized after - // db_session_id_ is set and use it for initialization. vset_(dbname_, &immutable_db_options_, file_options_, raw_table_cache_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_session_id*/ ""), + db_session_id_), next_file_number_(1), db_lock_(nullptr), closed_(false) { @@ -198,10 +194,6 @@ class Repairer { } // Just create a DBImpl temporarily so we can reuse NewDB() db_impl = new DBImpl(db_options_, dbname_); - // Also use this temp DBImpl to get a session id - status = db_impl->GetDbSessionId(db_session_id_); - } - if (status.ok()) { status = db_impl->NewDB(/*new_filenames=*/nullptr); } delete db_impl; diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 5c020084d..c9ee60aeb 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -649,8 +649,6 @@ Status BlockBasedTable::Open( // under these portable/stable keys. // Note: For now, each external SST file gets its own unique session id, // so we can use a fixed file number under than session id. - // ... except FIXME (peterd): sst_file_writer currently uses wrong - // format for db_session_ids so this approach doesn't work yet. db_session_id = rep->table_properties->db_session_id; file_num = 1; } diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index 0d46c764b..0ce8f6931 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -7,6 +7,7 @@ #include +#include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "file/writable_file_writer.h" #include "rocksdb/file_system.h" @@ -245,7 +246,7 @@ Status SstFileWriter::Open(const std::string& file_path) { // Here we mimic the way db_session_id behaves by resetting the db_session_id // every time SstFileWriter is used, and in this case db_id is set to be "SST // Writer". - std::string db_session_id = r->ioptions.env->GenerateUniqueId(); + std::string db_session_id = DBImpl::GenerateDbSessionId(r->ioptions.env); if (!db_session_id.empty() && db_session_id.back() == '\n') { db_session_id.pop_back(); } diff --git a/util/defer.h b/util/defer.h index 81b0df255..a78193173 100644 --- a/util/defer.h +++ b/util/defer.h @@ -57,6 +57,14 @@ class SaveAndRestore { public: // obj is non-null pointer to value to be saved and later restored. explicit SaveAndRestore(T* obj) : obj_(obj), saved_(*obj) {} + // new_value is stored in *obj + SaveAndRestore(T* obj, const T& new_value) + : obj_(obj), saved_(std::move(*obj)) { + *obj = new_value; + } + SaveAndRestore(T* obj, T&& new_value) : obj_(obj), saved_(std::move(*obj)) { + *obj = std::move(new_value); + } ~SaveAndRestore() { *obj_ = std::move(saved_); } // No copies