From eaaf1a6f05e14c3418eaac237467eb30ad8c404a Mon Sep 17 00:00:00 2001 From: Abhishek Madan Date: Tue, 30 Oct 2018 15:29:58 -0700 Subject: [PATCH] Promote rocksdb.{deleted.keys,merge.operands} to main table properties (#4594) Summary: Since the number of range deletions are reported in TableProperties, it is confusing to not report the number of merge operands and point deletions as top-level properties; they are accessible through the public API, but since they are not the "main" properties, they do not appear in aggregated table properties, or the string representation of table properties. This change promotes those two property keys to `rocksdb/table_properties.h`, adds corresponding uint64 members for them, deprecates the old access methods `GetDeletedKeys()` and `GetMergeOperands()` (though they are still usable for now), and removes `InternalKeyPropertiesCollector`. The property key strings are the same as before this change, so this should be able to read DBs written from older versions (though I haven't tested this yet). 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4594 Differential Revision: D12826893 Pulled By: abhimadan fbshipit-source-id: 9e4e4fbdc5b0da161c89582566d184101ba8eb68 --- HISTORY.md | 1 + db/column_family.cc | 3 - db/db_properties_test.cc | 96 +++++++++++++++++------- db/table_properties_collector.cc | 56 +------------- db/table_properties_collector.h | 38 ---------- db/table_properties_collector_test.cc | 3 - db/version_set.cc | 2 +- include/rocksdb/table_properties.h | 10 +++ table/block_based_table_builder.cc | 5 ++ table/cuckoo_table_builder.cc | 1 + table/cuckoo_table_builder_test.cc | 104 +++++++++++++++----------- table/meta_blocks.cc | 12 +++ table/plain_table_builder.cc | 6 ++ table/table_properties.cc | 8 ++ tools/ldb_cmd.cc | 4 - tools/sst_dump_tool.cc | 13 ---- 16 files changed, 177 insertions(+), 185 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 815ac9c80..68b079ebb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context. * With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction is lagged behind. * Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled. +* Added `num_deletions` and `num_merge_operands` members to `TableProperties`. ### Bug Fixes * Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. 
diff --git a/db/column_family.cc b/db/column_family.cc index 8ea3d1216..70bc9e898 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -105,9 +105,6 @@ void GetIntTblPropCollectorFactory( int_tbl_prop_collector_factories->emplace_back( new UserKeyTablePropertiesCollectorFactory(collector_factories[i])); } - // Add collector to collect internal key statistics - int_tbl_prop_collector_factories->emplace_back( - new InternalKeyPropertiesCollectorFactory); } Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) { diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index bdd50ec6c..e496ad6c2 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -170,6 +170,8 @@ void ResetTableProperties(TableProperties* tp) { tp->raw_value_size = 0; tp->num_data_blocks = 0; tp->num_entries = 0; + tp->num_deletions = 0; + tp->num_merge_operands = 0; tp->num_range_deletions = 0; } @@ -179,17 +181,19 @@ void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { std::replace(tp_string.begin(), tp_string.end(), '=', ' '); ResetTableProperties(tp); sscanf(tp_string.c_str(), - "# data blocks %" SCNu64 " # entries %" SCNu64 - " # range deletions %" SCNu64 " raw key size %" SCNu64 + "# data blocks %" SCNu64 " # entries %" SCNu64 " # deletions %" SCNu64 + " # merge operands %" SCNu64 " # range deletions %" SCNu64 + " raw key size %" SCNu64 " raw average key size %lf " " raw value size %" SCNu64 " raw average value size %lf " " data block size %" SCNu64 " index block size (user-key? %" SCNu64 ", delta-value? 
%" SCNu64 ") %" SCNu64 " filter block size %" SCNu64, - &tp->num_data_blocks, &tp->num_entries, &tp->num_range_deletions, - &tp->raw_key_size, &dummy_double, &tp->raw_value_size, &dummy_double, - &tp->data_size, &tp->index_key_is_user_key, - &tp->index_value_is_delta_encoded, &tp->index_size, &tp->filter_size); + &tp->num_data_blocks, &tp->num_entries, &tp->num_deletions, + &tp->num_merge_operands, &tp->num_range_deletions, &tp->raw_key_size, + &dummy_double, &tp->raw_value_size, &dummy_double, &tp->data_size, + &tp->index_key_is_user_key, &tp->index_value_is_delta_encoded, + &tp->index_size, &tp->filter_size); } void VerifySimilar(uint64_t a, uint64_t b, double bias) { @@ -217,28 +221,43 @@ void VerifyTableProperties(const TableProperties& base_tp, VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, num_data_blocks_bias); + ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); + ASSERT_EQ(base_tp.num_deletions, new_tp.num_deletions); ASSERT_EQ(base_tp.num_range_deletions, new_tp.num_range_deletions); + + // Merge operands may become Puts, so we only have an upper bound on the exact
+ ASSERT_GE(base_tp.num_merge_operands, new_tp.num_merge_operands); } void GetExpectedTableProperties( TableProperties* expected_tp, const int kKeySize, const int kValueSize, - const int kKeysPerTable, const int kRangeDeletionsPerTable, + const int kPutsPerTable, const int kDeletionsPerTable, + const int kMergeOperandsPerTable, const int kRangeDeletionsPerTable, const int kTableCount, const int kBloomBitsPerKey, const size_t kBlockSize, const bool index_key_is_user_key, const bool value_delta_encoding) { - const int kKeyCount = kTableCount * kKeysPerTable; + const int kKeysPerTable = + kPutsPerTable + kDeletionsPerTable + kMergeOperandsPerTable; + const int kPutCount = kTableCount * kPutsPerTable; + const int kDeletionCount = kTableCount * kDeletionsPerTable; + const int kMergeCount = kTableCount * kMergeOperandsPerTable; const int kRangeDeletionCount = kTableCount * kRangeDeletionsPerTable; + const int kKeyCount = kPutCount + kDeletionCount + kMergeCount; const int kAvgSuccessorSize = kKeySize / 5; const int kEncodingSavePerKey = kKeySize / 4; - expected_tp->raw_key_size = (kKeyCount + kRangeDeletionCount) * (kKeySize + 8); - expected_tp->raw_value_size = (kKeyCount + kRangeDeletionCount) * kValueSize; + expected_tp->raw_key_size = + (kKeyCount + kRangeDeletionCount) * (kKeySize + 8); + expected_tp->raw_value_size = + (kPutCount + kMergeCount + kRangeDeletionCount) * kValueSize; expected_tp->num_entries = kKeyCount; + expected_tp->num_deletions = kDeletionCount; + expected_tp->num_merge_operands = kMergeCount; expected_tp->num_range_deletions = kRangeDeletionCount; expected_tp->num_data_blocks = - kTableCount * - (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / + kTableCount * (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / kBlockSize; expected_tp->data_size = kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); @@ -298,8 +317,10 @@ TEST_F(DBPropertiesTest, ValidateSampleNumber) { TEST_F(DBPropertiesTest, 
AggregatedTableProperties) { for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { + const int kDeletionsPerTable = 5; + const int kMergeOperandsPerTable = 15; const int kRangeDeletionsPerTable = 5; - const int kKeysPerTable = 100; + const int kPutsPerTable = 100; const int kKeySize = 80; const int kValueSize = 200; const int kBloomBitsPerKey = 20; @@ -308,6 +329,8 @@ TEST_F(DBPropertiesTest, AggregatedTableProperties) { options.level0_file_num_compaction_trigger = 8; options.compression = kNoCompression; options.create_if_missing = true; + options.preserve_deletes = true; + options.merge_operator.reset(new TestPutOperator()); BlockBasedTableOptions table_options; table_options.filter_policy.reset( @@ -323,10 +346,17 @@ TEST_F(DBPropertiesTest, AggregatedTableProperties) { Random rnd(5632); for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kKeysPerTable; ++i) { + for (int i = 0; i < kPutsPerTable; ++i) { db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } + for (int i = 0; i < kDeletionsPerTable; i++) { + db_->Delete(WriteOptions(), RandomString(&rnd, kKeySize)); + } + for (int i = 0; i < kMergeOperandsPerTable; i++) { + db_->Merge(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } for (int i = 0; i < kRangeDeletionsPerTable; i++) { std::string start = RandomString(&rnd, kKeySize); std::string end = start; @@ -343,11 +373,11 @@ TEST_F(DBPropertiesTest, AggregatedTableProperties) { bool value_is_delta_encoded = output_tp.index_value_is_delta_encoded > 0; TableProperties expected_tp; - GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, - kKeysPerTable, kRangeDeletionsPerTable, - kTableCount, kBloomBitsPerKey, - table_options.block_size, index_key_is_user_key, - value_is_delta_encoded); + GetExpectedTableProperties( + &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, + kMergeOperandsPerTable, kRangeDeletionsPerTable, 
kTableCount, + kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, + value_is_delta_encoded); VerifyTableProperties(expected_tp, output_tp); } @@ -469,8 +499,10 @@ TEST_F(DBPropertiesTest, ReadLatencyHistogramByLevel) { TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { const int kTableCount = 100; + const int kDeletionsPerTable = 2; + const int kMergeOperandsPerTable = 2; const int kRangeDeletionsPerTable = 2; - const int kKeysPerTable = 10; + const int kPutsPerTable = 10; const int kKeySize = 50; const int kValueSize = 400; const int kMaxLevel = 7; @@ -486,6 +518,8 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { options.max_bytes_for_level_multiplier = 2; // This ensures there no compaction happening when we call GetProperty(). options.disable_auto_compactions = true; + options.preserve_deletes = true; + options.merge_operator.reset(new TestPutOperator()); BlockBasedTableOptions table_options; table_options.filter_policy.reset( @@ -503,10 +537,17 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { TableProperties level_tps[kMaxLevel]; TableProperties tp, sum_tp, expected_tp; for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kKeysPerTable; ++i) { + for (int i = 0; i < kPutsPerTable; ++i) { db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } + for (int i = 0; i < kDeletionsPerTable; i++) { + db_->Delete(WriteOptions(), RandomString(&rnd, kKeySize)); + } + for (int i = 0; i < kMergeOperandsPerTable; i++) { + db_->Merge(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } for (int i = 0; i < kRangeDeletionsPerTable; i++) { std::string start = RandomString(&rnd, kKeySize); std::string end = start; @@ -528,6 +569,8 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { sum_tp.raw_value_size += level_tps[level].raw_value_size; sum_tp.num_data_blocks += level_tps[level].num_data_blocks; sum_tp.num_entries += 
level_tps[level].num_entries; + sum_tp.num_deletions += level_tps[level].num_deletions; + sum_tp.num_merge_operands += level_tps[level].num_merge_operands; sum_tp.num_range_deletions += level_tps[level].num_range_deletions; } db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string); @@ -541,12 +584,15 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size); ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks); ASSERT_EQ(sum_tp.num_entries, tp.num_entries); + ASSERT_EQ(sum_tp.num_deletions, tp.num_deletions); + ASSERT_EQ(sum_tp.num_merge_operands, tp.num_merge_operands); ASSERT_EQ(sum_tp.num_range_deletions, tp.num_range_deletions); if (table > 3) { - GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, - kKeysPerTable, kRangeDeletionsPerTable, table, - kBloomBitsPerKey, table_options.block_size, - index_key_is_user_key, value_is_delta_encoded); + GetExpectedTableProperties( + &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, + kMergeOperandsPerTable, kRangeDeletionsPerTable, table, + kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, + value_is_delta_encoded); // Gives larger bias here as index block size, filter block size, // and data block size become much harder to estimate in this test. VerifyTableProperties(expected_tp, tp, 0.5, 0.4, 0.4, 0.25); diff --git a/db/table_properties_collector.cc b/db/table_properties_collector.cc index 084cf139d..4254e179c 100644 --- a/db/table_properties_collector.cc +++ b/db/table_properties_collector.cc @@ -11,52 +11,6 @@ namespace rocksdb { -Status InternalKeyPropertiesCollector::InternalAdd(const Slice& key, - const Slice& /*value*/, - uint64_t /*file_size*/) { - ParsedInternalKey ikey; - if (!ParseInternalKey(key, &ikey)) { - return Status::InvalidArgument("Invalid internal key"); - } - - // Note: We count both, deletions and single deletions here. 
- if (ikey.type == ValueType::kTypeDeletion || - ikey.type == ValueType::kTypeSingleDeletion) { - ++deleted_keys_; - } else if (ikey.type == ValueType::kTypeMerge) { - ++merge_operands_; - } - - return Status::OK(); -} - -Status InternalKeyPropertiesCollector::Finish( - UserCollectedProperties* properties) { - assert(properties); - assert(properties->find( - InternalKeyTablePropertiesNames::kDeletedKeys) == properties->end()); - assert(properties->find(InternalKeyTablePropertiesNames::kMergeOperands) == - properties->end()); - - std::string val_deleted_keys; - PutVarint64(&val_deleted_keys, deleted_keys_); - properties->insert( - {InternalKeyTablePropertiesNames::kDeletedKeys, val_deleted_keys}); - - std::string val_merge_operands; - PutVarint64(&val_merge_operands, merge_operands_); - properties->insert( - {InternalKeyTablePropertiesNames::kMergeOperands, val_merge_operands}); - - return Status::OK(); -} - -UserCollectedProperties -InternalKeyPropertiesCollector::GetReadableProperties() const { - return {{"kDeletedKeys", ToString(deleted_keys_)}, - {"kMergeOperands", ToString(merge_operands_)}}; -} - namespace { uint64_t GetUint64Property(const UserCollectedProperties& props, @@ -97,23 +51,17 @@ UserKeyTablePropertiesCollector::GetReadableProperties() const { return collector_->GetReadableProperties(); } - -const std::string InternalKeyTablePropertiesNames::kDeletedKeys - = "rocksdb.deleted.keys"; -const std::string InternalKeyTablePropertiesNames::kMergeOperands = - "rocksdb.merge.operands"; - uint64_t GetDeletedKeys( const UserCollectedProperties& props) { bool property_present_ignored; - return GetUint64Property(props, InternalKeyTablePropertiesNames::kDeletedKeys, + return GetUint64Property(props, TablePropertiesNames::kDeletedKeys, &property_present_ignored); } uint64_t GetMergeOperands(const UserCollectedProperties& props, bool* property_present) { return GetUint64Property( - props, InternalKeyTablePropertiesNames::kMergeOperands, property_present); + props, 
TablePropertiesNames::kMergeOperands, property_present); } } // namespace rocksdb diff --git a/db/table_properties_collector.h b/db/table_properties_collector.h index 7216ec319..4792b1a81 100644 --- a/db/table_properties_collector.h +++ b/db/table_properties_collector.h @@ -14,11 +14,6 @@ namespace rocksdb { -struct InternalKeyTablePropertiesNames { - static const std::string kDeletedKeys; - static const std::string kMergeOperands; -}; - // Base class for internal table properties collector. class IntTblPropCollector { public: @@ -49,39 +44,6 @@ class IntTblPropCollectorFactory { virtual const char* Name() const = 0; }; -// Collecting the statistics for internal keys. Visible only by internal -// rocksdb modules. -class InternalKeyPropertiesCollector : public IntTblPropCollector { - public: - virtual Status InternalAdd(const Slice& key, const Slice& value, - uint64_t file_size) override; - - virtual Status Finish(UserCollectedProperties* properties) override; - - virtual const char* Name() const override { - return "InternalKeyPropertiesCollector"; - } - - UserCollectedProperties GetReadableProperties() const override; - - private: - uint64_t deleted_keys_ = 0; - uint64_t merge_operands_ = 0; -}; - -class InternalKeyPropertiesCollectorFactory - : public IntTblPropCollectorFactory { - public: - virtual IntTblPropCollector* CreateIntTblPropCollector( - uint32_t /*column_family_id*/) override { - return new InternalKeyPropertiesCollector(); - } - - virtual const char* Name() const override { - return "InternalKeyPropertiesCollectorFactory"; - } -}; - // When rocksdb creates a new table, it will encode all "user keys" into // "internal keys", which contains meta information of a given entry. 
// diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 2147bf3fa..0944bb4c2 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -399,9 +399,6 @@ void TestInternalKeyPropertiesCollector( ImmutableCFOptions ioptions(options); GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories); options.comparator = comparator; - } else { - int_tbl_prop_collector_factories.emplace_back( - new InternalKeyPropertiesCollectorFactory); } const ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); diff --git a/db/version_set.cc b/db/version_set.cc index 0782a64a5..b86fac22e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1341,7 +1341,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { } if (tp.get() == nullptr) return false; file_meta->num_entries = tp->num_entries; - file_meta->num_deletions = GetDeletedKeys(tp->user_collected_properties); + file_meta->num_deletions = tp->num_deletions; file_meta->raw_value_size = tp->raw_value_size; file_meta->raw_key_size = tp->raw_key_size; diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index d545e455f..75c180ff4 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -40,6 +40,8 @@ struct TablePropertiesNames { static const std::string kRawValueSize; static const std::string kNumDataBlocks; static const std::string kNumEntries; + static const std::string kDeletedKeys; + static const std::string kMergeOperands; static const std::string kNumRangeDeletions; static const std::string kFormatVersion; static const std::string kFixedKeyLen; @@ -152,6 +154,10 @@ struct TableProperties { uint64_t num_data_blocks = 0; // the number of entries in this table uint64_t num_entries = 0; + // the number of deletions in the table + uint64_t num_deletions = 0; + // the number of merge operands in the table + uint64_t 
num_merge_operands = 0; // the number of range deletions in this table uint64_t num_range_deletions = 0; // format version, reserved for backward compatibility @@ -216,6 +222,10 @@ struct TableProperties { // Below is a list of non-basic properties that are collected by database // itself. Especially some properties regarding to the internal keys (which // is unknown to `table`). +// +// DEPRECATED: these properties now belong as TableProperties members. Please +// use TableProperties::num_deletions and TableProperties::num_merge_operands, +// respectively. extern uint64_t GetDeletedKeys(const UserCollectedProperties& props); extern uint64_t GetMergeOperands(const UserCollectedProperties& props, bool* property_present); diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index d78f1e27f..542c989f1 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -450,6 +450,11 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { r->props.num_entries++; r->props.raw_key_size += key.size(); r->props.raw_value_size += value.size(); + if (value_type == kTypeDeletion || value_type == kTypeSingleDeletion) { + r->props.num_deletions++; + } else if (value_type == kTypeMerge) { + r->props.num_merge_operands++; + } r->index_builder->OnKeyAdded(key); NotifyCollectTableCollectorsOnAdd(key, value, r->offset, diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 7d9842a95..f590e6ad4 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -289,6 +289,7 @@ Status CuckooTableBuilder::Finish() { } } properties_.num_entries = num_entries_; + properties_.num_deletions = num_entries_ - num_values_; properties_.fixed_key_len = key_size_; properties_.user_collected_properties[ CuckooTablePropertyNames::kValueLength].assign( diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index 27eacf6ec..e4a1804e7 100644 --- 
a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -43,6 +43,13 @@ class CuckooBuilderTest : public testing::Test { std::string expected_unused_bucket, uint64_t expected_table_size, uint32_t expected_num_hash_func, bool expected_is_last_level, uint32_t expected_cuckoo_block_size = 1) { + uint64_t num_deletions = 0; + for (const auto& key : keys) { + ParsedInternalKey parsed; + if (ParseInternalKey(key, &parsed) && parsed.type == kTypeDeletion) { + num_deletions++; + } + } // Read file unique_ptr read_file; ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); @@ -90,6 +97,7 @@ class CuckooBuilderTest : public testing::Test { ASSERT_EQ(expected_is_last_level, is_last_level_found); ASSERT_EQ(props->num_entries, keys.size()); + ASSERT_EQ(props->num_deletions, num_deletions); ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); ASSERT_EQ(props->data_size, expected_unused_bucket.size() * (expected_table_size + expected_cuckoo_block_size - 1)); @@ -126,9 +134,10 @@ class CuckooBuilderTest : public testing::Test { } } - std::string GetInternalKey(Slice user_key, bool zero_seqno) { + std::string GetInternalKey(Slice user_key, bool zero_seqno, + ValueType type = kTypeValue) { IterKey ikey; - ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, kTypeValue); + ikey.SetInternalKey(user_key, zero_seqno ? 
0 : 1000, type); return ikey.GetInternalKey().ToString(); } @@ -169,50 +178,57 @@ TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { } TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { - uint32_t num_hash_fun = 4; - std::vector user_keys = {"key01", "key02", "key03", "key04"}; - std::vector values = {"v01", "v02", "v03", "v04"}; - // Need to have a temporary variable here as VS compiler does not currently - // support operator= with initializer_list as a parameter - std::unordered_map> hm = { - {user_keys[0], {0, 1, 2, 3}}, - {user_keys[1], {1, 2, 3, 4}}, - {user_keys[2], {2, 3, 4, 5}}, - {user_keys[3], {3, 4, 5, 6}}}; - hash_map = std::move(hm); - - std::vector expected_locations = {0, 1, 2, 3}; - std::vector keys; - for (auto& user_key : user_keys) { - keys.push_back(GetInternalKey(user_key, false)); - } - uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - - unique_ptr writable_file; - fname = test::PerThreadDBPath("NoCollisionFullKey"); - ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); - CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, - 100, BytewiseComparator(), 1, false, false, - GetSliceHash, 0 /* column_family_id */, - kDefaultColumnFamilyName); - ASSERT_OK(builder.status()); - for (uint32_t i = 0; i < user_keys.size(); i++) { - builder.Add(Slice(keys[i]), Slice(values[i])); - ASSERT_EQ(builder.NumEntries(), i + 1); + for (auto type : {kTypeValue, kTypeDeletion}) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values; + if (type == kTypeValue) { + values = {"v01", "v02", "v03", "v04"}; + } else { + values = {"", "", "", ""}; + } + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, 
+ {user_keys[1], {1, 2, 3, 4}}, + {user_keys[2], {2, 3, 4, 5}}, + {user_keys[3], {3, 4, 5, 6}}}; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 2, 3}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false, type)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + unique_ptr writable_file; + fname = test::PerThreadDBPath("NoCollisionFullKey"); + ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); + unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 2, false); } - size_t bucket_size = keys[0].size() + values[0].size(); - ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); - ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); - ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - - std::string expected_unused_bucket = GetInternalKey("key00", true); - expected_unused_bucket += std::string(values[0].size(), 'a'); - CheckFileContents(keys, values, expected_locations, - 
expected_unused_bucket, expected_table_size, 2, false); } TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 256730bfa..9553efa08 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -79,6 +79,8 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kIndexValueIsDeltaEncoded, props.index_value_is_delta_encoded); Add(TablePropertiesNames::kNumEntries, props.num_entries); + Add(TablePropertiesNames::kDeletedKeys, props.num_deletions); + Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands); Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions); Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); Add(TablePropertiesNames::kFilterSize, props.filter_size); @@ -229,6 +231,10 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, {TablePropertiesNames::kNumDataBlocks, &new_table_properties->num_data_blocks}, {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, + {TablePropertiesNames::kDeletedKeys, + &new_table_properties->num_deletions}, + {TablePropertiesNames::kMergeOperands, + &new_table_properties->num_merge_operands}, {TablePropertiesNames::kNumRangeDeletions, &new_table_properties->num_range_deletions}, {TablePropertiesNames::kFormatVersion, @@ -263,6 +269,12 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, {key, handle.offset() + iter.ValueOffset()}); if (pos != predefined_uint64_properties.end()) { + if (key == TablePropertiesNames::kDeletedKeys || + key == TablePropertiesNames::kMergeOperands) { + // Insert in user-collected properties for API backwards compatibility + new_table_properties->user_collected_properties.insert( + {key, raw_val.ToString()}); + } // handle predefined rocksdb properties uint64_t val; if (!GetVarint64(&raw_val, &val)) { diff --git a/table/plain_table_builder.cc 
b/table/plain_table_builder.cc index 717635cc1..453b6c768 100644 --- a/table/plain_table_builder.cc +++ b/table/plain_table_builder.cc @@ -166,6 +166,12 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { properties_.num_entries++; properties_.raw_key_size += key.size(); properties_.raw_value_size += value.size(); + if (internal_key.type == kTypeDeletion || + internal_key.type == kTypeSingleDeletion) { + properties_.num_deletions++; + } else if (internal_key.type == kTypeMerge) { + properties_.num_merge_operands++; + } // notify property collectors NotifyCollectTableCollectorsOnAdd( diff --git a/table/table_properties.cc b/table/table_properties.cc index 207a64191..56e1d03f1 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -78,6 +78,9 @@ std::string TableProperties::ToString( AppendProperty(result, "# data blocks", num_data_blocks, prop_delim, kv_delim); AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); + AppendProperty(result, "# deletions", num_deletions, prop_delim, kv_delim); + AppendProperty(result, "# merge operands", num_merge_operands, prop_delim, + kv_delim); AppendProperty(result, "# range deletions", num_range_deletions, prop_delim, kv_delim); @@ -170,6 +173,8 @@ void TableProperties::Add(const TableProperties& tp) { raw_value_size += tp.raw_value_size; num_data_blocks += tp.num_data_blocks; num_entries += tp.num_entries; + num_deletions += tp.num_deletions; + num_merge_operands += tp.num_merge_operands; num_range_deletions += tp.num_range_deletions; } @@ -195,6 +200,9 @@ const std::string TablePropertiesNames::kNumDataBlocks = "rocksdb.num.data.blocks"; const std::string TablePropertiesNames::kNumEntries = "rocksdb.num.entries"; +const std::string TablePropertiesNames::kDeletedKeys = "rocksdb.deleted.keys"; +const std::string TablePropertiesNames::kMergeOperands = + "rocksdb.merge.operands"; const std::string TablePropertiesNames::kNumRangeDeletions = "rocksdb.num.range-deletions"; const 
std::string TablePropertiesNames::kFilterPolicy = diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index d0840feb2..b4c74d1b9 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -2871,10 +2871,6 @@ void DumpSstFile(std::string filename, bool output_hex, bool show_properties) { if (table_properties != nullptr) { std::cout << std::endl << "Table Properties:" << std::endl; std::cout << table_properties->ToString("\n") << std::endl; - std::cout << "# deleted keys: " - << rocksdb::GetDeletedKeys( - table_properties->user_collected_properties) - << std::endl; } } } diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 6ca56aad9..e3341e34b 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -646,19 +646,6 @@ int SSTDumpTool::Run(int argc, char** argv) { "------------------------------\n" " %s", table_properties->ToString("\n ", ": ").c_str()); - fprintf(stdout, "# deleted keys: %" PRIu64 "\n", - rocksdb::GetDeletedKeys( - table_properties->user_collected_properties)); - - bool property_present; - uint64_t merge_operands = rocksdb::GetMergeOperands( - table_properties->user_collected_properties, &property_present); - if (property_present) { - fprintf(stdout, " # merge operands: %" PRIu64 "\n", - merge_operands); - } else { - fprintf(stdout, " # merge operands: UNKNOWN\n"); - } } total_num_files += 1; total_num_data_blocks += table_properties->num_data_blocks;