diff --git a/HISTORY.md b/HISTORY.md index 815ac9c80..68b079ebb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context. * With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction is lagged behind. * Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled. +* Added `num_deletions` and `num_merge_operands` members to `TableProperties`. ### Bug Fixes * Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. diff --git a/db/column_family.cc b/db/column_family.cc index 8ea3d1216..70bc9e898 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -105,9 +105,6 @@ void GetIntTblPropCollectorFactory( int_tbl_prop_collector_factories->emplace_back( new UserKeyTablePropertiesCollectorFactory(collector_factories[i])); } - // Add collector to collect internal key statistics - int_tbl_prop_collector_factories->emplace_back( - new InternalKeyPropertiesCollectorFactory); } Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) { diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index bdd50ec6c..e496ad6c2 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -170,6 +170,8 @@ void ResetTableProperties(TableProperties* tp) { tp->raw_value_size = 0; tp->num_data_blocks = 0; tp->num_entries = 0; + tp->num_deletions = 0; + tp->num_merge_operands = 0; tp->num_range_deletions = 0; } @@ -179,17 +181,19 @@ void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { std::replace(tp_string.begin(), tp_string.end(), '=', ' '); ResetTableProperties(tp); sscanf(tp_string.c_str(), - "# data blocks %" SCNu64 " # entries %" SCNu64 - " # range deletions %" SCNu64 " raw key size %" SCNu64 + "# data blocks %" SCNu64 " # entries %" SCNu64 " # deletions %" SCNu64 + " # merge operands %" SCNu64 " # range deletions %" SCNu64 + " raw key size %" SCNu64 " raw average key size %lf " " raw value size %" SCNu64 " raw average value size %lf " " data block size %" SCNu64 " index block size (user-key? %" SCNu64 ", delta-value? %" SCNu64 ") %" SCNu64 " filter block size %" SCNu64, - &tp->num_data_blocks, &tp->num_entries, &tp->num_range_deletions, - &tp->raw_key_size, &dummy_double, &tp->raw_value_size, &dummy_double, - &tp->data_size, &tp->index_key_is_user_key, - &tp->index_value_is_delta_encoded, &tp->index_size, &tp->filter_size); + &tp->num_data_blocks, &tp->num_entries, &tp->num_deletions, + &tp->num_merge_operands, &tp->num_range_deletions, &tp->raw_key_size, + &dummy_double, &tp->raw_value_size, &dummy_double, &tp->data_size, + &tp->index_key_is_user_key, &tp->index_value_is_delta_encoded, + &tp->index_size, &tp->filter_size); } void VerifySimilar(uint64_t a, uint64_t b, double bias) { @@ -217,28 +221,43 @@ void VerifyTableProperties(const TableProperties& base_tp, VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, num_data_blocks_bias); + ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); + ASSERT_EQ(base_tp.num_deletions, new_tp.num_deletions); ASSERT_EQ(base_tp.num_range_deletions, new_tp.num_range_deletions); + + // Merge operands may become Puts, so we only have an upper bound the exact + // number of merge operands. + ASSERT_GE(base_tp.num_merge_operands, new_tp.num_merge_operands); } void GetExpectedTableProperties( TableProperties* expected_tp, const int kKeySize, const int kValueSize, - const int kKeysPerTable, const int kRangeDeletionsPerTable, + const int kPutsPerTable, const int kDeletionsPerTable, + const int kMergeOperandsPerTable, const int kRangeDeletionsPerTable, const int kTableCount, const int kBloomBitsPerKey, const size_t kBlockSize, const bool index_key_is_user_key, const bool value_delta_encoding) { - const int kKeyCount = kTableCount * kKeysPerTable; + const int kKeysPerTable = + kPutsPerTable + kDeletionsPerTable + kMergeOperandsPerTable; + const int kPutCount = kTableCount * kPutsPerTable; + const int kDeletionCount = kTableCount * kDeletionsPerTable; + const int kMergeCount = kTableCount * kMergeOperandsPerTable; const int kRangeDeletionCount = kTableCount * kRangeDeletionsPerTable; + const int kKeyCount = kPutCount + kDeletionCount + kMergeCount; const int kAvgSuccessorSize = kKeySize / 5; const int kEncodingSavePerKey = kKeySize / 4; - expected_tp->raw_key_size = (kKeyCount + kRangeDeletionCount) * (kKeySize + 8); - expected_tp->raw_value_size = (kKeyCount + kRangeDeletionCount) * kValueSize; + expected_tp->raw_key_size = + (kKeyCount + kRangeDeletionCount) * (kKeySize + 8); + expected_tp->raw_value_size = + (kPutCount + kMergeCount + kRangeDeletionCount) * kValueSize; expected_tp->num_entries = kKeyCount; + expected_tp->num_deletions = kDeletionCount; + expected_tp->num_merge_operands = kMergeCount; expected_tp->num_range_deletions = kRangeDeletionCount; expected_tp->num_data_blocks = - kTableCount * - (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / + kTableCount * (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / kBlockSize; expected_tp->data_size = kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); @@ -298,8 +317,10 @@ TEST_F(DBPropertiesTest, ValidateSampleNumber) { TEST_F(DBPropertiesTest, AggregatedTableProperties) { for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { + const int kDeletionsPerTable = 5; + const int kMergeOperandsPerTable = 15; const int kRangeDeletionsPerTable = 5; - const int kKeysPerTable = 100; + const int kPutsPerTable = 100; const int kKeySize = 80; const int kValueSize = 200; const int kBloomBitsPerKey = 20; @@ -308,6 +329,8 @@ TEST_F(DBPropertiesTest, AggregatedTableProperties) { options.level0_file_num_compaction_trigger = 8; options.compression = kNoCompression; options.create_if_missing = true; + options.preserve_deletes = true; + options.merge_operator.reset(new TestPutOperator()); BlockBasedTableOptions table_options; table_options.filter_policy.reset( @@ -323,10 +346,17 @@ TEST_F(DBPropertiesTest, AggregatedTableProperties) { Random rnd(5632); for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kKeysPerTable; ++i) { + for (int i = 0; i < kPutsPerTable; ++i) { db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } + for (int i = 0; i < kDeletionsPerTable; i++) { + db_->Delete(WriteOptions(), RandomString(&rnd, kKeySize)); + } + for (int i = 0; i < kMergeOperandsPerTable; i++) { + db_->Merge(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } for (int i = 0; i < kRangeDeletionsPerTable; i++) { std::string start = RandomString(&rnd, kKeySize); std::string end = start; @@ -343,11 +373,11 @@ TEST_F(DBPropertiesTest, AggregatedTableProperties) { bool value_is_delta_encoded = output_tp.index_value_is_delta_encoded > 0; TableProperties expected_tp; - GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, - kKeysPerTable, kRangeDeletionsPerTable, - kTableCount, kBloomBitsPerKey, - table_options.block_size, index_key_is_user_key, - value_is_delta_encoded); + GetExpectedTableProperties( + &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, + kMergeOperandsPerTable, kRangeDeletionsPerTable, kTableCount, + kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, + value_is_delta_encoded); VerifyTableProperties(expected_tp, output_tp); } @@ -469,8 +499,10 @@ TEST_F(DBPropertiesTest, ReadLatencyHistogramByLevel) { TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { const int kTableCount = 100; + const int kDeletionsPerTable = 2; + const int kMergeOperandsPerTable = 2; const int kRangeDeletionsPerTable = 2; - const int kKeysPerTable = 10; + const int kPutsPerTable = 10; const int kKeySize = 50; const int kValueSize = 400; const int kMaxLevel = 7; @@ -486,6 +518,8 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { options.max_bytes_for_level_multiplier = 2; // This ensures there no compaction happening when we call GetProperty(). options.disable_auto_compactions = true; + options.preserve_deletes = true; + options.merge_operator.reset(new TestPutOperator()); BlockBasedTableOptions table_options; table_options.filter_policy.reset( @@ -503,10 +537,17 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { TableProperties level_tps[kMaxLevel]; TableProperties tp, sum_tp, expected_tp; for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kKeysPerTable; ++i) { + for (int i = 0; i < kPutsPerTable; ++i) { db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } + for (int i = 0; i < kDeletionsPerTable; i++) { + db_->Delete(WriteOptions(), RandomString(&rnd, kKeySize)); + } + for (int i = 0; i < kMergeOperandsPerTable; i++) { + db_->Merge(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } for (int i = 0; i < kRangeDeletionsPerTable; i++) { std::string start = RandomString(&rnd, kKeySize); std::string end = start; @@ -528,6 +569,8 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { sum_tp.raw_value_size += level_tps[level].raw_value_size; sum_tp.num_data_blocks += level_tps[level].num_data_blocks; sum_tp.num_entries += level_tps[level].num_entries; + sum_tp.num_deletions += level_tps[level].num_deletions; + sum_tp.num_merge_operands += level_tps[level].num_merge_operands; sum_tp.num_range_deletions += level_tps[level].num_range_deletions; } db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string); @@ -541,12 +584,15 @@ TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size); ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks); ASSERT_EQ(sum_tp.num_entries, tp.num_entries); + ASSERT_EQ(sum_tp.num_deletions, tp.num_deletions); + ASSERT_EQ(sum_tp.num_merge_operands, tp.num_merge_operands); ASSERT_EQ(sum_tp.num_range_deletions, tp.num_range_deletions); if (table > 3) { - GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, - kKeysPerTable, kRangeDeletionsPerTable, table, - kBloomBitsPerKey, table_options.block_size, - index_key_is_user_key, value_is_delta_encoded); + GetExpectedTableProperties( + &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, + kMergeOperandsPerTable, kRangeDeletionsPerTable, table, + kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, + value_is_delta_encoded); // Gives larger bias here as index block size, filter block size, // and data block size become much harder to estimate in this test. VerifyTableProperties(expected_tp, tp, 0.5, 0.4, 0.4, 0.25); diff --git a/db/table_properties_collector.cc b/db/table_properties_collector.cc index 084cf139d..4254e179c 100644 --- a/db/table_properties_collector.cc +++ b/db/table_properties_collector.cc @@ -11,52 +11,6 @@ namespace rocksdb { -Status InternalKeyPropertiesCollector::InternalAdd(const Slice& key, - const Slice& /*value*/, - uint64_t /*file_size*/) { - ParsedInternalKey ikey; - if (!ParseInternalKey(key, &ikey)) { - return Status::InvalidArgument("Invalid internal key"); - } - - // Note: We count both, deletions and single deletions here. - if (ikey.type == ValueType::kTypeDeletion || - ikey.type == ValueType::kTypeSingleDeletion) { - ++deleted_keys_; - } else if (ikey.type == ValueType::kTypeMerge) { - ++merge_operands_; - } - - return Status::OK(); -} - -Status InternalKeyPropertiesCollector::Finish( - UserCollectedProperties* properties) { - assert(properties); - assert(properties->find( - InternalKeyTablePropertiesNames::kDeletedKeys) == properties->end()); - assert(properties->find(InternalKeyTablePropertiesNames::kMergeOperands) == - properties->end()); - - std::string val_deleted_keys; - PutVarint64(&val_deleted_keys, deleted_keys_); - properties->insert( - {InternalKeyTablePropertiesNames::kDeletedKeys, val_deleted_keys}); - - std::string val_merge_operands; - PutVarint64(&val_merge_operands, merge_operands_); - properties->insert( - {InternalKeyTablePropertiesNames::kMergeOperands, val_merge_operands}); - - return Status::OK(); -} - -UserCollectedProperties -InternalKeyPropertiesCollector::GetReadableProperties() const { - return {{"kDeletedKeys", ToString(deleted_keys_)}, - {"kMergeOperands", ToString(merge_operands_)}}; -} - namespace { uint64_t GetUint64Property(const UserCollectedProperties& props, @@ -97,23 +51,17 @@ UserKeyTablePropertiesCollector::GetReadableProperties() const { return collector_->GetReadableProperties(); } - -const std::string InternalKeyTablePropertiesNames::kDeletedKeys - = "rocksdb.deleted.keys"; -const std::string InternalKeyTablePropertiesNames::kMergeOperands = - "rocksdb.merge.operands"; - uint64_t GetDeletedKeys( const UserCollectedProperties& props) { bool property_present_ignored; - return GetUint64Property(props, InternalKeyTablePropertiesNames::kDeletedKeys, + return GetUint64Property(props, TablePropertiesNames::kDeletedKeys, &property_present_ignored); } uint64_t GetMergeOperands(const UserCollectedProperties& props, bool* property_present) { return GetUint64Property( - props, InternalKeyTablePropertiesNames::kMergeOperands, property_present); + props, TablePropertiesNames::kMergeOperands, property_present); } } // namespace rocksdb diff --git a/db/table_properties_collector.h b/db/table_properties_collector.h index 7216ec319..4792b1a81 100644 --- a/db/table_properties_collector.h +++ b/db/table_properties_collector.h @@ -14,11 +14,6 @@ namespace rocksdb { -struct InternalKeyTablePropertiesNames { - static const std::string kDeletedKeys; - static const std::string kMergeOperands; -}; - // Base class for internal table properties collector. class IntTblPropCollector { public: @@ -49,39 +44,6 @@ class IntTblPropCollectorFactory { virtual const char* Name() const = 0; }; -// Collecting the statistics for internal keys. Visible only by internal -// rocksdb modules. -class InternalKeyPropertiesCollector : public IntTblPropCollector { - public: - virtual Status InternalAdd(const Slice& key, const Slice& value, - uint64_t file_size) override; - - virtual Status Finish(UserCollectedProperties* properties) override; - - virtual const char* Name() const override { - return "InternalKeyPropertiesCollector"; - } - - UserCollectedProperties GetReadableProperties() const override; - - private: - uint64_t deleted_keys_ = 0; - uint64_t merge_operands_ = 0; -}; - -class InternalKeyPropertiesCollectorFactory - : public IntTblPropCollectorFactory { - public: - virtual IntTblPropCollector* CreateIntTblPropCollector( - uint32_t /*column_family_id*/) override { - return new InternalKeyPropertiesCollector(); - } - - virtual const char* Name() const override { - return "InternalKeyPropertiesCollectorFactory"; - } -}; - // When rocksdb creates a new table, it will encode all "user keys" into // "internal keys", which contains meta information of a given entry. // diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 2147bf3fa..0944bb4c2 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -399,9 +399,6 @@ void TestInternalKeyPropertiesCollector( ImmutableCFOptions ioptions(options); GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories); options.comparator = comparator; - } else { - int_tbl_prop_collector_factories.emplace_back( - new InternalKeyPropertiesCollectorFactory); } const ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); diff --git a/db/version_set.cc b/db/version_set.cc index 0782a64a5..b86fac22e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1341,7 +1341,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { } if (tp.get() == nullptr) return false; file_meta->num_entries = tp->num_entries; - file_meta->num_deletions = GetDeletedKeys(tp->user_collected_properties); + file_meta->num_deletions = tp->num_deletions; file_meta->raw_value_size = tp->raw_value_size; file_meta->raw_key_size = tp->raw_key_size; diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index d545e455f..75c180ff4 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -40,6 +40,8 @@ struct TablePropertiesNames { static const std::string kRawValueSize; static const std::string kNumDataBlocks; static const std::string kNumEntries; + static const std::string kDeletedKeys; + static const std::string kMergeOperands; static const std::string kNumRangeDeletions; static const std::string kFormatVersion; static const std::string kFixedKeyLen; @@ -152,6 +154,10 @@ struct TableProperties { uint64_t num_data_blocks = 0; // the number of entries in this table uint64_t num_entries = 0; + // the number of deletions in the table + uint64_t num_deletions = 0; + // the number of merge operands in the table + uint64_t num_merge_operands = 0; // the number of range deletions in this table uint64_t num_range_deletions = 0; // format version, reserved for backward compatibility @@ -216,6 +222,10 @@ struct TableProperties { // Below is a list of non-basic properties that are collected by database // itself. Especially some properties regarding to the internal keys (which // is unknown to `table`). +// +// DEPRECATED: these properties now belong as TableProperties members. Please +// use TableProperties::num_deletions and TableProperties::num_merge_operands, +// respectively. extern uint64_t GetDeletedKeys(const UserCollectedProperties& props); extern uint64_t GetMergeOperands(const UserCollectedProperties& props, bool* property_present); diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index d78f1e27f..542c989f1 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -450,6 +450,11 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { r->props.num_entries++; r->props.raw_key_size += key.size(); r->props.raw_value_size += value.size(); + if (value_type == kTypeDeletion || value_type == kTypeSingleDeletion) { + r->props.num_deletions++; + } else if (value_type == kTypeMerge) { + r->props.num_merge_operands++; + } r->index_builder->OnKeyAdded(key); NotifyCollectTableCollectorsOnAdd(key, value, r->offset, diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 7d9842a95..f590e6ad4 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -289,6 +289,7 @@ Status CuckooTableBuilder::Finish() { } } properties_.num_entries = num_entries_; + properties_.num_deletions = num_entries_ - num_values_; properties_.fixed_key_len = key_size_; properties_.user_collected_properties[ CuckooTablePropertyNames::kValueLength].assign( diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index 27eacf6ec..e4a1804e7 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -43,6 +43,13 @@ class CuckooBuilderTest : public testing::Test { std::string expected_unused_bucket, uint64_t expected_table_size, uint32_t expected_num_hash_func, bool expected_is_last_level, uint32_t expected_cuckoo_block_size = 1) { + uint64_t num_deletions = 0; + for (const auto& key : keys) { + ParsedInternalKey parsed; + if (ParseInternalKey(key, &parsed) && parsed.type == kTypeDeletion) { + num_deletions++; + } + } // Read file unique_ptr read_file; ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); @@ -90,6 +97,7 @@ class CuckooBuilderTest : public testing::Test { ASSERT_EQ(expected_is_last_level, is_last_level_found); ASSERT_EQ(props->num_entries, keys.size()); + ASSERT_EQ(props->num_deletions, num_deletions); ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); ASSERT_EQ(props->data_size, expected_unused_bucket.size() * (expected_table_size + expected_cuckoo_block_size - 1)); @@ -126,9 +134,10 @@ class CuckooBuilderTest : public testing::Test { } } - std::string GetInternalKey(Slice user_key, bool zero_seqno) { + std::string GetInternalKey(Slice user_key, bool zero_seqno, + ValueType type = kTypeValue) { IterKey ikey; - ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, kTypeValue); + ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, type); return ikey.GetInternalKey().ToString(); } @@ -169,50 +178,57 @@ TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { } TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { - uint32_t num_hash_fun = 4; - std::vector user_keys = {"key01", "key02", "key03", "key04"}; - std::vector values = {"v01", "v02", "v03", "v04"}; - // Need to have a temporary variable here as VS compiler does not currently - // support operator= with initializer_list as a parameter - std::unordered_map> hm = { - {user_keys[0], {0, 1, 2, 3}}, - {user_keys[1], {1, 2, 3, 4}}, - {user_keys[2], {2, 3, 4, 5}}, - {user_keys[3], {3, 4, 5, 6}}}; - hash_map = std::move(hm); - - std::vector expected_locations = {0, 1, 2, 3}; - std::vector keys; - for (auto& user_key : user_keys) { - keys.push_back(GetInternalKey(user_key, false)); - } - uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - - unique_ptr writable_file; - fname = test::PerThreadDBPath("NoCollisionFullKey"); - ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); - CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, - 100, BytewiseComparator(), 1, false, false, - GetSliceHash, 0 /* column_family_id */, - kDefaultColumnFamilyName); - ASSERT_OK(builder.status()); - for (uint32_t i = 0; i < user_keys.size(); i++) { - builder.Add(Slice(keys[i]), Slice(values[i])); - ASSERT_EQ(builder.NumEntries(), i + 1); + for (auto type : {kTypeValue, kTypeDeletion}) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values; + if (type == kTypeValue) { + values = {"v01", "v02", "v03", "v04"}; + } else { + values = {"", "", "", ""}; + } + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {1, 2, 3, 4}}, + {user_keys[2], {2, 3, 4, 5}}, + {user_keys[3], {3, 4, 5, 6}}}; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 2, 3}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false, type)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + unique_ptr writable_file; + fname = test::PerThreadDBPath("NoCollisionFullKey"); + ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); + unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 2, false); } - size_t bucket_size = keys[0].size() + values[0].size(); - ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); - ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); - ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - - std::string expected_unused_bucket = GetInternalKey("key00", true); - expected_unused_bucket += std::string(values[0].size(), 'a'); - CheckFileContents(keys, values, expected_locations, - expected_unused_bucket, expected_table_size, 2, false); } TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 256730bfa..9553efa08 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -79,6 +79,8 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kIndexValueIsDeltaEncoded, props.index_value_is_delta_encoded); Add(TablePropertiesNames::kNumEntries, props.num_entries); + Add(TablePropertiesNames::kDeletedKeys, props.num_deletions); + Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands); Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions); Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); Add(TablePropertiesNames::kFilterSize, props.filter_size); @@ -229,6 +231,10 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, {TablePropertiesNames::kNumDataBlocks, &new_table_properties->num_data_blocks}, {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, + {TablePropertiesNames::kDeletedKeys, + &new_table_properties->num_deletions}, + {TablePropertiesNames::kMergeOperands, + &new_table_properties->num_merge_operands}, {TablePropertiesNames::kNumRangeDeletions, &new_table_properties->num_range_deletions}, {TablePropertiesNames::kFormatVersion, @@ -263,6 +269,12 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, {key, handle.offset() + iter.ValueOffset()}); if (pos != predefined_uint64_properties.end()) { + if (key == TablePropertiesNames::kDeletedKeys || + key == TablePropertiesNames::kMergeOperands) { + // Insert in user-collected properties for API backwards compatibility + new_table_properties->user_collected_properties.insert( + {key, raw_val.ToString()}); + } // handle predefined rocksdb properties uint64_t val; if (!GetVarint64(&raw_val, &val)) { diff --git a/table/plain_table_builder.cc b/table/plain_table_builder.cc index 717635cc1..453b6c768 100644 --- a/table/plain_table_builder.cc +++ b/table/plain_table_builder.cc @@ -166,6 +166,12 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { properties_.num_entries++; properties_.raw_key_size += key.size(); properties_.raw_value_size += value.size(); + if (internal_key.type == kTypeDeletion || + internal_key.type == kTypeSingleDeletion) { + properties_.num_deletions++; + } else if (internal_key.type == kTypeMerge) { + properties_.num_merge_operands++; + } // notify property collectors NotifyCollectTableCollectorsOnAdd( diff --git a/table/table_properties.cc b/table/table_properties.cc index 207a64191..56e1d03f1 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -78,6 +78,9 @@ std::string TableProperties::ToString( AppendProperty(result, "# data blocks", num_data_blocks, prop_delim, kv_delim); AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); + AppendProperty(result, "# deletions", num_deletions, prop_delim, kv_delim); + AppendProperty(result, "# merge operands", num_merge_operands, prop_delim, + kv_delim); AppendProperty(result, "# range deletions", num_range_deletions, prop_delim, kv_delim); @@ -170,6 +173,8 @@ void TableProperties::Add(const TableProperties& tp) { raw_value_size += tp.raw_value_size; num_data_blocks += tp.num_data_blocks; num_entries += tp.num_entries; + num_deletions += tp.num_deletions; + num_merge_operands += tp.num_merge_operands; num_range_deletions += tp.num_range_deletions; } @@ -195,6 +200,9 @@ const std::string TablePropertiesNames::kNumDataBlocks = "rocksdb.num.data.blocks"; const std::string TablePropertiesNames::kNumEntries = "rocksdb.num.entries"; +const std::string TablePropertiesNames::kDeletedKeys = "rocksdb.deleted.keys"; +const std::string TablePropertiesNames::kMergeOperands = + "rocksdb.merge.operands"; const std::string TablePropertiesNames::kNumRangeDeletions = "rocksdb.num.range-deletions"; const std::string TablePropertiesNames::kFilterPolicy = diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index d0840feb2..b4c74d1b9 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -2871,10 +2871,6 @@ void DumpSstFile(std::string filename, bool output_hex, bool show_properties) { if (table_properties != nullptr) { std::cout << std::endl << "Table Properties:" << std::endl; std::cout << table_properties->ToString("\n") << std::endl; - std::cout << "# deleted keys: " - << rocksdb::GetDeletedKeys( - table_properties->user_collected_properties) - << std::endl; } } } diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 6ca56aad9..e3341e34b 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -646,19 +646,6 @@ int SSTDumpTool::Run(int argc, char** argv) { "------------------------------\n" " %s", table_properties->ToString("\n ", ": ").c_str()); - fprintf(stdout, "# deleted keys: %" PRIu64 "\n", - rocksdb::GetDeletedKeys( - table_properties->user_collected_properties)); - - bool property_present; - uint64_t merge_operands = rocksdb::GetMergeOperands( - table_properties->user_collected_properties, &property_present); - if (property_present) { - fprintf(stdout, " # merge operands: %" PRIu64 "\n", - merge_operands); - } else { - fprintf(stdout, " # merge operands: UNKNOWN\n"); - } } total_num_files += 1; total_num_data_blocks += table_properties->num_data_blocks;