From 9daf07305cb1914f2445f8f2dca904cb89a8a1b4 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 2 Dec 2021 08:29:12 -0800 Subject: [PATCH] Replace TableProperties::properties_offsets map with external_sst_file_global_seqno_offset (#9212) Summary: **Context:** Searching `TableProperties::properties_offsets` across the codebase reveals that internally it is only used to find the external SST file's global seqno offeset. Therefore we can narrow it down and replace this map property with a uint64_t property `external_sst_file_global_seqno_offset` to save memory usage related to table properties. Note: - See PR comments for discussion about potential impact on existing external usage of `TableProperties::properties_offsets` - See PR comments for discussion on keeping external SST file global seqno's offset VS using a simple flag indicating seqno's existence. **Summary:** - Replaced `TableProperties::properties_offsets` with `TableProperties::external_sst_file_global_seqno_offset` Pull Request resolved: https://github.com/facebook/rocksdb/pull/9212 Test Plan: - Relied on existing tests should be sufficient since `TableProperties::properties_offsets` existed before and should already be tested. Reviewed By: ajkr Differential Revision: D32665941 Pulled By: hx235 fbshipit-source-id: 718e44617346dc4f3b1276ee953e61c196277795 --- HISTORY.md | 3 ++ db/external_sst_file_ingestion_job.cc | 8 ++--- include/rocksdb/table_properties.h | 7 ++-- java/rocksjni/portal.h | 25 +++----------- java/rocksjni/testable_event_listener.cc | 2 +- .../java/org/rocksdb/TableProperties.java | 33 +++++++------------ .../java/org/rocksdb/EventListenerTest.java | 10 +++--- table/meta_blocks.cc | 14 ++++---- table/table_test.cc | 3 +- 9 files changed, 41 insertions(+), 64 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index bf0d65d70..b37408264 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,9 @@ ### Public API change * Extend WriteBatch::AssignTimestamp and AssignTimestamps API so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes. +### Performance Improvements +* Replaced map property `TableProperties::properties_offsets` with uint64_t property `external_sst_file_global_seqno_offset` to save table properties's memory. + ## 6.27.0 (2021-11-19) ### New Features * Added new ChecksumType kXXH3 which is faster than kCRC32c on almost all x86\_64 hardware. diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 0557b9168..6384eff94 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -609,14 +609,12 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( // Set the global sequence number file_to_ingest->original_seqno = DecodeFixed64(seqno_iter->second.c_str()); - auto offsets_iter = props->properties_offsets.find( - ExternalSstFilePropertyNames::kGlobalSeqno); - if (offsets_iter == props->properties_offsets.end() || - offsets_iter->second == 0) { + if (props->external_sst_file_global_seqno_offset == 0) { file_to_ingest->global_seqno_offset = 0; return Status::Corruption("Was not able to find file global seqno field"); } - file_to_ingest->global_seqno_offset = static_cast(offsets_iter->second); + file_to_ingest->global_seqno_offset = + static_cast(props->external_sst_file_global_seqno_offset); } else if (file_to_ingest->version == 1) { // SST file V1 should not have global seqno field assert(seqno_iter == uprops.end()); diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 27eb612bf..a6813e9e8 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -232,6 +232,10 @@ struct TableProperties { // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). // 0 means unknown. uint64_t fast_compression_estimated_data_size = 0; + // Offset of the value of the property "external sst file global seqno" in the + // file if the property exists. + // 0 means not exists. + uint64_t external_sst_file_global_seqno_offset = 0; // DB identity // db_id is an identifier generated the first time the DB is created @@ -284,9 +288,6 @@ struct TableProperties { UserCollectedProperties user_collected_properties; UserCollectedProperties readable_properties; - // The offset of the value of each property in the file. - std::map properties_offsets; - // convert this object to a human readable form // @prop_delim: delimiter for each property. std::string ToString(const std::string& prop_delim = "; ", diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 512701ae0..fd8c0ed1e 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -6147,9 +6147,9 @@ class TablePropertiesJni : public JavaClass { jmethodID mid = env->GetMethodID( jclazz, "", - "(JJJJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/" + "(JJJJJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/" "lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/" - "String;Ljava/util/Map;Ljava/util/Map;Ljava/util/Map;)V"); + "String;Ljava/util/Map;Ljava/util/Map;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -6258,23 +6258,6 @@ class TablePropertiesJni : public JavaClass { return nullptr; } - // Map - jobject jproperties_offsets = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap( - env, &table_properties.properties_offsets); - if (env->ExceptionCheck()) { - // exception occurred creating java map - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - env->DeleteLocalRef(jmerge_operator_name); - env->DeleteLocalRef(jprefix_extractor_name); - env->DeleteLocalRef(jproperty_collectors_names); - env->DeleteLocalRef(jcompression_name); - env->DeleteLocalRef(juser_collected_properties); - env->DeleteLocalRef(jreadable_properties); - return nullptr; - } - jobject jtable_properties = env->NewObject( jclazz, mid, static_cast(table_properties.data_size), static_cast(table_properties.index_size), @@ -6299,10 +6282,12 @@ class TablePropertiesJni : public JavaClass { table_properties.slow_compression_estimated_data_size), static_cast( table_properties.fast_compression_estimated_data_size), + static_cast( + table_properties.external_sst_file_global_seqno_offset), jcolumn_family_name, jfilter_policy_name, jcomparator_name, jmerge_operator_name, jprefix_extractor_name, jproperty_collectors_names, jcompression_name, - juser_collected_properties, jreadable_properties, jproperties_offsets); + juser_collected_properties, jreadable_properties); if (env->ExceptionCheck()) { return nullptr; diff --git a/java/rocksjni/testable_event_listener.cc b/java/rocksjni/testable_event_listener.cc index 2540f2ecb..d5f2605a4 100644 --- a/java/rocksjni/testable_event_listener.cc +++ b/java/rocksjni/testable_event_listener.cc @@ -37,6 +37,7 @@ static TableProperties newTablePropertiesForTest() { table_properties.file_creation_time = UINT64_MAX; table_properties.slow_compression_estimated_data_size = UINT64_MAX; table_properties.fast_compression_estimated_data_size = UINT64_MAX; + table_properties.external_sst_file_global_seqno_offset = UINT64_MAX; table_properties.db_id = "dbId"; table_properties.db_session_id = "sessionId"; table_properties.column_family_name = "columnFamilyName"; @@ -49,7 +50,6 @@ static TableProperties newTablePropertiesForTest() { table_properties.compression_options = "compressionOptions"; table_properties.user_collected_properties = {{"key", "value"}}; table_properties.readable_properties = {{"key", "value"}}; - table_properties.properties_offsets = {{"key", UINT64_MAX}}; return table_properties; } diff --git a/java/src/main/java/org/rocksdb/TableProperties.java b/java/src/main/java/org/rocksdb/TableProperties.java index c1baea2a4..096341a4c 100644 --- a/java/src/main/java/org/rocksdb/TableProperties.java +++ b/java/src/main/java/org/rocksdb/TableProperties.java @@ -31,6 +31,7 @@ public class TableProperties { private final long oldestKeyTime; private final long slowCompressionEstimatedDataSize; private final long fastCompressionEstimatedDataSize; + private final long externalSstFileGlobalSeqnoOffset; private final byte[] columnFamilyName; private final String filterPolicyName; private final String comparatorName; @@ -40,7 +41,6 @@ public class TableProperties { private final String compressionName; private final Map userCollectedProperties; private final Map readableProperties; - private final Map propertiesOffsets; /** * Access is package private as this will only be constructed from @@ -54,11 +54,11 @@ public class TableProperties { final long formatVersion, final long fixedKeyLen, final long columnFamilyId, final long creationTime, final long oldestKeyTime, final long slowCompressionEstimatedDataSize, final long fastCompressionEstimatedDataSize, - final byte[] columnFamilyName, final String filterPolicyName, final String comparatorName, - final String mergeOperatorName, final String prefixExtractorName, - final String propertyCollectorsNames, final String compressionName, - final Map userCollectedProperties, - final Map readableProperties, final Map propertiesOffsets) { + final long externalSstFileGlobalSeqnoOffset, final byte[] columnFamilyName, + final String filterPolicyName, final String comparatorName, final String mergeOperatorName, + final String prefixExtractorName, final String propertyCollectorsNames, + final String compressionName, final Map userCollectedProperties, + final Map readableProperties) { this.dataSize = dataSize; this.indexSize = indexSize; this.indexPartitions = indexPartitions; @@ -80,6 +80,7 @@ public class TableProperties { this.oldestKeyTime = oldestKeyTime; this.slowCompressionEstimatedDataSize = slowCompressionEstimatedDataSize; this.fastCompressionEstimatedDataSize = fastCompressionEstimatedDataSize; + this.externalSstFileGlobalSeqnoOffset = externalSstFileGlobalSeqnoOffset; this.columnFamilyName = columnFamilyName; this.filterPolicyName = filterPolicyName; this.comparatorName = comparatorName; @@ -89,7 +90,6 @@ public class TableProperties { this.compressionName = compressionName; this.userCollectedProperties = userCollectedProperties; this.readableProperties = readableProperties; - this.propertiesOffsets = propertiesOffsets; } /** @@ -379,15 +379,6 @@ public class TableProperties { return readableProperties; } - /** - * The offset of the value of each property in the file. - * - * @return the offset of each property. - */ - public Map getPropertiesOffsets() { - return propertiesOffsets; - } - @Override public boolean equals(Object o) { if (this == o) @@ -408,6 +399,7 @@ public class TableProperties { && oldestKeyTime == that.oldestKeyTime && slowCompressionEstimatedDataSize == that.slowCompressionEstimatedDataSize && fastCompressionEstimatedDataSize == that.fastCompressionEstimatedDataSize + && externalSstFileGlobalSeqnoOffset == that.externalSstFileGlobalSeqnoOffset && Arrays.equals(columnFamilyName, that.columnFamilyName) && Objects.equals(filterPolicyName, that.filterPolicyName) && Objects.equals(comparatorName, that.comparatorName) @@ -416,8 +408,7 @@ public class TableProperties { && Objects.equals(propertyCollectorsNames, that.propertyCollectorsNames) && Objects.equals(compressionName, that.compressionName) && Objects.equals(userCollectedProperties, that.userCollectedProperties) - && Objects.equals(readableProperties, that.readableProperties) - && Objects.equals(propertiesOffsets, that.propertiesOffsets); + && Objects.equals(readableProperties, that.readableProperties); } @Override @@ -426,9 +417,9 @@ public class TableProperties { indexKeyIsUserKey, indexValueIsDeltaEncoded, filterSize, rawKeySize, rawValueSize, numDataBlocks, numEntries, numDeletions, numMergeOperands, numRangeDeletions, formatVersion, fixedKeyLen, columnFamilyId, creationTime, oldestKeyTime, slowCompressionEstimatedDataSize, - fastCompressionEstimatedDataSize, filterPolicyName, comparatorName, mergeOperatorName, - prefixExtractorName, propertyCollectorsNames, compressionName, userCollectedProperties, - readableProperties, propertiesOffsets); + fastCompressionEstimatedDataSize, externalSstFileGlobalSeqnoOffset, filterPolicyName, + comparatorName, mergeOperatorName, prefixExtractorName, propertyCollectorsNames, + compressionName, userCollectedProperties, readableProperties); result = 31 * result + Arrays.hashCode(columnFamilyName); return result; } diff --git a/java/src/test/java/org/rocksdb/EventListenerTest.java b/java/src/test/java/org/rocksdb/EventListenerTest.java index 61193ff67..c6b8f940a 100644 --- a/java/src/test/java/org/rocksdb/EventListenerTest.java +++ b/java/src/test/java/org/rocksdb/EventListenerTest.java @@ -238,16 +238,14 @@ public class EventListenerTest { final Map userCollectedPropertiesTestData = Collections.singletonMap("key", "value"); final Map readablePropertiesTestData = Collections.singletonMap("key", "value"); - final Map propertiesOffsetsTestData = - Collections.singletonMap("key", TEST_LONG_VAL); final TableProperties tablePropertiesTestData = new TableProperties(TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, - TEST_LONG_VAL, TEST_LONG_VAL, "columnFamilyName".getBytes(), "filterPolicyName", - "comparatorName", "mergeOperatorName", "prefixExtractorName", "propertyCollectorsNames", - "compressionName", userCollectedPropertiesTestData, readablePropertiesTestData, - propertiesOffsetsTestData); + TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, "columnFamilyName".getBytes(), + "filterPolicyName", "comparatorName", "mergeOperatorName", "prefixExtractorName", + "propertyCollectorsNames", "compressionName", userCollectedPropertiesTestData, + readablePropertiesTestData); final FlushJobInfo flushJobInfoTestData = new FlushJobInfo(Integer.MAX_VALUE, "testColumnFamily", "/file/path", TEST_LONG_VAL, Integer.MAX_VALUE, true, true, TEST_LONG_VAL, TEST_LONG_VAL, tablePropertiesTestData, (byte) 0x0a); diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index d544be365..7ce118a5a 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -326,8 +326,10 @@ Status ReadTablePropertiesHelper( auto raw_val = iter.value(); auto pos = predefined_uint64_properties.find(key); - new_table_properties->properties_offsets.insert( - {key, handle.offset() + iter.ValueOffset()}); + if (key == ExternalSstFilePropertyNames::kGlobalSeqno) { + new_table_properties->external_sst_file_global_seqno_offset = + handle.offset() + iter.ValueOffset(); + } if (pos != predefined_uint64_properties.end()) { if (key == TablePropertiesNames::kDeletedKeys || @@ -382,12 +384,12 @@ Status ReadTablePropertiesHelper( s = VerifyBlockChecksum(footer.checksum(), properties_block.data(), block_size, file->file_name(), handle.offset()); if (s.IsCorruption()) { - const auto seqno_pos_iter = new_table_properties->properties_offsets.find( - ExternalSstFilePropertyNames::kGlobalSeqno); - if (seqno_pos_iter != new_table_properties->properties_offsets.end()) { + if (new_table_properties->external_sst_file_global_seqno_offset != 0) { std::string tmp_buf(properties_block.data(), block_fetcher.GetBlockSizeWithTrailer()); - uint64_t global_seqno_offset = seqno_pos_iter->second - handle.offset(); + uint64_t global_seqno_offset = + new_table_properties->external_sst_file_global_seqno_offset - + handle.offset(); EncodeFixed64(&tmp_buf[static_cast(global_seqno_offset)], 0); s = VerifyBlockChecksum(footer.checksum(), tmp_buf.data(), block_size, file->file_name(), handle.offset()); diff --git a/table/table_test.cc b/table/table_test.cc index c0b254137..5054c99bd 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -4490,8 +4490,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { user_props[ExternalSstFilePropertyNames::kVersion].c_str()); global_seqno = DecodeFixed64( user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str()); - global_seqno_offset = - props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno]; + global_seqno_offset = props->external_sst_file_global_seqno_offset; }; // Helper function to update the value of the global seqno in the file