diff --git a/HISTORY.md b/HISTORY.md index 2b4ded59f..0365d1474 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -12,6 +12,9 @@ ### Performance Improvements * On ARM platform, use `yield` instead of `wfe` to relax cpu to gain better performance. +### Public API change +* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead. + ## 6.19.0 (03/21/2021) ### Bug Fixes * Fixed the truncation error found in APIs/tools when dumping block-based SST files in a human-readable format. After fix, the block-based table can be fully dumped as a readable file. diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 6f4eb777a..69c2f6b1b 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -1520,6 +1520,80 @@ TEST_F(DBPropertiesTest, BlockAddForCompressionSampling) { } } +class CompressionSamplingDBPropertiesTest + : public DBPropertiesTest, + public ::testing::WithParamInterface { + public: + CompressionSamplingDBPropertiesTest() : fast_(GetParam()) {} + + protected: + const bool fast_; +}; + +INSTANTIATE_TEST_CASE_P(CompressionSamplingDBPropertiesTest, + CompressionSamplingDBPropertiesTest, ::testing::Bool()); + +// Excluded from RocksDB lite tests due to `GetPropertiesOfAllTables()` usage. +TEST_P(CompressionSamplingDBPropertiesTest, + EstimateDataSizeWithCompressionSampling) { + Options options = CurrentOptions(); + if (fast_) { + // One of the following light compression libraries must be present. 
+ if (LZ4_Supported()) { + options.compression = kLZ4Compression; + } else if (Snappy_Supported()) { + options.compression = kSnappyCompression; + } else { + return; + } + } else { + // One of the following heavy compression libraries must be present. + if (ZSTD_Supported()) { + options.compression = kZSTD; + } else if (Zlib_Supported()) { + options.compression = kZlibCompression; + } else { + return; + } + } + options.disable_auto_compactions = true; + // For simplicity/determinism, sample 100%. + options.sample_for_compression = 1; + Reopen(options); + + // Setup the following LSM: + // + // L0_0 ["a", "b"] + // L1_0 ["a", "b"] + // + // L0_0 was created by flush. L1_0 was created by compaction. Each file + // contains one data block. The value consists of compressible data so the + // data block should be stored compressed. + std::string val(1024, 'a'); + for (int i = 0; i < 3; ++i) { + ASSERT_OK(Put("a", val)); + ASSERT_OK(Put("b", val)); + ASSERT_OK(Flush()); + if (i == 1) { + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + } + } + + TablePropertiesCollection file_to_props; + ASSERT_OK(db_->GetPropertiesOfAllTables(&file_to_props)); + ASSERT_EQ(2, file_to_props.size()); + for (const auto& file_and_props : file_to_props) { + ASSERT_GT(file_and_props.second->data_size, 0); + if (fast_) { + ASSERT_EQ(file_and_props.second->data_size, + file_and_props.second->fast_compression_estimated_data_size); + } else { + ASSERT_EQ(file_and_props.second->data_size, + file_and_props.second->slow_compression_estimated_data_size); + } + } +} + TEST_F(DBPropertiesTest, EstimateNumKeysUnderflow) { Options options = CurrentOptions(); Reopen(options); diff --git a/db/event_helpers.cc b/db/event_helpers.cc index 23fb43a83..9c7c71797 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -125,8 +125,12 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished( << table_properties.compression_options << "creation_time" << table_properties.creation_time 
<< "oldest_key_time" << table_properties.oldest_key_time << "file_creation_time" - << table_properties.file_creation_time << "db_id" - << table_properties.db_id << "db_session_id" + << table_properties.file_creation_time + << "slow_compression_estimated_data_size" + << table_properties.slow_compression_estimated_data_size + << "fast_compression_estimated_data_size" + << table_properties.fast_compression_estimated_data_size + << "db_id" << table_properties.db_id << "db_session_id" << table_properties.db_session_id; // user collected properties diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 524ebdaa6..8987f420e 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -61,6 +61,8 @@ struct TablePropertiesNames { static const std::string kCreationTime; static const std::string kOldestKeyTime; static const std::string kFileCreationTime; + static const std::string kSlowCompressionEstimatedDataSize; + static const std::string kFastCompressionEstimatedDataSize; }; extern const std::string kPropertiesBlock; @@ -195,6 +197,14 @@ struct TableProperties { uint64_t oldest_key_time = 0; // Actual SST file creation time. 0 means unknown. uint64_t file_creation_time = 0; + // Estimated size of data blocks if compressed using a relatively slower + // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). + // 0 means unknown. + uint64_t slow_compression_estimated_data_size = 0; + // Estimated size of data blocks if compressed using a relatively faster + // compression algorithm (see `ColumnFamilyOptions::sample_for_compression`). + // 0 means unknown. 
+ uint64_t fast_compression_estimated_data_size = 0; // DB identity // db_id is an identifier generated the first time the DB is created diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 8f4ebcaf0..330b9e9bd 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -6075,7 +6075,11 @@ class TablePropertiesJni : public JavaClass { return nullptr; } - jmethodID mid = env->GetMethodID(jclazz, "", "(JJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/util/Map;Ljava/util/Map;Ljava/util/Map;)V"); + jmethodID mid = env->GetMethodID( + jclazz, "", + "(JJJJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/" + "lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/" + "String;Ljava/util/Map;Ljava/util/Map;Ljava/util/Map;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -6201,8 +6205,8 @@ class TablePropertiesJni : public JavaClass { return nullptr; } - jobject jtable_properties = env->NewObject(jclazz, mid, - static_cast(table_properties.data_size), + jobject jtable_properties = env->NewObject( + jclazz, mid, static_cast(table_properties.data_size), static_cast(table_properties.index_size), static_cast(table_properties.index_partitions), static_cast(table_properties.top_level_index_size), @@ -6221,17 +6225,14 @@ class TablePropertiesJni : public JavaClass { static_cast(table_properties.column_family_id), static_cast(table_properties.creation_time), static_cast(table_properties.oldest_key_time), - jcolumn_family_name, - jfilter_policy_name, - jcomparator_name, - jmerge_operator_name, - jprefix_extractor_name, - jproperty_collectors_names, - jcompression_name, - juser_collected_properties, - jreadable_properties, - jproperties_offsets - ); + static_cast( + table_properties.slow_compression_estimated_data_size), + static_cast( + table_properties.fast_compression_estimated_data_size), + 
jcolumn_family_name, jfilter_policy_name, jcomparator_name, + jmerge_operator_name, jprefix_extractor_name, + jproperty_collectors_names, jcompression_name, + juser_collected_properties, jreadable_properties, jproperties_offsets); if (env->ExceptionCheck()) { return nullptr; diff --git a/java/rocksjni/testable_event_listener.cc b/java/rocksjni/testable_event_listener.cc index 759e61766..2540f2ecb 100644 --- a/java/rocksjni/testable_event_listener.cc +++ b/java/rocksjni/testable_event_listener.cc @@ -35,6 +35,8 @@ static TableProperties newTablePropertiesForTest() { table_properties.creation_time = UINT64_MAX; table_properties.oldest_key_time = UINT64_MAX; table_properties.file_creation_time = UINT64_MAX; + table_properties.slow_compression_estimated_data_size = UINT64_MAX; + table_properties.fast_compression_estimated_data_size = UINT64_MAX; table_properties.db_id = "dbId"; table_properties.db_session_id = "sessionId"; table_properties.column_family_name = "columnFamilyName"; diff --git a/java/src/main/java/org/rocksdb/TableProperties.java b/java/src/main/java/org/rocksdb/TableProperties.java index 8e0f477b8..c1baea2a4 100644 --- a/java/src/main/java/org/rocksdb/TableProperties.java +++ b/java/src/main/java/org/rocksdb/TableProperties.java @@ -29,6 +29,8 @@ public class TableProperties { private final long columnFamilyId; private final long creationTime; private final long oldestKeyTime; + private final long slowCompressionEstimatedDataSize; + private final long fastCompressionEstimatedDataSize; private final byte[] columnFamilyName; private final String filterPolicyName; private final String comparatorName; @@ -50,10 +52,12 @@ public class TableProperties { final long rawValueSize, final long numDataBlocks, final long numEntries, final long numDeletions, final long numMergeOperands, final long numRangeDeletions, final long formatVersion, final long fixedKeyLen, final long columnFamilyId, - final long creationTime, final long oldestKeyTime, final byte[] 
columnFamilyName, - final String filterPolicyName, final String comparatorName, final String mergeOperatorName, - final String prefixExtractorName, final String propertyCollectorsNames, - final String compressionName, final Map userCollectedProperties, + final long creationTime, final long oldestKeyTime, + final long slowCompressionEstimatedDataSize, final long fastCompressionEstimatedDataSize, + final byte[] columnFamilyName, final String filterPolicyName, final String comparatorName, + final String mergeOperatorName, final String prefixExtractorName, + final String propertyCollectorsNames, final String compressionName, + final Map userCollectedProperties, final Map readableProperties, final Map propertiesOffsets) { this.dataSize = dataSize; this.indexSize = indexSize; @@ -74,6 +78,8 @@ public class TableProperties { this.columnFamilyId = columnFamilyId; this.creationTime = creationTime; this.oldestKeyTime = oldestKeyTime; + this.slowCompressionEstimatedDataSize = slowCompressionEstimatedDataSize; + this.fastCompressionEstimatedDataSize = fastCompressionEstimatedDataSize; this.columnFamilyName = columnFamilyName; this.filterPolicyName = filterPolicyName; this.comparatorName = comparatorName; @@ -266,6 +272,26 @@ public class TableProperties { return oldestKeyTime; } + /** + * Get the estimated size of data blocks compressed with a relatively slower + * compression algorithm. + * + * @return 0 means unknown, otherwise the estimated data size in bytes. + */ + public long getSlowCompressionEstimatedDataSize() { + return slowCompressionEstimatedDataSize; + } + + /** + * Get the estimated size of data blocks compressed with a relatively faster + * compression algorithm. + * + * @return 0 means unknown, otherwise the estimated data size in bytes. + */ + public long getFastCompressionEstimatedDataSize() { + return fastCompressionEstimatedDataSize; + } + /** * Get the name of the column family with which this * SST file is associated. 
@@ -380,6 +406,8 @@ public class TableProperties { && formatVersion == that.formatVersion && fixedKeyLen == that.fixedKeyLen && columnFamilyId == that.columnFamilyId && creationTime == that.creationTime && oldestKeyTime == that.oldestKeyTime + && slowCompressionEstimatedDataSize == that.slowCompressionEstimatedDataSize + && fastCompressionEstimatedDataSize == that.fastCompressionEstimatedDataSize && Arrays.equals(columnFamilyName, that.columnFamilyName) && Objects.equals(filterPolicyName, that.filterPolicyName) && Objects.equals(comparatorName, that.comparatorName) @@ -397,9 +425,10 @@ public class TableProperties { int result = Objects.hash(dataSize, indexSize, indexPartitions, topLevelIndexSize, indexKeyIsUserKey, indexValueIsDeltaEncoded, filterSize, rawKeySize, rawValueSize, numDataBlocks, numEntries, numDeletions, numMergeOperands, numRangeDeletions, formatVersion, - fixedKeyLen, columnFamilyId, creationTime, oldestKeyTime, filterPolicyName, comparatorName, - mergeOperatorName, prefixExtractorName, propertyCollectorsNames, compressionName, - userCollectedProperties, readableProperties, propertiesOffsets); + fixedKeyLen, columnFamilyId, creationTime, oldestKeyTime, slowCompressionEstimatedDataSize, + fastCompressionEstimatedDataSize, filterPolicyName, comparatorName, mergeOperatorName, + prefixExtractorName, propertyCollectorsNames, compressionName, userCollectedProperties, + readableProperties, propertiesOffsets); result = 31 * result + Arrays.hashCode(columnFamilyName); return result; } diff --git a/java/src/test/java/org/rocksdb/EventListenerTest.java b/java/src/test/java/org/rocksdb/EventListenerTest.java index f7e907cae..61193ff67 100644 --- a/java/src/test/java/org/rocksdb/EventListenerTest.java +++ b/java/src/test/java/org/rocksdb/EventListenerTest.java @@ -244,9 +244,10 @@ public class EventListenerTest { TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, 
TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, - "columnFamilyName".getBytes(), "filterPolicyName", "comparatorName", "mergeOperatorName", - "prefixExtractorName", "propertyCollectorsNames", "compressionName", - userCollectedPropertiesTestData, readablePropertiesTestData, propertiesOffsetsTestData); + TEST_LONG_VAL, TEST_LONG_VAL, "columnFamilyName".getBytes(), "filterPolicyName", + "comparatorName", "mergeOperatorName", "prefixExtractorName", "propertyCollectorsNames", + "compressionName", userCollectedPropertiesTestData, readablePropertiesTestData, + propertiesOffsetsTestData); final FlushJobInfo flushJobInfoTestData = new FlushJobInfo(Integer.MAX_VALUE, "testColumnFamily", "/file/path", TEST_LONG_VAL, Integer.MAX_VALUE, true, true, TEST_LONG_VAL, TEST_LONG_VAL, tablePropertiesTestData, (byte) 0x0a); diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index d62d58d0f..4b9e7dec1 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -271,6 +271,11 @@ struct BlockBasedTableBuilder::Rep { const Slice* first_key_in_next_block = nullptr; CompressionType compression_type; uint64_t sample_for_compression; + std::atomic compressible_input_data_bytes; + std::atomic uncompressible_input_data_bytes; + std::atomic sampled_input_data_bytes; + std::atomic sampled_output_slow_data_bytes; + std::atomic sampled_output_fast_data_bytes; CompressionOptions compression_opts; std::unique_ptr compression_dict; std::vector> compression_ctxs; @@ -431,6 +436,11 @@ struct BlockBasedTableBuilder::Rep { internal_prefix_transform(_moptions.prefix_extractor.get()), compression_type(_compression_type), sample_for_compression(_moptions.sample_for_compression), + compressible_input_data_bytes(0), + uncompressible_input_data_bytes(0), + sampled_input_data_bytes(0), + sampled_output_slow_data_bytes(0), + 
sampled_output_fast_data_bytes(0), compression_opts(_compression_opts), compression_dict(), compression_ctxs(_compression_opts.parallel_threads), @@ -1085,6 +1095,10 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock( ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)); if (is_status_ok && raw_block_contents.size() < kCompressionSizeLimit) { + if (is_data_block) { + r->compressible_input_data_bytes.fetch_add(raw_block_contents.size(), + std::memory_order_relaxed); + } const CompressionDict* compression_dict; if (!is_data_block || r->compression_dict == nullptr) { compression_dict = &CompressionDict::GetEmptyDict(); @@ -1103,6 +1117,16 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock( r->table_options.format_version, is_data_block /* do_sample */, compressed_output, &sampled_output_fast, &sampled_output_slow); + if (sampled_output_slow.size() > 0 || sampled_output_fast.size() > 0) { + // Currently compression sampling is only enabled for data block. + assert(is_data_block); + r->sampled_input_data_bytes.fetch_add(raw_block_contents.size(), + std::memory_order_relaxed); + r->sampled_output_slow_data_bytes.fetch_add(sampled_output_slow.size(), + std::memory_order_relaxed); + r->sampled_output_fast_data_bytes.fetch_add(sampled_output_fast.size(), + std::memory_order_relaxed); + } // notify collectors on block add NotifyCollectTableCollectorsOnBlockAdd( r->table_properties_collectors, raw_block_contents.size(), @@ -1146,8 +1170,16 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock( } } else { // Block is too big to be compressed. + if (is_data_block) { + r->uncompressible_input_data_bytes.fetch_add(raw_block_contents.size(), + std::memory_order_relaxed); + } abort_compression = true; } + if (is_data_block) { + r->uncompressible_input_data_bytes.fetch_add(kBlockTrailerSize, + std::memory_order_relaxed); + } // Abort compression if the block is too big, or did not pass // verification. 
@@ -1539,6 +1571,27 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->props.creation_time = rep_->creation_time; rep_->props.oldest_key_time = rep_->oldest_key_time; rep_->props.file_creation_time = rep_->file_creation_time; + if (rep_->sampled_input_data_bytes > 0) { + rep_->props.slow_compression_estimated_data_size = static_cast( + static_cast(rep_->sampled_output_slow_data_bytes) / + rep_->sampled_input_data_bytes * + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes + 0.5); + rep_->props.fast_compression_estimated_data_size = static_cast( + static_cast(rep_->sampled_output_fast_data_bytes) / + rep_->sampled_input_data_bytes * + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes + 0.5); + } else if (rep_->sample_for_compression > 0) { + // We tried to sample but none were found. Assume worst-case (compression + // ratio 1.0) so data is complete and aggregatable. + rep_->props.slow_compression_estimated_data_size = + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes; + rep_->props.fast_compression_estimated_data_size = + rep_->compressible_input_data_bytes + + rep_->uncompressible_input_data_bytes; + } rep_->props.db_id = rep_->db_id; rep_->props.db_session_id = rep_->db_session_id; rep_->props.db_host_id = rep_->db_host_id; diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 5b5fb057d..7fcf4676d 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -96,6 +96,14 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { if (props.file_creation_time > 0) { Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time); } + if (props.slow_compression_estimated_data_size > 0) { + Add(TablePropertiesNames::kSlowCompressionEstimatedDataSize, + props.slow_compression_estimated_data_size); + } + if (props.fast_compression_estimated_data_size > 0) { + Add(TablePropertiesNames::kFastCompressionEstimatedDataSize, + 
props.fast_compression_estimated_data_size); + } if (!props.db_id.empty()) { Add(TablePropertiesNames::kDbId, props.db_id); } @@ -279,6 +287,10 @@ Status ReadProperties(const ReadOptions& read_options, &new_table_properties->oldest_key_time}, {TablePropertiesNames::kFileCreationTime, &new_table_properties->file_creation_time}, + {TablePropertiesNames::kSlowCompressionEstimatedDataSize, + &new_table_properties->slow_compression_estimated_data_size}, + {TablePropertiesNames::kFastCompressionEstimatedDataSize, + &new_table_properties->fast_compression_estimated_data_size}, }; std::string last_key; diff --git a/table/table_properties.cc b/table/table_properties.cc index 310fb4a0e..bad4f2c4f 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -168,6 +168,11 @@ std::string TableProperties::ToString( AppendProperty(result, "file creation time", file_creation_time, prop_delim, kv_delim); + AppendProperty(result, "slow compression estimated data size", + slow_compression_estimated_data_size, prop_delim, kv_delim); + AppendProperty(result, "fast compression estimated data size", + fast_compression_estimated_data_size, prop_delim, kv_delim); + // DB identity and DB session ID AppendProperty(result, "DB identity", db_id, prop_delim, kv_delim); AppendProperty(result, "DB session identity", db_session_id, prop_delim, @@ -191,6 +196,10 @@ void TableProperties::Add(const TableProperties& tp) { num_deletions += tp.num_deletions; num_merge_operands += tp.num_merge_operands; num_range_deletions += tp.num_range_deletions; + slow_compression_estimated_data_size += + tp.slow_compression_estimated_data_size; + fast_compression_estimated_data_size += + tp.fast_compression_estimated_data_size; } std::map @@ -208,6 +217,10 @@ TableProperties::GetAggregatablePropertiesAsMap() const { rv["num_deletions"] = num_deletions; rv["num_merge_operands"] = num_merge_operands; rv["num_range_deletions"] = num_range_deletions; + rv["slow_compression_estimated_data_size"] = + 
slow_compression_estimated_data_size; + rv["fast_compression_estimated_data_size"] = + fast_compression_estimated_data_size; return rv; } @@ -268,6 +281,10 @@ const std::string TablePropertiesNames::kOldestKeyTime = "rocksdb.oldest.key.time"; const std::string TablePropertiesNames::kFileCreationTime = "rocksdb.file.creation.time"; +const std::string TablePropertiesNames::kSlowCompressionEstimatedDataSize = + "rocksdb.sample_for_compression.slow.data.size"; +const std::string TablePropertiesNames::kFastCompressionEstimatedDataSize = + "rocksdb.sample_for_compression.fast.data.size"; extern const std::string kPropertiesBlock = "rocksdb.properties"; // Old property block name for backward compatibility