From c7004840d2f4ad5fc1bdce042902b822492f3a0e Mon Sep 17 00:00:00 2001 From: Aaron Gao Date: Fri, 26 Aug 2016 11:46:32 -0700 Subject: [PATCH] store prefix_extractor_name in table Summary: Make sure prefix extractor name is stored in SST files and if DB is opened with a prefix extractor of a different name, prefix bloom is skipped when read the file. Also add unit tests for that. Test Plan: before change: ``` Note: Google Test filter = BlockBasedTableTest.SkipPrefixBloomFilter [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from BlockBasedTableTest [ RUN ] BlockBasedTableTest.SkipPrefixBloomFilter table/table_test.cc:1421: Failure Value of: db_iter->Valid() Actual: false Expected: true [ FAILED ] BlockBasedTableTest.SkipPrefixBloomFilter (1 ms) [----------] 1 test from BlockBasedTableTest (1 ms total) [----------] Global test environment tear-down [==========] 1 test from 1 test case ran. (1 ms total) [ PASSED ] 0 tests. [ FAILED ] 1 test, listed below: [ FAILED ] BlockBasedTableTest.SkipPrefixBloomFilter 1 FAILED TEST ``` after: ``` Note: Google Test filter = BlockBasedTableTest.SkipPrefixBloomFilter [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from BlockBasedTableTest [ RUN ] BlockBasedTableTest.SkipPrefixBloomFilter [ OK ] BlockBasedTableTest.SkipPrefixBloomFilter (0 ms) [----------] 1 test from BlockBasedTableTest (0 ms total) [----------] Global test environment tear-down [==========] 1 test from 1 test case ran. (0 ms total) [ PASSED ] 1 test. ``` Reviewers: sdong, andrewkr, yiwu, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D61215 --- include/rocksdb/table.h | 1 - include/rocksdb/table_properties.h | 5 ++++ table/block_based_table_builder.cc | 4 +++ table/block_based_table_reader.cc | 6 ++++- table/meta_blocks.cc | 8 +++++- table/plain_table_builder.cc | 9 +++---- table/plain_table_factory.cc | 3 --- table/plain_table_reader.cc | 9 ++++--- table/table_properties.cc | 2 ++ table/table_test.cc | 43 +++++++++++++++++++++++++++++- 10 files changed, 73 insertions(+), 17 deletions(-) diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 215385756..3bde6bdc5 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -225,7 +225,6 @@ enum EncodingType : char { // Table Properties that are specific to plain table properties. struct PlainTablePropertyNames { - static const std::string kPrefixExtractorName; static const std::string kEncodingType; static const std::string kBloomVersion; static const std::string kNumBloomBlocks; diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 5040dc4f2..c3f691a96 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -43,6 +43,7 @@ struct TablePropertiesNames { static const std::string kColumnFamilyId; static const std::string kComparator; static const std::string kMergeOperator; + static const std::string kPrefixExtractorName; static const std::string kPropertyCollectors; static const std::string kCompression; }; @@ -167,6 +168,10 @@ struct TableProperties { // If no merge operator is used, `merge_operator_name` will be "nullptr". std::string merge_operator_name; + // The name of the prefix extractor used in this table + // If no prefix extractor is used, `prefix_extractor_name` will be "nullptr". + std::string prefix_extractor_name; + // The names of the property collectors factories used in this table // separated by commas // {collector_name[1]},{collector_name[2]},{collector_name[3]} .. diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 08ce299bb..f042134b4 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -894,6 +894,10 @@ Status BlockBasedTableBuilder::Finish() { ? r->ioptions.merge_operator->Name() : "nullptr"; r->props.compression_name = CompressionTypeToString(r->compression_type); + r->props.prefix_extractor_name = + r->ioptions.prefix_extractor != nullptr + ? r->ioptions.prefix_extractor->Name() + : "nullptr"; std::string property_collectors_names = "["; property_collectors_names = "["; diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 15ac6a7c5..edb886cee 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1317,7 +1317,9 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { assert(rep_->ioptions.prefix_extractor != nullptr); auto user_key = ExtractUserKey(internal_key); - if (!rep_->ioptions.prefix_extractor->InDomain(user_key)) { + if (!rep_->ioptions.prefix_extractor->InDomain(user_key) || + rep_->table_properties->prefix_extractor_name.compare( + rep_->ioptions.prefix_extractor->Name()) != 0) { return true; } auto prefix = rep_->ioptions.prefix_extractor->Transform(user_key); @@ -1423,6 +1425,8 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options, return filter->KeyMayMatch(user_key); } if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor && + rep_->table_properties->prefix_extractor_name.compare( + rep_->ioptions.prefix_extractor->Name()) == 0 && rep_->ioptions.prefix_extractor->InDomain(user_key) && !filter->PrefixMayMatch( rep_->ioptions.prefix_extractor->Transform(user_key))) { diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index e98a638e0..22c9ff5db 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -82,6 +82,10 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { if (!props.merge_operator_name.empty()) { Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name); } + if (!props.prefix_extractor_name.empty()) { + Add(TablePropertiesNames::kPrefixExtractorName, + props.prefix_extractor_name); + } if (!props.property_collectors_names.empty()) { Add(TablePropertiesNames::kPropertyCollectors, props.property_collectors_names); @@ -150,7 +154,7 @@ bool NotifyCollectTableCollectorsOnFinish( } Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, - const Footer& footer, const ImmutableCFOptions &ioptions, + const Footer& footer, const ImmutableCFOptions& ioptions, TableProperties** table_properties) { assert(table_properties); @@ -232,6 +236,8 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, new_table_properties->comparator_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kMergeOperator) { new_table_properties->merge_operator_name = raw_val.ToString(); + } else if (key == TablePropertiesNames::kPrefixExtractorName) { + new_table_properties->prefix_extractor_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kPropertyCollectors) { new_table_properties->property_collectors_names = raw_val.ToString(); } else if (key == TablePropertiesNames::kCompression) { diff --git a/table/plain_table_builder.cc b/table/plain_table_builder.cc index 80ee42e85..9f81c262e 100644 --- a/table/plain_table_builder.cc +++ b/table/plain_table_builder.cc @@ -97,12 +97,9 @@ PlainTableBuilder::PlainTableBuilder( properties_.format_version = (encoding_type == kPlain) ? 0 : 1; properties_.column_family_id = column_family_id; properties_.column_family_name = column_family_name; - - if (ioptions_.prefix_extractor) { - properties_.user_collected_properties - [PlainTablePropertyNames::kPrefixExtractorName] = - ioptions_.prefix_extractor->Name(); - } + properties_.prefix_extractor_name = ioptions_.prefix_extractor != nullptr + ? ioptions_.prefix_extractor->Name() + : "nullptr"; std::string val; PutFixed32(&val, static_cast(encoder_.GetEncodingType())); diff --git a/table/plain_table_factory.cc b/table/plain_table_factory.cc index 55a68fce1..eadc2c099 100644 --- a/table/plain_table_factory.cc +++ b/table/plain_table_factory.cc @@ -85,9 +85,6 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options) { return new PlainTableFactory(options); } -const std::string PlainTablePropertyNames::kPrefixExtractorName = - "rocksdb.prefix.extractor.name"; - const std::string PlainTablePropertyNames::kEncodingType = "rocksdb.plain.table.encoding.type"; diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index a5155254b..d501606cf 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -135,15 +135,16 @@ Status PlainTableReader::Open(const ImmutableCFOptions& ioptions, assert(hash_table_ratio >= 0.0); auto& user_props = props->user_collected_properties; - auto prefix_extractor_in_file = - user_props.find(PlainTablePropertyNames::kPrefixExtractorName); + auto prefix_extractor_in_file = props->prefix_extractor_name; - if (!full_scan_mode && prefix_extractor_in_file != user_props.end()) { + if (!full_scan_mode && + !prefix_extractor_in_file.empty() /* old version sst file*/ + && prefix_extractor_in_file != "nullptr") { if (!ioptions.prefix_extractor) { return Status::InvalidArgument( "Prefix extractor is missing when opening a PlainTable built " "using a prefix extractor"); - } else if (prefix_extractor_in_file->second.compare( + } else if (prefix_extractor_in_file.compare( ioptions.prefix_extractor->Name()) != 0) { return Status::InvalidArgument( "Prefix extractor given doesn't match the one used to build " diff --git a/table/table_properties.cc b/table/table_properties.cc index 6c0a9adfc..c1b02b16a 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -168,6 +168,8 @@ const std::string TablePropertiesNames::kColumnFamilyName = const std::string TablePropertiesNames::kComparator = "rocksdb.comparator"; const std::string TablePropertiesNames::kMergeOperator = "rocksdb.merge.operator"; +const std::string TablePropertiesNames::kPrefixExtractorName = + "rocksdb.prefix.extractor.name"; const std::string TablePropertiesNames::kPropertyCollectors = "rocksdb.property.collectors"; const std::string TablePropertiesNames::kCompression = "rocksdb.compression"; diff --git a/table/table_test.cc b/table/table_test.cc index a5efca69a..16f46fa65 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1101,6 +1101,8 @@ TEST_F(BlockBasedTableTest, BlockBasedTableProperties2) { ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name); // No merge operator ASSERT_EQ("nullptr", props.merge_operator_name); + // No prefix extractor + ASSERT_EQ("nullptr", props.prefix_extractor_name); // No property collectors ASSERT_EQ("[]", props.property_collectors_names); // No filter policy is used @@ -1116,6 +1118,7 @@ TEST_F(BlockBasedTableTest, BlockBasedTableProperties2) { options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.comparator = &reverse_key_comparator; options.merge_operator = MergeOperators::CreateUInt64AddOperator(); + options.prefix_extractor.reset(NewNoopTransform()); options.table_properties_collector_factories.emplace_back( new DummyPropertiesCollectorFactory1()); options.table_properties_collector_factories.emplace_back( @@ -1129,6 +1132,7 @@ TEST_F(BlockBasedTableTest, BlockBasedTableProperties2) { ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name); ASSERT_EQ("UInt64AddOperator", props.merge_operator_name); + ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name); ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]", props.property_collectors_names); ASSERT_EQ("", props.filter_policy_name); // no filter policy is used @@ -1452,6 +1456,44 @@ TEST_F(BlockBasedTableTest, NoopTransformSeek) { } } +TEST_F(BlockBasedTableTest, SkipPrefixBloomFilter) { + // if DB is opened with a prefix extractor of a different name, + // prefix bloom is skipped when read the file + BlockBasedTableOptions table_options; + table_options.filter_policy.reset(NewBloomFilterPolicy(2)); + table_options.whole_key_filtering = false; + + Options options; + options.comparator = BytewiseComparator(); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewFixedPrefixTransform(1)); + + TableConstructor c(options.comparator); + InternalKey key("abcdefghijk", 1, kTypeValue); + c.Add(key.Encode().ToString(), "test"); + std::vector keys; + stl_wrappers::KVMap kvmap; + const ImmutableCFOptions ioptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + c.Finish(options, ioptions, table_options, internal_comparator, &keys, + &kvmap); + options.prefix_extractor.reset(NewFixedPrefixTransform(9)); + const ImmutableCFOptions new_ioptions(options); + c.Reopen(new_ioptions); + auto reader = c.GetTableReader(); + std::unique_ptr db_iter(reader->NewIterator(ReadOptions())); + + // Test point lookup + // only one kv + for (auto& kv : kvmap) { + db_iter->Seek(kv.first); + ASSERT_TRUE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); + ASSERT_EQ(db_iter->key(), kv.first); + ASSERT_EQ(db_iter->value(), kv.second); + } +} + static std::string RandomString(Random* rnd, int len) { std::string r; test::RandomString(rnd, len, &r); @@ -2660,7 +2702,6 @@ TEST_F(PrefixTest, PrefixAndWholeKeyTest) { rocksdb::BlockBasedTableOptions bbto; bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10)); bbto.block_size = 262144; - bbto.whole_key_filtering = true; const std::string kDBPath = test::TmpDir() + "/table_prefix_test";