diff --git a/db/builder.cc b/db/builder.cc index 1200cdf56..a8951a98e 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -50,7 +50,7 @@ Status BuildTable( const SequenceNumber newest_snapshot, const SequenceNumber earliest_seqno_in_memtable, const CompressionType compression, - const CompressionOptions& compression_opts, + const CompressionOptions& compression_opts, bool paranoid_file_checks, const Env::IOPriority io_priority) { Status s; meta->fd.file_size = 0; @@ -227,6 +227,11 @@ Status BuildTable( Iterator* it = table_cache->NewIterator(ReadOptions(), env_options, internal_comparator, meta->fd); s = it->status(); + if (s.ok() && paranoid_file_checks) { + for (it->SeekToFirst(); it->Valid(); it->Next()) {} + s = it->status(); + } + delete it; } } diff --git a/db/builder.h b/db/builder.h index ec3b1f530..50c86ab6c 100644 --- a/db/builder.h +++ b/db/builder.h @@ -52,7 +52,7 @@ extern Status BuildTable( const SequenceNumber newest_snapshot, const SequenceNumber earliest_seqno_in_memtable, const CompressionType compression, - const CompressionOptions& compression_opts, + const CompressionOptions& compression_opts, bool paranoid_file_checks, const Env::IOPriority io_priority = Env::IO_HIGH); } // namespace rocksdb diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 5c77d2321..00b27881c 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -984,6 +984,12 @@ Status CompactionJob::FinishCompactionOutputFile(Iterator* input) { Iterator* iter = cfd->table_cache()->NewIterator( ReadOptions(), env_options_, cfd->internal_comparator(), fd); s = iter->status(); + + if (s.ok() && mutable_cf_options_.paranoid_file_checks) { + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {} + s = iter->status(); + } + delete iter; if (s.ok()) { Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, diff --git a/db/db_test.cc b/db/db_test.cc index df631779c..0ebde90b1 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1587,6 +1587,50 @@ TEST_F(DBTest, 
IndexAndFilterBlocksOfNewTableAddedToCache) { TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); } +TEST_F(DBTest, ParanoidFileChecks) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + options.level0_file_num_compaction_trigger = 2; + options.paranoid_file_checks = true; + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = false; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"pikachu"}, options); + + ASSERT_OK(Put(1, "1_key", "val")); + ASSERT_OK(Put(1, "9_key", "val")); + // Flush to create the first SST file with paranoid_file_checks enabled. + ASSERT_OK(Flush(1)); + ASSERT_EQ(1, /* the paranoid check read and cached the data block */ + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + ASSERT_OK(Put(1, "1_key2", "val2")); + ASSERT_OK(Put(1, "9_key2", "val2")); + // Create a second SST file; with the level-0 trigger set to 2 this also + // kicks off a compaction, which writes a third file. + ASSERT_OK(Flush(1)); + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(3, /* 3 files written so far, one data block cached for each */ + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + // After disabling options.paranoid_file_checks, no further blocks + // are added to the block cache when new files are generated. 
+ ASSERT_OK( + dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}})); + + ASSERT_OK(Put(1, "1_key3", "val3")); + ASSERT_OK(Put(1, "9_key3", "val3")); + ASSERT_OK(Flush(1)); + ASSERT_OK(Put(1, "1_key4", "val4")); + ASSERT_OK(Put(1, "9_key4", "val4")); + ASSERT_OK(Flush(1)); + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(3, /* still 3: files written after disabling are not re-read */ + TestGetTickerCount(options, BLOCK_CACHE_ADD)); +} + TEST_F(DBTest, GetPropertiesOfAllTablesTest) { Options options = CurrentOptions(); options.max_background_flushes = 0; diff --git a/db/flush_job.cc b/db/flush_job.cc index 1a304fa9c..d8cb3eb04 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -187,7 +187,8 @@ Status FlushJob::WriteLevel0Table(const autovector& mems, cfd_->internal_comparator(), cfd_->int_tbl_prop_collector_factories(), newest_snapshot_, earliest_seqno_in_memtable, output_compression_, - cfd_->ioptions()->compression_opts, Env::IO_HIGH); + cfd_->ioptions()->compression_opts, + mutable_cf_options_.paranoid_file_checks, Env::IO_HIGH); LogFlush(db_options_.info_log); } Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, diff --git a/db/repair.cc b/db/repair.cc index ad5d21830..8b15eaab8 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -289,7 +289,7 @@ class Repairer { status = BuildTable(dbname_, env_, ioptions_, env_options_, table_cache_, iter.get(), &meta, icmp_, &int_tbl_prop_collector_factories_, 0, 0, - kNoCompression, CompressionOptions()); + kNoCompression, CompressionOptions(), false); } delete mem->Unref(); delete cf_mems_default; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index d8a0d885d..beee23a1a 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -705,6 +705,10 @@ struct ColumnFamilyOptions { // Default: false bool optimize_filters_for_hits; + // After writing every SST file, reopen it and read all the keys. 
+ // Default: false + bool paranoid_file_checks; + #ifndef ROCKSDB_LITE // A vector of EventListeners which call-back functions will be called // when specific RocksDB event happens. diff --git a/util/mutable_cf_options.h b/util/mutable_cf_options.h index 593871583..20845d9d4 100644 --- a/util/mutable_cf_options.h +++ b/util/mutable_cf_options.h @@ -42,7 +42,8 @@ struct MutableCFOptions { max_mem_compaction_level(options.max_mem_compaction_level), verify_checksums_in_compaction(options.verify_checksums_in_compaction), max_sequential_skip_in_iterations( - options.max_sequential_skip_in_iterations) + options.max_sequential_skip_in_iterations), + paranoid_file_checks(options.paranoid_file_checks) { RefreshDerivedOptions(ioptions); } @@ -71,7 +72,8 @@ struct MutableCFOptions { max_bytes_for_level_multiplier(0), max_mem_compaction_level(0), verify_checksums_in_compaction(false), - max_sequential_skip_in_iterations(0) + max_sequential_skip_in_iterations(0), + paranoid_file_checks(false) {} // Must be called after any change to MutableCFOptions @@ -125,6 +127,7 @@ struct MutableCFOptions { // Misc options uint64_t max_sequential_skip_in_iterations; + bool paranoid_file_checks; // Derived options // Per-level target file size. 
diff --git a/util/options.cc b/util/options.cc index 2aa1d8b66..3f56d276e 100644 --- a/util/options.cc +++ b/util/options.cc @@ -128,7 +128,8 @@ ColumnFamilyOptions::ColumnFamilyOptions() bloom_locality(0), max_successive_merges(0), min_partial_merge_operands(2), - optimize_filters_for_hits(false) + optimize_filters_for_hits(false), + paranoid_file_checks(false) #ifndef ROCKSDB_LITE , listeners() { @@ -197,7 +198,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) bloom_locality(options.bloom_locality), max_successive_merges(options.max_successive_merges), min_partial_merge_operands(options.min_partial_merge_operands), - optimize_filters_for_hits(options.optimize_filters_for_hits) + optimize_filters_for_hits(options.optimize_filters_for_hits), + paranoid_file_checks(options.paranoid_file_checks) #ifndef ROCKSDB_LITE , listeners(options.listeners) { diff --git a/util/options_helper.cc b/util/options_helper.cc index 7f78ee2a3..18e1f38b2 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -225,6 +225,8 @@ bool ParseMiscOptions(const std::string& name, const std::string& value, OptionsType* new_options) { if (name == "max_sequential_skip_in_iterations") { new_options->max_sequential_skip_in_iterations = ParseUint64(value); + } else if (name == "paranoid_file_checks") { + new_options->paranoid_file_checks = ParseBoolean(name, value); } else { return false; }