Verify checksum before ingestion (#4916)

Summary:
Before file ingestion (in the preparation phase), verify the checksums of
the blocks of the external SST file, including the properties block with the
global seqno.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4916

Differential Revision: D13863501

Pulled By: riversand963

fbshipit-source-id: dc54697f970e3807832e2460f7228fcc7efe81ee
main
Yanqin Jin 6 years ago committed by Facebook Github Bot
parent d0d484b132
commit 158da7a6ee
  1. 1
      HISTORY.md
  2. 359
      db/external_sst_file_basic_test.cc
  3. 7
      db/external_sst_file_ingestion_job.cc
  4. 150
      db/external_sst_file_test.cc
  5. 7
      include/rocksdb/options.h
  6. 3
      table/block_based_table_builder.cc

@ -6,6 +6,7 @@
* RocksDB may choose to preopen some files even if options.max_open_files != -1. This may make DB open slightly longer.
* For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds.
* For all users of dictionary compression who set `cache_index_and_filter_blocks == true`, we now store dictionary data used for decompression in the block cache for better control over memory usage. For users of ZSTD v1.1.4+ who compile with -DZSTD_STATIC_LINKING_ONLY, this includes a digested dictionary, which is used to increase decompression speed.
* Add support for verifying block checksums of external SST files before ingestion.
### Public API Change
* CompactionPri = kMinOverlappingRatio also uses compensated file size, which boosts file with lots of tombstones to be compacted first.

@ -14,8 +14,9 @@
namespace rocksdb {
#ifndef ROCKSDB_LITE
class ExternalSSTFileBasicTest : public DBTestBase,
public ::testing::WithParamInterface<bool> {
class ExternalSSTFileBasicTest
: public DBTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public:
ExternalSSTFileBasicTest() : DBTestBase("/external_sst_file_basic_test") {
sst_files_dir_ = dbname_ + "/sst_files/";
@ -42,7 +43,8 @@ class ExternalSSTFileBasicTest : public DBTestBase,
const Options options, std::vector<int> keys,
const std::vector<ValueType>& value_types,
std::vector<std::pair<int, int>> range_deletions, int file_id,
bool write_global_seqno, std::map<std::string, std::string>* true_data) {
bool write_global_seqno, bool verify_checksums_before_ingest,
std::map<std::string, std::string>* true_data) {
assert(value_types.size() == 1 || keys.size() == value_types.size());
std::string file_path = sst_files_dir_ + ToString(file_id);
SstFileWriter sst_file_writer(EnvOptions(), options);
@ -107,6 +109,7 @@ class ExternalSSTFileBasicTest : public DBTestBase,
IngestExternalFileOptions ifo;
ifo.allow_global_seqno = true;
ifo.write_global_seqno = write_global_seqno;
ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
s = db_->IngestExternalFile({file_path}, ifo);
}
return s;
@ -115,18 +118,20 @@ class ExternalSSTFileBasicTest : public DBTestBase,
Status GenerateAndAddExternalFile(
const Options options, std::vector<int> keys,
const std::vector<ValueType>& value_types, int file_id,
bool write_global_seqno, std::map<std::string, std::string>* true_data) {
return GenerateAndAddExternalFile(options, keys, value_types, {}, file_id,
write_global_seqno, true_data);
bool write_global_seqno, bool verify_checksums_before_ingest,
std::map<std::string, std::string>* true_data) {
return GenerateAndAddExternalFile(
options, keys, value_types, {}, file_id, write_global_seqno,
verify_checksums_before_ingest, true_data);
}
Status GenerateAndAddExternalFile(
const Options options, std::vector<int> keys, const ValueType value_type,
int file_id, bool write_global_seqno,
int file_id, bool write_global_seqno, bool verify_checksums_before_ingest,
std::map<std::string, std::string>* true_data) {
return GenerateAndAddExternalFile(options, keys,
std::vector<ValueType>(1, value_type),
file_id, write_global_seqno, true_data);
return GenerateAndAddExternalFile(
options, keys, std::vector<ValueType>(1, value_type), file_id,
write_global_seqno, verify_checksums_before_ingest, true_data);
}
~ExternalSSTFileBasicTest() { test::DestroyDir(env_, sst_files_dir_); }
@ -249,7 +254,8 @@ TEST_F(ExternalSSTFileBasicTest, NoCopy) {
}
TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
do {
Options options = CurrentOptions();
DestroyAndReopen(options);
@ -257,39 +263,39 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
int file_id = 1;
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2, 3, 4, 5, 6},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
ASSERT_OK(GenerateAndAddExternalFile(options, {10, 11, 12, 13},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4, 6},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 4, 6}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
ASSERT_OK(GenerateAndAddExternalFile(options, {11, 15, 19},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {11, 15, 19}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
ASSERT_OK(GenerateAndAddExternalFile(options, {120, 130},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {120, 130}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 130},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 130}, ValueType::kTypeValue, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
@ -300,21 +306,21 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
}
SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
ASSERT_OK(GenerateAndAddExternalFile(options, {60, 61, 62},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {60, 61, 62}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
ASSERT_OK(GenerateAndAddExternalFile(options, {40, 41, 42},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {40, 41, 42}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
ASSERT_OK(GenerateAndAddExternalFile(options, {20, 30, 40},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {20, 30, 40}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
@ -322,29 +328,29 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
// We will need a seqno for the file regardless of whether the file
// overwrites keys in the DB, because we have a snapshot
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1002},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1000, 1002}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
ASSERT_OK(GenerateAndAddExternalFile(options, {2000, 3002},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {2000, 3002}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 20, 40, 100, 150},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 20, 40, 100, 150}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
db_->ReleaseSnapshot(snapshot);
ASSERT_OK(GenerateAndAddExternalFile(options, {5000, 5001},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {5000, 5001}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// No snapshot anymore, no need to assign a seqno
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
@ -354,7 +360,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
}
TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
do {
Options options = CurrentOptions();
options.merge_operator.reset(new TestPutOperator());
@ -363,59 +370,59 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
int file_id = 1;
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2, 3, 4, 5, 6},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
ASSERT_OK(GenerateAndAddExternalFile(options, {10, 11, 12, 13},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4, 6},
ValueType::kTypeMerge, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 4, 6}, ValueType::kTypeMerge, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
ASSERT_OK(GenerateAndAddExternalFile(options, {11, 15, 19},
ValueType::kTypeDeletion, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {11, 15, 19}, ValueType::kTypeDeletion, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
ASSERT_OK(GenerateAndAddExternalFile(options, {120, 130},
ValueType::kTypeMerge, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {120, 130}, ValueType::kTypeMerge, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 130},
ValueType::kTypeDeletion, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 130}, ValueType::kTypeDeletion, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
ASSERT_OK(GenerateAndAddExternalFile(
options, {120}, {ValueType::kTypeValue}, {{120, 135}}, file_id++,
write_global_seqno, &true_data));
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
ASSERT_OK(GenerateAndAddExternalFile(options, {}, {}, {{110, 120}},
file_id++, write_global_seqno,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {}, {}, {{110, 120}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
// The range deletion ends on a key, but it doesn't actually delete
// this key because the largest key in the range is exclusive. Still,
// it counts as an overlap so a new seqno will be assigned.
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
ASSERT_OK(GenerateAndAddExternalFile(options, {}, {}, {{100, 109}},
file_id++, write_global_seqno,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {}, {}, {{100, 109}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
@ -426,21 +433,21 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
}
SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
ASSERT_OK(GenerateAndAddExternalFile(options, {60, 61, 62},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {60, 61, 62}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
ASSERT_OK(GenerateAndAddExternalFile(options, {40, 41, 42},
ValueType::kTypeMerge, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {40, 41, 42}, ValueType::kTypeMerge, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
ASSERT_OK(GenerateAndAddExternalFile(options, {20, 30, 40},
ValueType::kTypeDeletion, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {20, 30, 40}, ValueType::kTypeDeletion, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
@ -448,29 +455,29 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
// We will need a seqno for the file regardless of whether the file
// overwrites keys in the DB, because we have a snapshot
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1002},
ValueType::kTypeMerge, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1000, 1002}, ValueType::kTypeMerge, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
ASSERT_OK(GenerateAndAddExternalFile(options, {2000, 3002},
ValueType::kTypeMerge, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {2000, 3002}, ValueType::kTypeMerge, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 20, 40, 100, 150},
ValueType::kTypeMerge, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 20, 40, 100, 150}, ValueType::kTypeMerge, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
db_->ReleaseSnapshot(snapshot);
ASSERT_OK(GenerateAndAddExternalFile(options, {5000, 5001},
ValueType::kTypeValue, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {5000, 5001}, ValueType::kTypeValue, file_id++,
write_global_seqno, verify_checksums_before_ingest, &true_data));
// No snapshot anymore, no need to assign a seqno
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
@ -480,7 +487,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
}
TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
do {
Options options = CurrentOptions();
options.merge_operator.reset(new TestPutOperator());
@ -493,7 +501,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {1, 2, 3, 4, 5, 6},
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue,
ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
@ -501,7 +510,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {10, 11, 12, 13},
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue,
ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
@ -509,7 +519,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {1, 4, 6},
{ValueType::kTypeDeletion, ValueType::kTypeValue,
ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
@ -517,19 +528,22 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {11, 15, 19},
{ValueType::kTypeDeletion, ValueType::kTypeMerge,
ValueType::kTypeValue},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
ASSERT_OK(GenerateAndAddExternalFile(
options, {120, 130}, {ValueType::kTypeValue, ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 130}, {ValueType::kTypeMerge, ValueType::kTypeDeletion},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
@ -537,14 +551,16 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {150, 151, 152},
{ValueType::kTypeValue, ValueType::kTypeMerge,
ValueType::kTypeDeletion},
{{150, 160}, {180, 190}}, file_id++, write_global_seqno, &true_data));
{{150, 160}, {180, 190}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
ASSERT_OK(GenerateAndAddExternalFile(
options, {150, 151, 152},
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue},
{{200, 250}}, file_id++, write_global_seqno, &true_data));
{{200, 250}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
@ -552,7 +568,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {300, 301, 302},
{ValueType::kTypeValue, ValueType::kTypeMerge,
ValueType::kTypeDeletion},
{{1, 2}, {152, 154}}, file_id++, write_global_seqno, &true_data));
{{1, 2}, {152, 154}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
@ -566,7 +583,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
ASSERT_OK(GenerateAndAddExternalFile(
options, {60, 61, 62},
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File doesn't overwrite any keys, no seqno needed
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
@ -574,7 +592,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {40, 41, 42},
{ValueType::kTypeValue, ValueType::kTypeDeletion,
ValueType::kTypeDeletion},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
@ -582,7 +601,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {20, 30, 40},
{ValueType::kTypeDeletion, ValueType::kTypeDeletion,
ValueType::kTypeDeletion},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// File overwrites some keys, a seqno will be assigned
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
@ -592,13 +612,15 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
// keys in the DB or not because we have a snapshot
ASSERT_OK(GenerateAndAddExternalFile(
options, {1000, 1002}, {ValueType::kTypeValue, ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
ASSERT_OK(GenerateAndAddExternalFile(
options, {2000, 3002}, {ValueType::kTypeValue, ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
@ -606,7 +628,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
options, {1, 20, 40, 100, 150},
{ValueType::kTypeDeletion, ValueType::kTypeDeletion,
ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// A global seqno will be assigned anyway because of the snapshot
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
@ -614,7 +637,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
ASSERT_OK(GenerateAndAddExternalFile(
options, {5000, 5001}, {ValueType::kTypeValue, ValueType::kTypeMerge},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
// No snapshot anymore, no need to assign a seqno
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
@ -690,13 +714,15 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2));
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1));
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// overlaps with L0 file but not memtable, so flush is skipped and file is
// ingested into L0
SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
ASSERT_OK(GenerateAndAddExternalFile(
options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue},
{{65, 70}, {70, 85}}, file_id++, write_global_seqno, &true_data));
{{65, 70}, {70, 85}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
ASSERT_EQ(2, NumTableFilesAtLevel(0));
ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2));
@ -706,7 +732,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
// file is ingested into L5
ASSERT_OK(GenerateAndAddExternalFile(
options, {10, 40}, {ValueType::kTypeValue, ValueType::kTypeValue},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
ASSERT_EQ(2, NumTableFilesAtLevel(0));
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
@ -714,8 +741,9 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
// overlaps with L5 file but not memtable or L0 file, so flush is skipped and
// file is ingested into L4
ASSERT_OK(GenerateAndAddExternalFile(options, {}, {}, {{5, 15}}, file_id++,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {}, {}, {{5, 15}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
ASSERT_EQ(2, NumTableFilesAtLevel(0));
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
@ -727,7 +755,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
// count increases by two.
ASSERT_OK(GenerateAndAddExternalFile(
options, {100, 140}, {ValueType::kTypeValue, ValueType::kTypeValue},
file_id++, write_global_seqno, &true_data));
file_id++, write_global_seqno, verify_checksums_before_ingest,
&true_data));
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
ASSERT_EQ(4, NumTableFilesAtLevel(0));
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
@ -740,15 +769,101 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
// seqnum.
ASSERT_OK(GenerateAndAddExternalFile(
options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue},
{{160, 200}}, file_id++, write_global_seqno, &true_data));
{{160, 200}}, file_id++, write_global_seqno,
verify_checksums_before_ingest, &true_data));
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
ASSERT_EQ(4, NumTableFilesAtLevel(0));
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1));
}
// Generates an external SST file while a sync-point callback flips the lowest
// bit of the first byte handed to it, then ingests the file. Ingestion is
// expected to fail exactly when verify_checksums_before_ingest is set; without
// verification the ingestion is expected to succeed.
TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
  const bool write_global_seqno = std::get<0>(GetParam());
  const bool verify_checksums_before_ingest = std::get<1>(GetParam());
  int file_id = 0;

  // One-shot guard: only the first callback invocation per generated file
  // tampers with the buffer. It is re-armed at the end of every loop iteration.
  bool change_checksum_called = false;
  auto tamper_once = [&](void* arg) {
    if (change_checksum_called) {
      return;
    }
    // NOTE(review): presumably `arg` points at the block checksum bytes, as
    // the sync point name suggests -- confirm against the table builder.
    char* bytes = reinterpret_cast<char*>(arg);
    assert(nullptr != bytes);
    bytes[0] ^= 0x1;
    change_checksum_called = true;
  };

  // Start from a clean sync-point state before installing the callback.
  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  sync_point->SetCallBack(
      "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", tamper_once);
  sync_point->EnableProcessing();

  do {
    Options options = CurrentOptions();
    DestroyAndReopen(options);
    std::map<std::string, std::string> true_data;
    const Status ingest_status = GenerateAndAddExternalFile(
        options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
        write_global_seqno, verify_checksums_before_ingest, &true_data);
    if (!verify_checksums_before_ingest) {
      ASSERT_OK(ingest_status);
    } else {
      ASSERT_NOK(ingest_status);
    }
    // Re-arm the tampering for the file generated in the next iteration.
    change_checksum_called = false;
  } while (ChangeOptionsForFileIngestionTest());
}
// Writes a valid external SST file with 100 keys, inverts the first byte of
// the file on disk, and then ingests it. Ingestion is expected to fail exactly
// when verify_checksums_before_ingest is set; without verification the
// ingestion is expected to succeed.
TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) {
  SyncPoint::GetInstance()->DisableProcessing();
  int file_id = 0;
  EnvOptions env_options;
  do {
    Options options = CurrentOptions();
    std::string file_path = sst_files_dir_ + ToString(file_id++);
    SstFileWriter sst_file_writer(env_options, options);
    Status s = sst_file_writer.Open(file_path);
    ASSERT_OK(s);
    for (int i = 0; i != 100; ++i) {
      std::string key = Key(i);
      std::string value = Key(i) + ToString(0);
      ASSERT_OK(sst_file_writer.Put(key, value));
    }
    ASSERT_OK(sst_file_writer.Finish());
    {
      // Make sure the file is large enough for the 8-byte read below.
      uint64_t file_size = 0;
      ASSERT_OK(env_->GetFileSize(file_path, &file_size));
      ASSERT_GT(file_size, 8);
      std::unique_ptr<RandomRWFile> rwfile;
      ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions()));
      // Manually corrupt the file.
      // We deterministically corrupt the first byte because we currently
      // cannot choose a random offset. The reason for this limitation is that
      // we do not checksum property block at present.
      const uint64_t offset = 0;
      char scratch[8] = {0};
      Slice buf;
      ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch));
      // Read() is only allowed, not required, to place the data in `scratch`,
      // so take a private copy of whatever `buf` refers to and corrupt that
      // copy instead of assuming `buf` aliases `scratch`.
      std::string corrupted(buf.data(), buf.size());
      ASSERT_FALSE(corrupted.empty());
      // Invert every bit of the first byte.
      corrupted[0] = static_cast<char>(corrupted[0] ^ 0xff);
      ASSERT_OK(rwfile->Write(offset, Slice(corrupted)));
    }
    // Ingest file.
    IngestExternalFileOptions ifo;
    ifo.write_global_seqno = std::get<0>(GetParam());
    ifo.verify_checksums_before_ingest = std::get<1>(GetParam());
    s = db_->IngestExternalFile({file_path}, ifo);
    if (ifo.verify_checksums_before_ingest) {
      ASSERT_NOK(s);
    } else {
      ASSERT_OK(s);
    }
  } while (ChangeOptionsForFileIngestionTest());
}
INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
testing::Bool());
testing::Values(std::make_tuple(true, true),
std::make_tuple(true, false),
std::make_tuple(false, true),
std::make_tuple(false, false)));
#endif // ROCKSDB_LITE

@ -316,6 +316,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
return status;
}
if (ingestion_options_.verify_checksums_before_ingest) {
status = table_reader->VerifyChecksum();
}
if (!status.ok()) {
return status;
}
// Get the external file properties
auto props = table_reader->GetTableProperties();
const auto& uprops = props->user_collected_properties;

@ -15,8 +15,9 @@
namespace rocksdb {
class ExternalSSTFileTest : public DBTestBase,
public ::testing::WithParamInterface<bool> {
class ExternalSSTFileTest
: public DBTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public:
ExternalSSTFileTest() : DBTestBase("/external_sst_file_test") {
sst_files_dir_ = dbname_ + "/sst_files/";
@ -32,7 +33,7 @@ class ExternalSSTFileTest : public DBTestBase,
const Options options,
std::vector<std::pair<std::string, std::string>> data, int file_id = -1,
bool allow_global_seqno = false, bool write_global_seqno = false,
bool sort_data = false,
bool verify_checksums_before_ingest = true, bool sort_data = false,
std::map<std::string, std::string>* true_data = nullptr,
ColumnFamilyHandle* cfh = nullptr) {
// Generate a file id if not provided
@ -76,6 +77,7 @@ class ExternalSSTFileTest : public DBTestBase,
IngestExternalFileOptions ifo;
ifo.allow_global_seqno = allow_global_seqno;
ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false;
ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
if (cfh) {
s = db_->IngestExternalFile(cfh, {file_path}, ifo);
} else {
@ -155,31 +157,32 @@ class ExternalSSTFileTest : public DBTestBase,
// Overload taking (int key, value) pairs. Each integer key is converted with
// Key() and paired with its value unchanged; the resulting string pairs are
// forwarded, together with every other argument, to another
// GenerateAndAddExternalFile overload, whose Status is returned.
// NOTE(review): old and new diff lines are interleaved in this span; the
// parameter list and return statement that mention
// verify_checksums_before_ingest appear to be the post-change ones -- confirm.
Status GenerateAndAddExternalFile(
const Options options, std::vector<std::pair<int, std::string>> data,
int file_id = -1, bool allow_global_seqno = false,
bool write_global_seqno = false, bool sort_data = false,
bool write_global_seqno = false,
bool verify_checksums_before_ingest = true, bool sort_data = false,
std::map<std::string, std::string>* true_data = nullptr,
ColumnFamilyHandle* cfh = nullptr) {
std::vector<std::pair<std::string, std::string>> file_data;
// Translate integer keys into their string form; values pass through as-is.
for (auto& entry : data) {
file_data.emplace_back(Key(entry.first), entry.second);
}
return GenerateAndAddExternalFile(options, file_data, file_id,
allow_global_seqno, write_global_seqno,
sort_data, true_data, cfh);
return GenerateAndAddExternalFile(
options, file_data, file_id, allow_global_seqno, write_global_seqno,
verify_checksums_before_ingest, sort_data, true_data, cfh);
}
// Overload taking bare integer keys. For each key k the entry
// (Key(k), Key(k) + ToString(file_id)) is generated, using file_id exactly as
// passed in (including the default -1). The generated pairs and all other
// arguments are forwarded to another GenerateAndAddExternalFile overload,
// whose Status is returned.
// NOTE(review): old and new diff lines are interleaved in this span; the
// parameter list and return statement that mention
// verify_checksums_before_ingest appear to be the post-change ones -- confirm.
Status GenerateAndAddExternalFile(
const Options options, std::vector<int> keys, int file_id = -1,
bool allow_global_seqno = false, bool write_global_seqno = false,
bool sort_data = false,
bool verify_checksums_before_ingest = true, bool sort_data = false,
std::map<std::string, std::string>* true_data = nullptr,
ColumnFamilyHandle* cfh = nullptr) {
std::vector<std::pair<std::string, std::string>> file_data;
// Derive each value from its key plus the file id.
for (auto& k : keys) {
file_data.emplace_back(Key(k), Key(k) + ToString(file_id));
}
return GenerateAndAddExternalFile(options, file_data, file_id,
allow_global_seqno, write_global_seqno,
sort_data, true_data, cfh);
return GenerateAndAddExternalFile(
options, file_data, file_id, allow_global_seqno, write_global_seqno,
verify_checksums_before_ingest, sort_data, true_data, cfh);
}
Status DeprecatedAddFile(const std::vector<std::string>& files,
@ -1157,13 +1160,13 @@ TEST_P(ExternalSSTFileTest, PickedLevel) {
std::map<std::string, std::string> true_data;
// File 0 will go to last level (L3)
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false,
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false, true,
false, &true_data));
EXPECT_EQ(FilesPerLevel(), "0,0,0,1");
// File 1 will go to level L2 (since it overlaps with file 0 in L3)
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, false,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, true,
false, &true_data));
EXPECT_EQ(FilesPerLevel(), "0,0,1,1");
rocksdb::SyncPoint::GetInstance()->LoadDependency({
@ -1192,13 +1195,13 @@ TEST_P(ExternalSSTFileTest, PickedLevel) {
// This file overlaps with file 0 (L3), file 1 (L2) and the
// output of compaction going to L1
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, false,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, true,
false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5,0,1,1");
// This file does not overlap with any file or with the running compaction
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
false, &true_data));
true, false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5,0,1,2");
// Hold compaction from finishing
@ -1318,7 +1321,7 @@ TEST_F(ExternalSSTFileTest, IngestNonExistingFile) {
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
// After full compaction, there should be only 1 file.
std::vector<std::string> files;
env_->GetChildren(dbname_, &files);
@ -1429,12 +1432,12 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
// This file overlaps with the output of the compaction (going to L3)
// so the file will be added to L0 since L3 is the base level
ASSERT_OK(GenerateAndAddExternalFile(options, {31, 32, 33, 34}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5");
// This file does not overlap with the currently running compaction
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
false, &true_data));
true, false, &true_data));
EXPECT_EQ(FilesPerLevel(), "5,0,0,1");
// Hold compaction from finishing
@ -1449,25 +1452,25 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
Reopen(options);
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 15, 19}, -1, false, false,
false, &true_data));
true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "1,0,0,3");
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1001, 1002}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "1,0,0,4");
ASSERT_OK(GenerateAndAddExternalFile(options, {500, 600, 700}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "1,0,0,5");
// File 5 overlaps with file 2 (L3 / base level)
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false,
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false, true,
false, &true_data));
ASSERT_EQ(FilesPerLevel(), "2,0,0,5");
// File 6 overlaps with file 2 (L3 / base level) and file 5 (L0)
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, false,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, true,
false, &true_data));
ASSERT_EQ(FilesPerLevel(), "3,0,0,5");
// Verify data in files
@ -1486,7 +1489,7 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
// File 7 overlaps with file 4 (L3)
ASSERT_OK(GenerateAndAddExternalFile(options, {650, 651, 652}, -1, false,
false, false, &true_data));
false, true, false, &true_data));
ASSERT_EQ(FilesPerLevel(), "5,0,0,5");
VerifyDBFromMap(true_data, &kcnt, false);
@ -1626,7 +1629,8 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
options.level0_slowdown_writes_trigger = 256;
options.level0_stop_writes_trigger = 256;
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
for (int iter = 0; iter < 2; iter++) {
bool write_to_memtable = (iter == 0);
DestroyAndReopen(options);
@ -1650,9 +1654,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
true_data[entry.first] = entry.second;
}
} else {
ASSERT_OK(GenerateAndAddExternalFile(options, random_data, -1, true,
write_global_seqno, true,
&true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, random_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, &true_data));
}
}
size_t kcnt = 0;
@ -1681,9 +1685,11 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 0; i <= 20; i++) {
file_data.emplace_back(Key(i), "L4");
}
bool write_global_seqno = GetParam();
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file doesn't overlap with anything in the DB, so it will go to L4
ASSERT_EQ("0,0,0,0,1", FilesPerLevel());
@ -1693,8 +1699,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 80; i <= 130; i++) {
file_data.emplace_back(Key(i), "L0");
}
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file overlaps with the memtable, so it will flush it and add
// itself to L0
@ -1705,8 +1712,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 30; i <= 50; i++) {
file_data.emplace_back(Key(i), "L4");
}
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file doesn't overlap with anything in the DB and fits in L4 as well
ASSERT_EQ("2,0,0,0,2", FilesPerLevel());
@ -1716,8 +1724,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
for (int i = 10; i <= 40; i++) {
file_data.emplace_back(Key(i), "L3");
}
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
// This file overlaps with files in L4, so we will ingest it in L3
ASSERT_EQ("2,0,0,1,2", FilesPerLevel());
@ -1740,17 +1749,20 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
&entries_in_memtable);
ASSERT_GE(entries_in_memtable, 1);
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// No need for flush
ASSERT_OK(GenerateAndAddExternalFile(options, {90, 100, 110}, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {90, 100, 110}, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_GE(entries_in_memtable, 1);
// This file will flush the memtable
ASSERT_OK(GenerateAndAddExternalFile(options, {19, 20, 21}, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {19, 20, 21}, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_EQ(entries_in_memtable, 0);
@ -1764,15 +1776,17 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
ASSERT_GE(entries_in_memtable, 1);
// No need for a flush; this file's keys fit between the memtable keys
ASSERT_OK(GenerateAndAddExternalFile(options, {202, 203, 204}, -1, true,
write_global_seqno, false, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {202, 203, 204}, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_GE(entries_in_memtable, 1);
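// This file will flush the memtable
// NOTE(review): the arguments of the call below look shifted by one compared
// with the sibling calls in this test: the literal `false` lands in
// write_global_seqno, write_global_seqno in verify_checksums_before_ingest,
// and verify_checksums_before_ingest in sort_data. Presumably intended:
// (..., -1, true, write_global_seqno, verify_checksums_before_ingest, false,
// &true_data) -- confirm.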
ASSERT_OK(GenerateAndAddExternalFile(options, {206, 207}, -1, true, false,
write_global_seqno, &true_data));
ASSERT_OK(GenerateAndAddExternalFile(
options, {206, 207}, -1, true, false, write_global_seqno,
verify_checksums_before_ingest, &true_data));
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
&entries_in_memtable);
ASSERT_EQ(entries_in_memtable, 0);
@ -1790,13 +1804,16 @@ TEST_P(ExternalSSTFileTest, L0SortingIssue) {
ASSERT_OK(Put(Key(1), "memtable"));
ASSERT_OK(Put(Key(10), "memtable"));
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// No Flush needed, No global seqno needed, Ingest in L1
ASSERT_OK(GenerateAndAddExternalFile(options, {7, 8}, -1, true,
write_global_seqno, false));
write_global_seqno,
verify_checksums_before_ingest, false));
// No Flush needed, but need a global seqno, Ingest in L0
ASSERT_OK(GenerateAndAddExternalFile(options, {7, 8}, -1, true,
write_global_seqno, false));
write_global_seqno,
verify_checksums_before_ingest, false));
printf("%s\n", FilesPerLevel().c_str());
// Overwrite what we added using external files
@ -2032,11 +2049,12 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
options.listeners.emplace_back(listener);
CreateAndReopenWithCF({"koko", "toto"}, options);
bool write_global_seqno = GetParam();
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
// Ingest into default cf
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
write_global_seqno, true, nullptr,
handles_[0]));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2}, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, nullptr, handles_[0]));
ASSERT_EQ(listener->ingested_files.size(), 1);
ASSERT_EQ(listener->ingested_files.back().cf_name, "default");
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
@ -2046,9 +2064,9 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
"default");
// Ingest into cf1
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
write_global_seqno, true, nullptr,
handles_[1]));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2}, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, nullptr, handles_[1]));
ASSERT_EQ(listener->ingested_files.size(), 2);
ASSERT_EQ(listener->ingested_files.back().cf_name, "koko");
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
@ -2058,9 +2076,9 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
"koko");
// Ingest into cf2
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
write_global_seqno, true, nullptr,
handles_[2]));
ASSERT_OK(GenerateAndAddExternalFile(
options, {1, 2}, -1, true, write_global_seqno,
verify_checksums_before_ingest, true, nullptr, handles_[2]));
ASSERT_EQ(listener->ingested_files.size(), 3);
ASSERT_EQ(listener->ingested_files.back().cf_name, "toto");
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
@ -2126,7 +2144,8 @@ TEST_P(ExternalSSTFileTest, IngestBehind) {
IngestExternalFileOptions ifo;
ifo.allow_global_seqno = true;
ifo.ingest_behind = true;
ifo.write_global_seqno = GetParam();
ifo.write_global_seqno = std::get<0>(GetParam());
ifo.verify_checksums_before_ingest = std::get<1>(GetParam());
// Can't ingest behind since allow_ingest_behind isn't set to true
ASSERT_NOK(GenerateAndAddExternalFileIngestBehind(options, ifo,
@ -2215,7 +2234,10 @@ TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
}
// Instantiate the parameterized suite over all four combinations of the
// (bool, bool) tuple. The TEST_P bodies read element 0 as write_global_seqno
// and element 1 as verify_checksums_before_ingest.
// NOTE(review): the testing::Bool() line looks like the pre-change generator
// kept by the diff rendering -- confirm.
INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest,
testing::Bool());
testing::Values(std::make_tuple(false, false),
std::make_tuple(false, true),
std::make_tuple(true, false),
std::make_tuple(true, true)));
} // namespace rocksdb

@ -295,7 +295,7 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
std::vector<DbPath> cf_paths;
// Compaction concurrent thread limiter for the column family.
// If non-nullptr, use given concurrent thread limiter to control
// If non-nullptr, use given concurrent thread limiter to control
// the max outstanding compaction tasks. Limiter can be shared with
// multiple column families across db instances.
//
@ -1344,6 +1344,11 @@ struct IngestExternalFileOptions {
// 2. Without writing external SST file, it's possible to do checksum.
// We have a plan to set this option to false by default in the future.
bool write_global_seqno = true;
// Set to true if you would like to verify the checksums of each block of the
// external SST file before ingestion.
// Warning: setting this to true causes slowdown in file ingestion because
// the external SST file has to be read.
bool verify_checksums_before_ingest = false;
};
// TraceOptions is used for StartTrace

@ -634,6 +634,9 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
}
assert(r->status.ok());
TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",
static_cast<char*>(trailer));
r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
if (r->status.ok()) {
r->status = InsertBlockInCache(block_contents, type, handle);

Loading…
Cancel
Save