BlobDB GC: add SST <-> oldest blob file referenced mapping (#5903)

Summary:
This is groundwork for adding garbage collection support to BlobDB. The
patch adds logic that keeps track of the oldest blob file referenced by
each SST file. The oldest blob file is identified during flush and
compaction (similarly to how the range of keys covered by the SST is
identified) and is persisted in the manifest as a custom field of the
new-file edit record. Blob indexes with TTL are ignored for the purposes
of identifying the oldest blob file, since the blob files they point to
are cleaned up by the TTL logic in BlobDB.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5903

Test Plan:
Added new unit tests; also ran db_bench in BlobDB mode, inspected the
manifest using ldb, and confirmed (by scanning the SST files using
sst_dump) that the value of the oldest blob file number field matches
the contents of the file for each SST.

Differential Revision: D17859997

Pulled By: ltamasi

fbshipit-source-id: 21662c137c6259a6af70446faaf3a9912c550e90
main
Levi Tamasi 5 years ago committed by Facebook Github Bot
parent a59dc843a4
commit 5f025ea832
  1. 2
      db/blob_index.h
  2. 7
      db/builder.cc
  3. 12
      db/compaction/compaction_job.cc
  4. 111
      db/compaction/compaction_job_test.cc
  5. 12
      db/compaction/compaction_picker_test.cc
  6. 5
      db/db_impl/db_impl_compaction_flush.cc
  7. 2
      db/db_impl/db_impl_experimental.cc
  8. 2
      db/db_impl/db_impl_open.cc
  9. 9
      db/event_helpers.cc
  10. 4
      db/event_helpers.h
  11. 3
      db/external_sst_file_ingestion_job.cc
  12. 2
      db/flush_job.cc
  13. 49
      db/flush_job_test.cc
  14. 3
      db/import_column_family_job.cc
  15. 31
      db/repair.cc
  16. 56
      db/version_builder_test.cc
  17. 66
      db/version_edit.cc
  18. 91
      db/version_edit.h
  19. 29
      db/version_edit_test.cc
  20. 6
      db/version_set.cc
  21. 31
      db/version_set_test.cc

@ -10,7 +10,6 @@
#include "util/string_util.h" #include "util/string_util.h"
namespace rocksdb { namespace rocksdb {
namespace blob_db {
// BlobIndex is a pointer to the blob and metadata of the blob. The index is // BlobIndex is a pointer to the blob and metadata of the blob. The index is
// stored in base DB as ValueType::kTypeBlobIndex. // stored in base DB as ValueType::kTypeBlobIndex.
@ -156,6 +155,5 @@ class BlobIndex {
CompressionType compression_ = kNoCompression; CompressionType compression_ = kNoCompression;
}; };
} // namespace blob_db
} // namespace rocksdb } // namespace rocksdb
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -124,7 +124,7 @@ Status BuildTable(
if (!s.ok()) { if (!s.ok()) {
EventHelpers::LogAndNotifyTableFileCreationFinished( EventHelpers::LogAndNotifyTableFileCreationFinished(
event_logger, ioptions.listeners, dbname, column_family_name, fname, event_logger, ioptions.listeners, dbname, column_family_name, fname,
job_id, meta->fd, tp, reason, s); job_id, meta->fd, kInvalidBlobFileNumber, tp, reason, s);
return s; return s;
} }
file->SetIOPriority(io_priority); file->SetIOPriority(io_priority);
@ -157,8 +157,9 @@ Status BuildTable(
for (; c_iter.Valid(); c_iter.Next()) { for (; c_iter.Valid(); c_iter.Next()) {
const Slice& key = c_iter.key(); const Slice& key = c_iter.key();
const Slice& value = c_iter.value(); const Slice& value = c_iter.value();
const ParsedInternalKey& ikey = c_iter.ikey();
builder->Add(key, value); builder->Add(key, value);
meta->UpdateBoundaries(key, c_iter.ikey().sequence); meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);
// TODO(noetzli): Update stats after flush, too. // TODO(noetzli): Update stats after flush, too.
if (io_priority == Env::IO_HIGH && if (io_priority == Env::IO_HIGH &&
@ -249,7 +250,7 @@ Status BuildTable(
// Output to event logger and fire events. // Output to event logger and fire events.
EventHelpers::LogAndNotifyTableFileCreationFinished( EventHelpers::LogAndNotifyTableFileCreationFinished(
event_logger, ioptions.listeners, dbname, column_family_name, fname, event_logger, ioptions.listeners, dbname, column_family_name, fname,
job_id, meta->fd, tp, reason, s); job_id, meta->fd, meta->oldest_blob_file_number, tp, reason, s);
return s; return s;
} }

@ -933,8 +933,9 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
assert(sub_compact->current_output() != nullptr); assert(sub_compact->current_output() != nullptr);
sub_compact->builder->Add(key, value); sub_compact->builder->Add(key, value);
sub_compact->current_output_file_size = sub_compact->builder->FileSize(); sub_compact->current_output_file_size = sub_compact->builder->FileSize();
const ParsedInternalKey& ikey = c_iter->ikey();
sub_compact->current_output()->meta.UpdateBoundaries( sub_compact->current_output()->meta.UpdateBoundaries(
key, c_iter->ikey().sequence); key, value, ikey.sequence, ikey.type);
sub_compact->num_output_records++; sub_compact->num_output_records++;
// Close output file if it is big enough. Two possibilities determine it's // Close output file if it is big enough. Two possibilities determine it's
@ -1349,17 +1350,20 @@ Status CompactionJob::FinishCompactionOutputFile(
} }
std::string fname; std::string fname;
FileDescriptor output_fd; FileDescriptor output_fd;
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
if (meta != nullptr) { if (meta != nullptr) {
fname = fname =
TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths, TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths,
meta->fd.GetNumber(), meta->fd.GetPathId()); meta->fd.GetNumber(), meta->fd.GetPathId());
output_fd = meta->fd; output_fd = meta->fd;
oldest_blob_file_number = meta->oldest_blob_file_number;
} else { } else {
fname = "(nil)"; fname = "(nil)";
} }
EventHelpers::LogAndNotifyTableFileCreationFinished( EventHelpers::LogAndNotifyTableFileCreationFinished(
event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname,
job_id_, output_fd, tp, TableFileCreationReason::kCompaction, s); job_id_, output_fd, oldest_blob_file_number, tp,
TableFileCreationReason::kCompaction, s);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
// Report new file to SstFileManagerImpl // Report new file to SstFileManagerImpl
@ -1469,8 +1473,8 @@ Status CompactionJob::OpenCompactionOutputFile(
LogFlush(db_options_.info_log); LogFlush(db_options_.info_log);
EventHelpers::LogAndNotifyTableFileCreationFinished( EventHelpers::LogAndNotifyTableFileCreationFinished(
event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(),
fname, job_id_, FileDescriptor(), TableProperties(), fname, job_id_, FileDescriptor(), kInvalidBlobFileNumber,
TableFileCreationReason::kCompaction, s); TableProperties(), TableFileCreationReason::kCompaction, s);
return s; return s;
} }

@ -12,6 +12,7 @@
#include <string> #include <string>
#include <tuple> #include <tuple>
#include "db/blob_index.h"
#include "db/column_family.h" #include "db/column_family.h"
#include "db/compaction/compaction_job.h" #include "db/compaction/compaction_job.h"
#include "db/db_impl/db_impl.h" #include "db/db_impl/db_impl.h"
@ -97,11 +98,34 @@ class CompactionJobTest : public testing::Test {
return TableFileName(db_paths, meta.fd.GetNumber(), meta.fd.GetPathId()); return TableFileName(db_paths, meta.fd.GetNumber(), meta.fd.GetPathId());
} }
std::string KeyStr(const std::string& user_key, const SequenceNumber seq_num, static std::string KeyStr(const std::string& user_key,
const ValueType t) { const SequenceNumber seq_num, const ValueType t) {
return InternalKey(user_key, seq_num, t).Encode().ToString(); return InternalKey(user_key, seq_num, t).Encode().ToString();
} }
// Test helper: returns the string produced by BlobIndex::EncodeBlob for a
// blob located at `offset` with length `size` in blob file
// `blob_file_number`, using kNoCompression.
static std::string BlobStr(uint64_t blob_file_number, uint64_t offset,
                           uint64_t size) {
  std::string encoded;
  BlobIndex::EncodeBlob(&encoded, blob_file_number, offset, size,
                        kNoCompression);
  return encoded;
}
// Test helper: returns the string produced by BlobIndex::EncodeBlobTTL for a
// blob located at `offset` with length `size` in blob file
// `blob_file_number`, carrying the given `expiration` and using
// kNoCompression. Note that EncodeBlobTTL takes the expiration first.
static std::string BlobStrTTL(uint64_t blob_file_number, uint64_t offset,
                              uint64_t size, uint64_t expiration) {
  std::string encoded;
  BlobIndex::EncodeBlobTTL(&encoded, expiration, blob_file_number, offset,
                           size, kNoCompression);
  return encoded;
}
// Test helper: returns the string produced by BlobIndex::EncodeInlinedTTL for
// `value` with the given `expiration`.
static std::string BlobStrInlinedTTL(const Slice& value,
                                     uint64_t expiration) {
  std::string encoded;
  BlobIndex::EncodeInlinedTTL(&encoded, expiration, value);
  return encoded;
}
void AddMockFile(const stl_wrappers::KVMap& contents, int level = 0) { void AddMockFile(const stl_wrappers::KVMap& contents, int level = 0) {
assert(contents.size() > 0); assert(contents.size() > 0);
@ -110,6 +134,7 @@ class CompactionJobTest : public testing::Test {
InternalKey smallest_key, largest_key; InternalKey smallest_key, largest_key;
SequenceNumber smallest_seqno = kMaxSequenceNumber; SequenceNumber smallest_seqno = kMaxSequenceNumber;
SequenceNumber largest_seqno = 0; SequenceNumber largest_seqno = 0;
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
for (auto kv : contents) { for (auto kv : contents) {
ParsedInternalKey key; ParsedInternalKey key;
std::string skey; std::string skey;
@ -132,6 +157,24 @@ class CompactionJobTest : public testing::Test {
} }
first_key = false; first_key = false;
if (key.type == kTypeBlobIndex) {
BlobIndex blob_index;
const Status s = blob_index.DecodeFrom(value);
if (!s.ok()) {
continue;
}
if (blob_index.IsInlined() || blob_index.HasTTL() ||
blob_index.file_number() == kInvalidBlobFileNumber) {
continue;
}
if (oldest_blob_file_number == kInvalidBlobFileNumber ||
oldest_blob_file_number > blob_index.file_number()) {
oldest_blob_file_number = blob_index.file_number();
}
}
} }
uint64_t file_number = versions_->NewFileNumber(); uint64_t file_number = versions_->NewFileNumber();
@ -140,7 +183,7 @@ class CompactionJobTest : public testing::Test {
VersionEdit edit; VersionEdit edit;
edit.AddFile(level, file_number, 0, 10, smallest_key, largest_key, edit.AddFile(level, file_number, 0, 10, smallest_key, largest_key,
smallest_seqno, largest_seqno, false); smallest_seqno, largest_seqno, false, oldest_blob_file_number);
mutex_.Lock(); mutex_.Lock();
versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
@ -250,7 +293,8 @@ class CompactionJobTest : public testing::Test {
const stl_wrappers::KVMap& expected_results, const stl_wrappers::KVMap& expected_results,
const std::vector<SequenceNumber>& snapshots = {}, const std::vector<SequenceNumber>& snapshots = {},
SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber, SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber,
int output_level = 1, bool verify = true) { int output_level = 1, bool verify = true,
uint64_t expected_oldest_blob_file_number = kInvalidBlobFileNumber) {
auto cfd = versions_->GetColumnFamilySet()->GetDefault(); auto cfd = versions_->GetColumnFamilySet()->GetDefault();
size_t num_input_files = 0; size_t num_input_files = 0;
@ -296,15 +340,20 @@ class CompactionJobTest : public testing::Test {
mutex_.Unlock(); mutex_.Unlock();
if (verify) { if (verify) {
if (expected_results.size() == 0) {
ASSERT_GE(compaction_job_stats_.elapsed_micros, 0U); ASSERT_GE(compaction_job_stats_.elapsed_micros, 0U);
ASSERT_EQ(compaction_job_stats_.num_input_files, num_input_files); ASSERT_EQ(compaction_job_stats_.num_input_files, num_input_files);
if (expected_results.empty()) {
ASSERT_EQ(compaction_job_stats_.num_output_files, 0U); ASSERT_EQ(compaction_job_stats_.num_output_files, 0U);
} else { } else {
ASSERT_GE(compaction_job_stats_.elapsed_micros, 0U);
ASSERT_EQ(compaction_job_stats_.num_input_files, num_input_files);
ASSERT_EQ(compaction_job_stats_.num_output_files, 1U); ASSERT_EQ(compaction_job_stats_.num_output_files, 1U);
mock_table_factory_->AssertLatestFile(expected_results); mock_table_factory_->AssertLatestFile(expected_results);
auto output_files =
cfd->current()->storage_info()->LevelFiles(output_level);
ASSERT_EQ(output_files.size(), 1);
ASSERT_EQ(output_files[0]->oldest_blob_file_number,
expected_oldest_blob_file_number);
} }
} }
} }
@ -960,6 +1009,54 @@ TEST_F(CompactionJobTest, CorruptionAfterDeletion) {
RunCompaction({files}, expected_results); RunCompaction({files}, expected_results);
} }
// Compacts two level-0 files made up solely of blob index entries and checks
// that the single output file reports the expected oldest blob file number.
TEST_F(CompactionJobTest, OldestBlobFileNumber) {
  using Entry = stl_wrappers::KVMap::value_type;

  NewDB();

  // First input file. Note: the entry for "a" is inlined TTL, so it will not
  // be considered for the purposes of identifying the oldest referenced blob
  // file.
  const Entry inlined_ttl_a(KeyStr("a", 1U, kTypeBlobIndex),
                            BlobStrInlinedTTL("foo", 1234567890ULL));
  const Entry blob_b(KeyStr("b", 2U, kTypeBlobIndex),
                     BlobStr(59, 123456, 999));
  const Entry blob_c(KeyStr("c", 3U, kTypeBlobIndex),
                     BlobStr(138, 1000, 1 << 8));
  AddMockFile(mock::MakeMockFile({inlined_ttl_a, blob_b, blob_c}));

  // Second input file. Note: the entry for "f" will be ignored as well
  // because it has TTL and hence refers to a TTL blob file.
  const Entry blob_d(KeyStr("d", 4U, kTypeBlobIndex),
                     BlobStr(199, 3 << 10, 1 << 20));
  const Entry blob_e(KeyStr("e", 5U, kTypeBlobIndex),
                     BlobStr(19, 6789, 333));
  const Entry ttl_blob_f(KeyStr("f", 6U, kTypeBlobIndex),
                         BlobStrTTL(5, 2048, 1 << 7, 1234567890ULL));
  AddMockFile(mock::MakeMockFile({blob_d, blob_e, ttl_blob_f}));

  // The expected output holds the same six values; every key is expected
  // with sequence number 0.
  const auto expected_results = mock::MakeMockFile(
      {{KeyStr("a", 0U, kTypeBlobIndex), inlined_ttl_a.second},
       {KeyStr("b", 0U, kTypeBlobIndex), blob_b.second},
       {KeyStr("c", 0U, kTypeBlobIndex), blob_c.second},
       {KeyStr("d", 0U, kTypeBlobIndex), blob_d.second},
       {KeyStr("e", 0U, kTypeBlobIndex), blob_e.second},
       {KeyStr("f", 0U, kTypeBlobIndex), ttl_blob_f.second}});

  SetLastSequence(6U);

  // Among the non-TTL references (59, 138, 199, 19) the smallest blob file
  // number is 19; blob file 5 is only referenced by the TTL entry.
  const auto input_files = cfd_->current()->storage_info()->LevelFiles(0);
  RunCompaction({input_files}, expected_results,
                std::vector<SequenceNumber>(), kMaxSequenceNumber,
                /* output_level */ 1, /* verify */ true,
                /* expected_oldest_blob_file_number */ 19);
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -88,15 +88,13 @@ class CompactionPickerTest : public testing::Test {
SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100,
size_t compensated_file_size = 0) { size_t compensated_file_size = 0) {
assert(level < vstorage_->num_levels()); assert(level < vstorage_->num_levels());
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData(
f->fd = FileDescriptor(file_number, path_id, file_size); file_number, path_id, file_size,
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue); InternalKey(smallest, smallest_seq, kTypeValue),
f->largest = InternalKey(largest, largest_seq, kTypeValue); InternalKey(largest, largest_seq, kTypeValue), smallest_seq,
f->fd.smallest_seqno = smallest_seq; largest_seq, /* marked_for_compact */ false, kInvalidBlobFileNumber);
f->fd.largest_seqno = largest_seq;
f->compensated_file_size = f->compensated_file_size =
(compensated_file_size != 0) ? compensated_file_size : file_size; (compensated_file_size != 0) ? compensated_file_size : file_size;
f->refs = 0;
vstorage_->AddFile(level, f); vstorage_->AddFile(level, f);
files_.emplace_back(f); files_.emplace_back(f);
file_map_.insert({file_number, {f, level}}); file_map_.insert({file_number, {f, level}});

@ -1257,7 +1257,7 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(), edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(),
f->fd.GetFileSize(), f->smallest, f->largest, f->fd.GetFileSize(), f->smallest, f->largest,
f->fd.smallest_seqno, f->fd.largest_seqno, f->fd.smallest_seqno, f->fd.largest_seqno,
f->marked_for_compaction); f->marked_for_compaction, f->oldest_blob_file_number);
} }
ROCKS_LOG_DEBUG(immutable_db_options_.info_log, ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(), "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
@ -2657,7 +2657,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
c->edit()->AddFile(c->output_level(), f->fd.GetNumber(), c->edit()->AddFile(c->output_level(), f->fd.GetNumber(),
f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest,
f->largest, f->fd.smallest_seqno, f->largest, f->fd.smallest_seqno,
f->fd.largest_seqno, f->marked_for_compaction); f->fd.largest_seqno, f->marked_for_compaction,
f->oldest_blob_file_number);
ROCKS_LOG_BUFFER( ROCKS_LOG_BUFFER(
log_buffer, log_buffer,

@ -128,7 +128,7 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
edit.AddFile(target_level, f->fd.GetNumber(), f->fd.GetPathId(), edit.AddFile(target_level, f->fd.GetNumber(), f->fd.GetPathId(),
f->fd.GetFileSize(), f->smallest, f->largest, f->fd.GetFileSize(), f->smallest, f->largest,
f->fd.smallest_seqno, f->fd.largest_seqno, f->fd.smallest_seqno, f->fd.largest_seqno,
f->marked_for_compaction); f->marked_for_compaction, f->oldest_blob_file_number);
} }
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),

@ -1210,7 +1210,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(), edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
meta.fd.GetFileSize(), meta.smallest, meta.largest, meta.fd.GetFileSize(), meta.smallest, meta.largest,
meta.fd.smallest_seqno, meta.fd.largest_seqno, meta.fd.smallest_seqno, meta.fd.largest_seqno,
meta.marked_for_compaction); meta.marked_for_compaction, meta.oldest_blob_file_number);
} }
InternalStats::CompactionStats stats(CompactionReason::kFlush, 1); InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);

@ -70,8 +70,8 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished(
const std::vector<std::shared_ptr<EventListener>>& listeners, const std::vector<std::shared_ptr<EventListener>>& listeners,
const std::string& db_name, const std::string& cf_name, const std::string& db_name, const std::string& cf_name,
const std::string& file_path, int job_id, const FileDescriptor& fd, const std::string& file_path, int job_id, const FileDescriptor& fd,
const TableProperties& table_properties, TableFileCreationReason reason, uint64_t oldest_blob_file_number, const TableProperties& table_properties,
const Status& s) { TableFileCreationReason reason, const Status& s) {
if (s.ok() && event_logger) { if (s.ok() && event_logger) {
JSONWriter jwriter; JSONWriter jwriter;
AppendCurrentTime(&jwriter); AppendCurrentTime(&jwriter);
@ -129,6 +129,11 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished(
} }
jwriter.EndObject(); jwriter.EndObject();
} }
if (oldest_blob_file_number != kInvalidBlobFileNumber) {
jwriter << "oldest_blob_file_number" << oldest_blob_file_number;
}
jwriter.EndObject(); jwriter.EndObject();
event_logger->Log(jwriter); event_logger->Log(jwriter);

@ -34,8 +34,8 @@ class EventHelpers {
const std::vector<std::shared_ptr<EventListener>>& listeners, const std::vector<std::shared_ptr<EventListener>>& listeners,
const std::string& db_name, const std::string& cf_name, const std::string& db_name, const std::string& cf_name,
const std::string& file_path, int job_id, const FileDescriptor& fd, const std::string& file_path, int job_id, const FileDescriptor& fd,
const TableProperties& table_properties, TableFileCreationReason reason, uint64_t oldest_blob_file_number, const TableProperties& table_properties,
const Status& s); TableFileCreationReason reason, const Status& s);
static void LogAndNotifyTableFileDeletion( static void LogAndNotifyTableFileDeletion(
EventLogger* event_logger, int job_id, EventLogger* event_logger, int job_id,
uint64_t file_number, const std::string& file_path, uint64_t file_number, const std::string& file_path,

@ -243,10 +243,11 @@ Status ExternalSstFileIngestionJob::Run() {
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(), edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(),
f.fd.GetFileSize(), f.smallest_internal_key, f.fd.GetFileSize(), f.smallest_internal_key,
f.largest_internal_key, f.assigned_seqno, f.assigned_seqno, f.largest_internal_key, f.assigned_seqno, f.assigned_seqno,
false); false, kInvalidBlobFileNumber);
} }
return status; return status;
} }

@ -408,7 +408,7 @@ Status FlushJob::WriteLevel0Table() {
edit_->AddFile(0 /* level */, meta_.fd.GetNumber(), meta_.fd.GetPathId(), edit_->AddFile(0 /* level */, meta_.fd.GetNumber(), meta_.fd.GetPathId(),
meta_.fd.GetFileSize(), meta_.smallest, meta_.largest, meta_.fd.GetFileSize(), meta_.smallest, meta_.largest,
meta_.fd.smallest_seqno, meta_.fd.largest_seqno, meta_.fd.smallest_seqno, meta_.fd.largest_seqno,
meta_.marked_for_compaction); meta_.marked_for_compaction, meta_.oldest_blob_file_number);
} }
// Note that here we treat flush as level 0 compaction in internal stats // Note that here we treat flush as level 0 compaction in internal stats

@ -4,9 +4,11 @@
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#include <algorithm> #include <algorithm>
#include <array>
#include <map> #include <map>
#include <string> #include <string>
#include "db/blob_index.h"
#include "db/column_family.h" #include "db/column_family.h"
#include "db/db_impl/db_impl.h" #include "db/db_impl/db_impl.h"
#include "db/flush_job.h" #include "db/flush_job.h"
@ -154,6 +156,7 @@ TEST_F(FlushJobTest, NonEmpty) {
// seqno [ 1, 2 ... 8998, 8999, 9000, 9001, 9002 ... 9999 ] // seqno [ 1, 2 ... 8998, 8999, 9000, 9001, 9002 ... 9999 ]
// key [ 1001, 1002 ... 9998, 9999, 0, 1, 2 ... 999 ] // key [ 1001, 1002 ... 9998, 9999, 0, 1, 2 ... 999 ]
// range-delete "9995" -> "9999" at seqno 10000 // range-delete "9995" -> "9999" at seqno 10000
// blob references with seqnos 10001..10006
for (int i = 1; i < 10000; ++i) { for (int i = 1; i < 10000; ++i) {
std::string key(ToString((i + 1000) % 10000)); std::string key(ToString((i + 1000) % 10000));
std::string value("value" + key); std::string value("value" + key);
@ -163,9 +166,43 @@ TEST_F(FlushJobTest, NonEmpty) {
inserted_keys.insert({internal_key.Encode().ToString(), value}); inserted_keys.insert({internal_key.Encode().ToString(), value});
} }
} }
{
new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995", "9999a"); new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995", "9999a");
InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion); InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion);
inserted_keys.insert({internal_key.Encode().ToString(), "9999a"}); inserted_keys.insert({internal_key.Encode().ToString(), "9999a"});
}
#ifndef ROCKSDB_LITE
// Note: the first two blob references will not be considered when resolving
// the oldest blob file referenced (the first one is inlined TTL, while the
// second one is TTL and thus points to a TTL blob file).
constexpr std::array<uint64_t, 6> blob_file_numbers{
kInvalidBlobFileNumber, 5, 103, 17, 102, 101};
for (size_t i = 0; i < blob_file_numbers.size(); ++i) {
std::string key(ToString(i + 10001));
std::string blob_index;
if (i == 0) {
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 1234567890ULL,
"foo");
} else if (i == 1) {
BlobIndex::EncodeBlobTTL(&blob_index, /* expiration */ 1234567890ULL,
blob_file_numbers[i], /* offset */ i << 10,
/* size */ i << 20, kNoCompression);
} else {
BlobIndex::EncodeBlob(&blob_index, blob_file_numbers[i],
/* offset */ i << 10, /* size */ i << 20,
kNoCompression);
}
const SequenceNumber seq(i + 10001);
new_mem->Add(seq, kTypeBlobIndex, key, blob_index);
InternalKey internal_key(key, seq, kTypeBlobIndex);
inserted_keys.emplace_hint(inserted_keys.end(),
internal_key.Encode().ToString(), blob_index);
}
#endif
autovector<MemTable*> to_delete; autovector<MemTable*> to_delete;
cfd->imm()->Add(new_mem, &to_delete); cfd->imm()->Add(new_mem, &to_delete);
@ -194,11 +231,14 @@ TEST_F(FlushJobTest, NonEmpty) {
ASSERT_GT(hist.average, 0.0); ASSERT_GT(hist.average, 0.0);
ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString()); ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString());
ASSERT_EQ( ASSERT_EQ("9999a", file_meta.largest.user_key().ToString());
"9999a",
file_meta.largest.user_key().ToString()); // range tombstone end key
ASSERT_EQ(1, file_meta.fd.smallest_seqno); ASSERT_EQ(1, file_meta.fd.smallest_seqno);
ASSERT_EQ(10000, file_meta.fd.largest_seqno); // range tombstone seqnum 10000 #ifndef ROCKSDB_LITE
ASSERT_EQ(10006, file_meta.fd.largest_seqno);
ASSERT_EQ(17, file_meta.oldest_blob_file_number);
#else
ASSERT_EQ(10000, file_meta.fd.largest_seqno);
#endif
mock_table_factory_->AssertSingleFile(inserted_keys); mock_table_factory_->AssertSingleFile(inserted_keys);
job_context.Clean(); job_context.Clean();
} }
@ -261,6 +301,7 @@ TEST_F(FlushJobTest, FlushMemTablesSingleColumnFamily) {
ASSERT_EQ(0, file_meta.fd.smallest_seqno); ASSERT_EQ(0, file_meta.fd.smallest_seqno);
ASSERT_EQ(SequenceNumber(num_mems_to_flush * num_keys_per_table - 1), ASSERT_EQ(SequenceNumber(num_mems_to_flush * num_keys_per_table - 1),
file_meta.fd.largest_seqno); file_meta.fd.largest_seqno);
ASSERT_EQ(kInvalidBlobFileNumber, file_meta.oldest_blob_file_number);
for (auto m : to_delete) { for (auto m : to_delete) {
delete m; delete m;

@ -136,10 +136,11 @@ Status ImportColumnFamilyJob::Run() {
for (size_t i = 0; i < files_to_import_.size(); ++i) { for (size_t i = 0; i < files_to_import_.size(); ++i) {
const auto& f = files_to_import_[i]; const auto& f = files_to_import_[i];
const auto& file_metadata = metadata_[i]; const auto& file_metadata = metadata_[i];
edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(), edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(),
f.fd.GetFileSize(), f.smallest_internal_key, f.fd.GetFileSize(), f.smallest_internal_key,
f.largest_internal_key, file_metadata.smallest_seqno, f.largest_internal_key, file_metadata.smallest_seqno,
file_metadata.largest_seqno, false); file_metadata.largest_seqno, false, kInvalidBlobFileNumber);
// If incoming sequence number is higher, update local sequence number. // If incoming sequence number is higher, update local sequence number.
if (file_metadata.largest_seqno > versions_->LastSequence()) { if (file_metadata.largest_seqno > versions_->LastSequence()) {

@ -34,6 +34,7 @@
// We scan every table to compute // We scan every table to compute
// (1) smallest/largest for the table // (1) smallest/largest for the table
// (2) largest sequence number in the table // (2) largest sequence number in the table
// (3) oldest blob file referred to by the table (if applicable)
// //
// If we are unable to scan the file, then we ignore the table. // If we are unable to scan the file, then we ignore the table.
// //
@ -224,8 +225,6 @@ class Repairer {
FileMetaData meta; FileMetaData meta;
uint32_t column_family_id; uint32_t column_family_id;
std::string column_family_name; std::string column_family_name;
SequenceNumber min_sequence;
SequenceNumber max_sequence;
}; };
std::string const dbname_; std::string const dbname_;
@ -526,10 +525,7 @@ class Repairer {
TableReaderCaller::kRepair, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kRepair, /*arena=*/nullptr, /*skip_filters=*/false,
/*level=*/-1, /*smallest_compaction_key=*/nullptr, /*level=*/-1, /*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr); /*largest_compaction_key=*/nullptr);
bool empty = true;
ParsedInternalKey parsed; ParsedInternalKey parsed;
t->min_sequence = 0;
t->max_sequence = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
Slice key = iter->key(); Slice key = iter->key();
if (!ParseInternalKey(key, &parsed)) { if (!ParseInternalKey(key, &parsed)) {
@ -540,18 +536,9 @@ class Repairer {
} }
counter++; counter++;
if (empty) {
empty = false; t->meta.UpdateBoundaries(key, iter->value(), parsed.sequence,
t->meta.smallest.DecodeFrom(key); parsed.type);
t->min_sequence = parsed.sequence;
}
t->meta.largest.DecodeFrom(key);
if (parsed.sequence < t->min_sequence) {
t->min_sequence = parsed.sequence;
}
if (parsed.sequence > t->max_sequence) {
t->max_sequence = parsed.sequence;
}
} }
if (!iter->status().ok()) { if (!iter->status().ok()) {
status = iter->status(); status = iter->status();
@ -570,8 +557,8 @@ class Repairer {
SequenceNumber max_sequence = 0; SequenceNumber max_sequence = 0;
for (size_t i = 0; i < tables_.size(); i++) { for (size_t i = 0; i < tables_.size(); i++) {
cf_id_to_tables[tables_[i].column_family_id].push_back(&tables_[i]); cf_id_to_tables[tables_[i].column_family_id].push_back(&tables_[i]);
if (max_sequence < tables_[i].max_sequence) { if (max_sequence < tables_[i].meta.fd.largest_seqno) {
max_sequence = tables_[i].max_sequence; max_sequence = tables_[i].meta.fd.largest_seqno;
} }
} }
vset_.SetLastAllocatedSequence(max_sequence); vset_.SetLastAllocatedSequence(max_sequence);
@ -591,8 +578,10 @@ class Repairer {
for (const auto* table : cf_id_and_tables.second) { for (const auto* table : cf_id_and_tables.second) {
edit.AddFile(0, table->meta.fd.GetNumber(), table->meta.fd.GetPathId(), edit.AddFile(0, table->meta.fd.GetNumber(), table->meta.fd.GetPathId(),
table->meta.fd.GetFileSize(), table->meta.smallest, table->meta.fd.GetFileSize(), table->meta.smallest,
table->meta.largest, table->min_sequence, table->meta.largest, table->meta.fd.smallest_seqno,
table->max_sequence, table->meta.marked_for_compaction); table->meta.fd.largest_seqno,
table->meta.marked_for_compaction,
table->meta.oldest_blob_file_number);
} }
assert(next_file_number_ > 0); assert(next_file_number_ > 0);
vset_.MarkFileNumberUsed(next_file_number_ - 1); vset_.MarkFileNumberUsed(next_file_number_ - 1);

@ -59,14 +59,11 @@ class VersionBuilderTest : public testing::Test {
bool sampled = false, SequenceNumber smallest_seqno = 0, bool sampled = false, SequenceNumber smallest_seqno = 0,
SequenceNumber largest_seqno = 0) { SequenceNumber largest_seqno = 0) {
assert(level < vstorage_.num_levels()); assert(level < vstorage_.num_levels());
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData(
f->fd = FileDescriptor(file_number, path_id, file_size); file_number, path_id, file_size, GetInternalKey(smallest, smallest_seq),
f->smallest = GetInternalKey(smallest, smallest_seq); GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno,
f->largest = GetInternalKey(largest, largest_seq); /* marked_for_compact */ false, kInvalidBlobFileNumber);
f->fd.smallest_seqno = smallest_seqno;
f->fd.largest_seqno = largest_seqno;
f->compensated_file_size = file_size; f->compensated_file_size = file_size;
f->refs = 0;
f->num_entries = num_entries; f->num_entries = num_entries;
f->num_deletions = num_deletions; f->num_deletions = num_deletions;
vstorage_.AddFile(level, f); vstorage_.AddFile(level, f);
@ -115,7 +112,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) {
VersionEdit version_edit; VersionEdit version_edit;
version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
GetInternalKey("350"), 200, 200, false); GetInternalKey("350"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.DeleteFile(3, 27U); version_edit.DeleteFile(3, 27U);
EnvOptions env_options; EnvOptions env_options;
@ -149,7 +147,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) {
VersionEdit version_edit; VersionEdit version_edit;
version_edit.AddFile(3, 666, 0, 100U, GetInternalKey("301"), version_edit.AddFile(3, 666, 0, 100U, GetInternalKey("301"),
GetInternalKey("350"), 200, 200, false); GetInternalKey("350"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 1U);
version_edit.DeleteFile(0, 88U); version_edit.DeleteFile(0, 88U);
@ -186,7 +185,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) {
VersionEdit version_edit; VersionEdit version_edit;
version_edit.AddFile(4, 666, 0, 100U, GetInternalKey("301"), version_edit.AddFile(4, 666, 0, 100U, GetInternalKey("301"),
GetInternalKey("350"), 200, 200, false); GetInternalKey("350"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 1U);
version_edit.DeleteFile(0, 88U); version_edit.DeleteFile(0, 88U);
version_edit.DeleteFile(4, 6U); version_edit.DeleteFile(4, 6U);
@ -214,15 +214,20 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) {
VersionEdit version_edit; VersionEdit version_edit;
version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
GetInternalKey("350"), 200, 200, false); GetInternalKey("350"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"),
GetInternalKey("450"), 200, 200, false); GetInternalKey("450"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"),
GetInternalKey("650"), 200, 200, false); GetInternalKey("650"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"),
GetInternalKey("550"), 200, 200, false); GetInternalKey("550"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"),
GetInternalKey("750"), 200, 200, false); GetInternalKey("750"), 200, 200, false,
kInvalidBlobFileNumber);
EnvOptions env_options; EnvOptions env_options;
@ -248,24 +253,31 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) {
VersionEdit version_edit; VersionEdit version_edit;
version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
GetInternalKey("350"), 200, 200, false); GetInternalKey("350"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"),
GetInternalKey("450"), 200, 200, false); GetInternalKey("450"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"),
GetInternalKey("650"), 200, 200, false); GetInternalKey("650"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"),
GetInternalKey("550"), 200, 200, false); GetInternalKey("550"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"),
GetInternalKey("750"), 200, 200, false); GetInternalKey("750"), 200, 200, false,
kInvalidBlobFileNumber);
version_builder.Apply(&version_edit); version_builder.Apply(&version_edit);
VersionEdit version_edit2; VersionEdit version_edit2;
version_edit.AddFile(2, 808, 0, 100U, GetInternalKey("901"), version_edit.AddFile(2, 808, 0, 100U, GetInternalKey("901"),
GetInternalKey("950"), 200, 200, false); GetInternalKey("950"), 200, 200, false,
kInvalidBlobFileNumber);
version_edit2.DeleteFile(2, 616); version_edit2.DeleteFile(2, 616);
version_edit2.DeleteFile(2, 636); version_edit2.DeleteFile(2, 636);
version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"), version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"),
GetInternalKey("850"), 200, 200, false); GetInternalKey("850"), 200, 200, false,
kInvalidBlobFileNumber);
version_builder.Apply(&version_edit2); version_builder.Apply(&version_edit2);
version_builder.SaveTo(&new_vstorage); version_builder.SaveTo(&new_vstorage);

@ -9,6 +9,7 @@
#include "db/version_edit.h" #include "db/version_edit.h"
#include "db/blob_index.h"
#include "db/version_set.h" #include "db/version_set.h"
#include "logging/event_logger.h" #include "logging/event_logger.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
@ -59,6 +60,7 @@ enum CustomTag : uint32_t {
// kMinLogNumberToKeep as part of a CustomTag as a hack. This should be // kMinLogNumberToKeep as part of a CustomTag as a hack. This should be
// removed when manifest becomes forward-compatible. // removed when manifest becomes forward-compatible.
kMinLogNumberToKeepHack = 3, kMinLogNumberToKeepHack = 3,
kOldestBlobFileNumber = 4,
kPathId = 65, kPathId = 65,
}; };
// If this bit for the custom tag is set, opening DB should fail if // If this bit for the custom tag is set, opening DB should fail if
@ -70,6 +72,49 @@ uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id) {
return number | (path_id * (kFileNumberMask + 1)); return number | (path_id * (kFileNumberMask + 1));
} }
void FileMetaData::UpdateBoundaries(const Slice& key, const Slice& value,
SequenceNumber seqno,
ValueType value_type) {
if (smallest.size() == 0) {
smallest.DecodeFrom(key);
}
largest.DecodeFrom(key);
fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);
fd.largest_seqno = std::max(fd.largest_seqno, seqno);
#ifndef ROCKSDB_LITE
if (value_type == kTypeBlobIndex) {
BlobIndex blob_index;
const Status s = blob_index.DecodeFrom(value);
if (!s.ok()) {
return;
}
if (blob_index.IsInlined()) {
return;
}
if (blob_index.HasTTL()) {
return;
}
// Paranoid check: this should not happen because BlobDB numbers the blob
// files starting from 1.
if (blob_index.file_number() == kInvalidBlobFileNumber) {
return;
}
if (oldest_blob_file_number == kInvalidBlobFileNumber ||
oldest_blob_file_number > blob_index.file_number()) {
oldest_blob_file_number = blob_index.file_number();
}
}
#else
(void)value;
(void)value_type;
#endif
}
void VersionEdit::Clear() { void VersionEdit::Clear() {
db_id_.clear(); db_id_.clear();
comparator_.clear(); comparator_.clear();
@ -134,7 +179,8 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
return false; return false;
} }
bool has_customized_fields = false; bool has_customized_fields = false;
if (f.marked_for_compaction || has_min_log_number_to_keep_) { if (f.marked_for_compaction || has_min_log_number_to_keep_ ||
f.oldest_blob_file_number != kInvalidBlobFileNumber) {
PutVarint32(dst, kNewFile4); PutVarint32(dst, kNewFile4);
has_customized_fields = true; has_customized_fields = true;
} else if (f.fd.GetPathId() == 0) { } else if (f.fd.GetPathId() == 0) {
@ -197,6 +243,12 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
PutLengthPrefixedSlice(dst, Slice(varint_log_number)); PutLengthPrefixedSlice(dst, Slice(varint_log_number));
min_log_num_written = true; min_log_num_written = true;
} }
if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
PutVarint32(dst, CustomTag::kOldestBlobFileNumber);
std::string oldest_blob_file_number;
PutVarint64(&oldest_blob_file_number, f.oldest_blob_file_number);
PutLengthPrefixedSlice(dst, Slice(oldest_blob_file_number));
}
TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields",
dst); dst);
@ -302,6 +354,11 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) {
} }
has_min_log_number_to_keep_ = true; has_min_log_number_to_keep_ = true;
break; break;
case kOldestBlobFileNumber:
if (!GetVarint64(&field, &f.oldest_blob_file_number)) {
return "invalid oldest blob file number";
}
break;
default: default:
if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) {
// Should not proceed if cannot understand it // Should not proceed if cannot understand it
@ -602,6 +659,10 @@ std::string VersionEdit::DebugString(bool hex_key) const {
r.append(f.smallest.DebugString(hex_key)); r.append(f.smallest.DebugString(hex_key));
r.append(" .. "); r.append(" .. ");
r.append(f.largest.DebugString(hex_key)); r.append(f.largest.DebugString(hex_key));
if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
r.append(" blob_file:");
AppendNumberTo(&r, f.oldest_blob_file_number);
}
} }
r.append("\n ColumnFamily: "); r.append("\n ColumnFamily: ");
AppendNumberTo(&r, column_family_); AppendNumberTo(&r, column_family_);
@ -676,6 +737,9 @@ std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const {
jw << "FileSize" << f.fd.GetFileSize(); jw << "FileSize" << f.fd.GetFileSize();
jw << "SmallestIKey" << f.smallest.DebugString(hex_key); jw << "SmallestIKey" << f.smallest.DebugString(hex_key);
jw << "LargestIKey" << f.largest.DebugString(hex_key); jw << "LargestIKey" << f.largest.DebugString(hex_key);
if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
jw << "OldestBlobFile" << f.oldest_blob_file_number;
}
jw.EndArrayedObject(); jw.EndArrayedObject();
} }

@ -22,7 +22,8 @@ namespace rocksdb {
class VersionSet; class VersionSet;
const uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF;
constexpr uint64_t kInvalidBlobFileNumber = 0;
extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id); extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);
@ -91,7 +92,7 @@ struct FileMetaData {
InternalKey largest; // Largest internal key served by table InternalKey largest; // Largest internal key served by table
// Needs to be disposed when refs becomes 0. // Needs to be disposed when refs becomes 0.
Cache::Handle* table_reader_handle; Cache::Handle* table_reader_handle = nullptr;
FileSampledStats stats; FileSampledStats stats;
@ -100,45 +101,44 @@ struct FileMetaData {
// File size compensated by deletion entry. // File size compensated by deletion entry.
// This is updated in Version::UpdateAccumulatedStats() first time when the // This is updated in Version::UpdateAccumulatedStats() first time when the
// file is created or loaded. After it is updated (!= 0), it is immutable. // file is created or loaded. After it is updated (!= 0), it is immutable.
uint64_t compensated_file_size; uint64_t compensated_file_size = 0;
// These values can mutate, but they can only be read or written from // These values can mutate, but they can only be read or written from
// single-threaded LogAndApply thread // single-threaded LogAndApply thread
uint64_t num_entries; // the number of entries. uint64_t num_entries = 0; // the number of entries.
uint64_t num_deletions; // the number of deletion entries. uint64_t num_deletions = 0; // the number of deletion entries.
uint64_t raw_key_size; // total uncompressed key size. uint64_t raw_key_size = 0; // total uncompressed key size.
uint64_t raw_value_size; // total uncompressed value size. uint64_t raw_value_size = 0; // total uncompressed value size.
int refs; // Reference count int refs = 0; // Reference count
bool being_compacted; // Is this file undergoing compaction? bool being_compacted = false; // Is this file undergoing compaction?
bool init_stats_from_file; // true if the data-entry stats of this file bool init_stats_from_file = false; // true if the data-entry stats of this
// has initialized from file. // file has initialized from file.
bool marked_for_compaction; // True if client asked us nicely to compact this bool marked_for_compaction = false; // True if client asked us nicely to
// file. // compact this file.
FileMetaData() // Used only in BlobDB. The file number of the oldest blob file this SST file
: table_reader_handle(nullptr), // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1.
compensated_file_size(0), uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
num_entries(0),
num_deletions(0), FileMetaData() = default;
raw_key_size(0),
raw_value_size(0), FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size,
refs(0), const InternalKey& smallest_key, const InternalKey& largest_key,
being_compacted(false), const SequenceNumber& smallest_seq,
init_stats_from_file(false), const SequenceNumber& largest_seq, bool marked_for_compact,
marked_for_compaction(false) {} uint64_t oldest_blob_file)
: fd(file, file_path_id, file_size, smallest_seq, largest_seq),
smallest(smallest_key),
largest(largest_key),
marked_for_compaction(marked_for_compact),
oldest_blob_file_number(oldest_blob_file) {}
// REQUIRED: Keys must be given to the function in sorted order (it expects // REQUIRED: Keys must be given to the function in sorted order (it expects
// the last key to be the largest). // the last key to be the largest).
void UpdateBoundaries(const Slice& key, SequenceNumber seqno) { void UpdateBoundaries(const Slice& key, const Slice& value,
if (smallest.size() == 0) { SequenceNumber seqno, ValueType value_type);
smallest.DecodeFrom(key);
}
largest.DecodeFrom(key);
fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);
fd.largest_seqno = std::max(fd.largest_seqno, seqno);
}
// Unlike UpdateBoundaries, ranges do not need to be presented in any // Unlike UpdateBoundaries, ranges do not need to be presented in any
// particular order. // particular order.
@ -249,21 +249,18 @@ class VersionEdit {
// Add the specified file at the specified number. // Add the specified file at the specified number.
// REQUIRES: This version has not been saved (see VersionSet::SaveTo) // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file // REQUIRES: "smallest" and "largest" are smallest and largest keys in file
// REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file
// referred to by this file if any, kInvalidBlobFileNumber otherwise.
void AddFile(int level, uint64_t file, uint32_t file_path_id, void AddFile(int level, uint64_t file, uint32_t file_path_id,
uint64_t file_size, const InternalKey& smallest, uint64_t file_size, const InternalKey& smallest,
const InternalKey& largest, const SequenceNumber& smallest_seqno, const InternalKey& largest, const SequenceNumber& smallest_seqno,
const SequenceNumber& largest_seqno, const SequenceNumber& largest_seqno, bool marked_for_compaction,
bool marked_for_compaction) { uint64_t oldest_blob_file_number) {
assert(smallest_seqno <= largest_seqno); assert(smallest_seqno <= largest_seqno);
FileMetaData f; new_files_.emplace_back(
f.fd = FileDescriptor(file, file_path_id, file_size, smallest_seqno, level, FileMetaData(file, file_path_id, file_size, smallest, largest,
largest_seqno); smallest_seqno, largest_seqno,
f.smallest = smallest; marked_for_compaction, oldest_blob_file_number));
f.largest = largest;
f.fd.smallest_seqno = smallest_seqno;
f.fd.largest_seqno = largest_seqno;
f.marked_for_compaction = marked_for_compaction;
new_files_.emplace_back(level, std::move(f));
} }
void AddFile(int level, const FileMetaData& f) { void AddFile(int level, const FileMetaData& f) {

@ -36,7 +36,7 @@ TEST_F(VersionEditTest, EncodeDecode) {
edit.AddFile(3, kBig + 300 + i, kBig32Bit + 400 + i, 0, edit.AddFile(3, kBig + 300 + i, kBig32Bit + 400 + i, 0,
InternalKey("foo", kBig + 500 + i, kTypeValue), InternalKey("foo", kBig + 500 + i, kTypeValue),
InternalKey("zoo", kBig + 600 + i, kTypeDeletion), InternalKey("zoo", kBig + 600 + i, kTypeDeletion),
kBig + 500 + i, kBig + 600 + i, false); kBig + 500 + i, kBig + 600 + i, false, kInvalidBlobFileNumber);
edit.DeleteFile(4, kBig + 700 + i); edit.DeleteFile(4, kBig + 700 + i);
} }
@ -53,13 +53,16 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) {
VersionEdit edit; VersionEdit edit;
edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue),
InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
kBig + 600, true); kBig + 600, true, kInvalidBlobFileNumber);
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
kBig + 601, false); kBig + 601, false, kInvalidBlobFileNumber);
edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue), edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue),
InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502, InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502,
kBig + 602, true); kBig + 602, true, kInvalidBlobFileNumber);
edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex),
InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503,
kBig + 603, true, 1001);
edit.DeleteFile(4, 700); edit.DeleteFile(4, 700);
@ -78,9 +81,18 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) {
ASSERT_TRUE(new_files[0].second.marked_for_compaction); ASSERT_TRUE(new_files[0].second.marked_for_compaction);
ASSERT_TRUE(!new_files[1].second.marked_for_compaction); ASSERT_TRUE(!new_files[1].second.marked_for_compaction);
ASSERT_TRUE(new_files[2].second.marked_for_compaction); ASSERT_TRUE(new_files[2].second.marked_for_compaction);
ASSERT_TRUE(new_files[3].second.marked_for_compaction);
ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); ASSERT_EQ(3u, new_files[0].second.fd.GetPathId());
ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); ASSERT_EQ(3u, new_files[1].second.fd.GetPathId());
ASSERT_EQ(0u, new_files[2].second.fd.GetPathId()); ASSERT_EQ(0u, new_files[2].second.fd.GetPathId());
ASSERT_EQ(0u, new_files[3].second.fd.GetPathId());
ASSERT_EQ(kInvalidBlobFileNumber,
new_files[0].second.oldest_blob_file_number);
ASSERT_EQ(kInvalidBlobFileNumber,
new_files[1].second.oldest_blob_file_number);
ASSERT_EQ(kInvalidBlobFileNumber,
new_files[2].second.oldest_blob_file_number);
ASSERT_EQ(1001, new_files[3].second.oldest_blob_file_number);
} }
TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { TEST_F(VersionEditTest, ForwardCompatibleNewFile4) {
@ -88,10 +100,10 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) {
VersionEdit edit; VersionEdit edit;
edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue),
InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
kBig + 600, true); kBig + 600, true, kInvalidBlobFileNumber);
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
kBig + 601, false); kBig + 601, false, kInvalidBlobFileNumber);
edit.DeleteFile(4, 700); edit.DeleteFile(4, 700);
edit.SetComparatorName("foo"); edit.SetComparatorName("foo");
@ -137,7 +149,7 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) {
VersionEdit edit; VersionEdit edit;
edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue),
InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
kBig + 600, true); kBig + 600, true, kInvalidBlobFileNumber);
edit.SetComparatorName("foo"); edit.SetComparatorName("foo");
edit.SetLogNumber(kBig + 100); edit.SetLogNumber(kBig + 100);
@ -164,7 +176,8 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) {
TEST_F(VersionEditTest, EncodeEmptyFile) { TEST_F(VersionEditTest, EncodeEmptyFile) {
VersionEdit edit; VersionEdit edit;
edit.AddFile(0, 0, 0, 0, InternalKey(), InternalKey(), 0, 0, false); edit.AddFile(0, 0, 0, 0, InternalKey(), InternalKey(), 0, 0, false,
kInvalidBlobFileNumber);
std::string buffer; std::string buffer;
ASSERT_TRUE(!edit.EncodeTo(&buffer)); ASSERT_TRUE(!edit.EncodeTo(&buffer));
} }

@ -3379,6 +3379,10 @@ std::string Version::DebugString(bool hex, bool print_stats) const {
r.append(" .. "); r.append(" .. ");
r.append(files[i]->largest.DebugString(hex)); r.append(files[i]->largest.DebugString(hex));
r.append("]"); r.append("]");
if (files[i]->oldest_blob_file_number != kInvalidBlobFileNumber) {
r.append(" blob_file:");
AppendNumberTo(&r, files[i]->oldest_blob_file_number);
}
if (print_stats) { if (print_stats) {
r.append("("); r.append("(");
r.append(ToString( r.append(ToString(
@ -4923,7 +4927,7 @@ Status VersionSet::WriteCurrentStateToManifest(log::Writer* log) {
edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(), edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(),
f->fd.GetFileSize(), f->smallest, f->largest, f->fd.GetFileSize(), f->smallest, f->largest,
f->fd.smallest_seqno, f->fd.largest_seqno, f->fd.smallest_seqno, f->fd.largest_seqno,
f->marked_for_compaction); f->marked_for_compaction, f->oldest_blob_file_number);
} }
} }
edit.SetLogNumber(cfd->GetLogNumber()); edit.SetLogNumber(cfd->GetLogNumber());

@ -35,10 +35,11 @@ class GenerateLevelFilesBriefTest : public testing::Test {
void Add(const char* smallest, const char* largest, void Add(const char* smallest, const char* largest,
SequenceNumber smallest_seq = 100, SequenceNumber smallest_seq = 100,
SequenceNumber largest_seq = 100) { SequenceNumber largest_seq = 100) {
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData(
f->fd = FileDescriptor(files_.size() + 1, 0, 0); files_.size() + 1, 0, 0,
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue); InternalKey(smallest, smallest_seq, kTypeValue),
f->largest = InternalKey(largest, largest_seq, kTypeValue); InternalKey(largest, largest_seq, kTypeValue), smallest_seq,
largest_seq, /* marked_for_compact */ false, kInvalidBlobFileNumber);
files_.push_back(f); files_.push_back(f);
} }
@ -129,28 +130,22 @@ class VersionStorageInfoTest : public testing::Test {
void Add(int level, uint32_t file_number, const char* smallest, void Add(int level, uint32_t file_number, const char* smallest,
const char* largest, uint64_t file_size = 0) { const char* largest, uint64_t file_size = 0) {
assert(level < vstorage_.num_levels()); assert(level < vstorage_.num_levels());
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData(
f->fd = FileDescriptor(file_number, 0, file_size); file_number, 0, file_size, GetInternalKey(smallest, 0),
f->smallest = GetInternalKey(smallest, 0); GetInternalKey(largest, 0), /* smallest_seq */ 0, /* largest_seq */ 0,
f->largest = GetInternalKey(largest, 0); /* marked_for_compact */ false, kInvalidBlobFileNumber);
f->compensated_file_size = file_size; f->compensated_file_size = file_size;
f->refs = 0;
f->num_entries = 0;
f->num_deletions = 0;
vstorage_.AddFile(level, f); vstorage_.AddFile(level, f);
} }
void Add(int level, uint32_t file_number, const InternalKey& smallest, void Add(int level, uint32_t file_number, const InternalKey& smallest,
const InternalKey& largest, uint64_t file_size = 0) { const InternalKey& largest, uint64_t file_size = 0) {
assert(level < vstorage_.num_levels()); assert(level < vstorage_.num_levels());
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData(
f->fd = FileDescriptor(file_number, 0, file_size); file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0,
f->smallest = smallest; /* largest_seq */ 0, /* marked_for_compact */ false,
f->largest = largest; kInvalidBlobFileNumber);
f->compensated_file_size = file_size; f->compensated_file_size = file_size;
f->refs = 0;
f->num_entries = 0;
f->num_deletions = 0;
vstorage_.AddFile(level, f); vstorage_.AddFile(level, f);
} }

Loading…
Cancel
Save