Refactor: group metadata needed to open an SST file into a separate copyable struct

Summary:
We added multiple fields to FileMetaData recently and are planning to add more.
This refactoring separates out the minimum information needed to access the file. This object is copyable (FileMetaData is not copyable because of its ref counter). I hope this refactoring can enable further improvements:

(1) use it to design a more efficient data structure to speed up read queries.
(2) in the future, when we add storage-level information, we can easily do the encoding instead of enlarging this structure, which might expand the memory working set for file metadata.

The definition is the same as the current EncodedFileMetaData used in the two-level iterator, so the logic in the two-level iterator is now easier to understand.

Test Plan: make all check

Reviewers: haobo, igor, ljin

Reviewed By: ljin

Subscribers: leveldb, dhruba, yhchiang

Differential Revision: https://reviews.facebook.net/D18933
main
sdong 10 years ago
parent 4d913cfbc3
commit cadc1adffa
  1. 12
      db/builder.cc
  2. 2
      db/builder.h
  3. 14
      db/compaction.cc
  4. 88
      db/compaction_picker.cc
  5. 92
      db/db_impl.cc
  6. 4
      db/forward_iterator.cc
  7. 34
      db/repair.cc
  8. 39
      db/table_cache.cc
  9. 12
      db/table_cache.h
  10. 33
      db/version_edit.cc
  11. 35
      db/version_edit.h
  12. 120
      db/version_set.cc
  13. 2
      db/version_set_test.cc

@ -42,7 +42,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
const SequenceNumber earliest_seqno_in_memtable,
const CompressionType compression) {
Status s;
meta->file_size = 0;
meta->fd.file_size = 0;
meta->smallest_seqno = meta->largest_seqno = 0;
iter->SeekToFirst();
@ -54,7 +54,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
purge = false;
}
std::string fname = TableFileName(dbname, meta->number);
std::string fname = TableFileName(dbname, meta->fd.GetNumber());
if (iter->Valid()) {
unique_ptr<WritableFile> file;
s = env->NewWritableFile(fname, &file, soptions);
@ -177,8 +177,8 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
if (s.ok()) {
s = builder->Finish();
if (s.ok()) {
meta->file_size = builder->FileSize();
assert(meta->file_size > 0);
meta->fd.file_size = builder->FileSize();
assert(meta->fd.GetFileSize() > 0);
}
} else {
builder->Abandon();
@ -202,7 +202,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
if (s.ok()) {
// Verify that the table is usable
Iterator* it = table_cache->NewIterator(ReadOptions(), soptions,
internal_comparator, *meta);
internal_comparator, meta->fd);
s = it->status();
delete it;
}
@ -213,7 +213,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
s = iter->status();
}
if (s.ok() && meta->file_size > 0) {
if (s.ok() && meta->fd.GetFileSize() > 0) {
// Keep it
} else {
env->DeleteFile(fname);

@ -29,7 +29,7 @@ extern TableBuilder* NewTableBuilder(
WritableFile* file, CompressionType compression_type);
// Build a Table file from the contents of *iter. The generated file
// will be named according to meta->number. On success, the rest of
// will be named according to number specified in meta. On success, the rest of
// *meta will be filled with metadata about the generated table.
// If no data is present in *iter, meta->fd.file_size will be set to
// zero, and no Table file will be produced.

@ -21,7 +21,7 @@ namespace rocksdb {
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
uint64_t sum = 0;
for (size_t i = 0; i < files.size() && files[i]; i++) {
sum += files[i]->file_size;
sum += files[i]->fd.GetFileSize();
}
return sum;
}
@ -90,7 +90,7 @@ bool Compaction::IsDeletionCompaction() const { return deletion_compaction_; }
void Compaction::AddInputDeletions(VersionEdit* edit) {
for (int which = 0; which < 2; which++) {
for (size_t i = 0; i < inputs_[which].size(); i++) {
edit->DeleteFile(level_ + which, inputs_[which][i]->number);
edit->DeleteFile(level_ + which, inputs_[which][i]->fd.GetNumber());
}
}
}
@ -127,7 +127,7 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) {
icmp->Compare(internal_key,
grandparents_[grandparent_index_]->largest.Encode()) > 0) {
if (seen_key_) {
overlapped_bytes_ += grandparents_[grandparent_index_]->file_size;
overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
}
assert(grandparent_index_ + 1 >= grandparents_.size() ||
icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
@ -212,9 +212,9 @@ int InputSummary(const std::vector<FileMetaData*>& files, char* output,
int sz = len - write;
int ret;
char sztxt[16];
AppendHumanBytes(files.at(i)->file_size, sztxt, 16);
ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ", files.at(i)->number,
sztxt);
AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
files.at(i)->fd.GetNumber(), sztxt);
if (ret < 0 || ret >= sz) break;
write += ret;
}
@ -258,7 +258,7 @@ uint64_t Compaction::OutputFilePreallocationSize() {
cfd_->compaction_picker()->MaxFileSizeForLevel(output_level());
} else {
for (const auto& f : inputs_[0]) {
preallocation_size += f->file_size;
preallocation_size += f->fd.GetFileSize();
}
}
// Over-estimate slightly so we don't end up just barely crossing

@ -22,7 +22,7 @@ namespace {
uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
uint64_t sum = 0;
for (size_t i = 0; i < files.size() && files[i]; i++) {
sum += files[i]->file_size;
sum += files[i]->fd.GetFileSize();
}
return sum;
}
@ -80,7 +80,7 @@ void CompactionPicker::SizeBeingCompacted(std::vector<uint64_t>& sizes) {
for (auto c : compactions_in_progress_[level]) {
assert(c->level() == level);
for (int i = 0; i < c->num_input_files(0); i++) {
total += c->input(0,i)->file_size;
total += c->input(0, i)->fd.GetFileSize();
}
}
sizes[level] = total;
@ -335,7 +335,7 @@ Compaction* CompactionPicker::CompactRange(Version* version, int input_level,
MaxFileSizeForLevel(input_level) * options_->source_compaction_factor;
uint64_t total = 0;
for (size_t i = 0; i + 1 < inputs.size(); ++i) {
uint64_t s = inputs[i]->file_size;
uint64_t s = inputs[i]->fd.GetFileSize();
total += s;
if (total >= limit) {
**compaction_end = inputs[i + 1]->smallest;
@ -508,10 +508,11 @@ Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version,
FileMetaData* f = c->input_version_->files_[level][index];
// check to verify files are arranged in descending size
assert((i == file_size.size() - 1) ||
(i >= Version::number_of_files_to_sort_ - 1) ||
(f->file_size >=
c->input_version_->files_[level][file_size[i + 1]]->file_size));
assert(
(i == file_size.size() - 1) ||
(i >= Version::number_of_files_to_sort_ - 1) ||
(f->fd.GetFileSize() >=
c->input_version_->files_[level][file_size[i + 1]]->fd.GetFileSize()));
// do not pick a file to compact if it is being compacted
// from n-1 level.
@ -680,19 +681,21 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
candidate_count = 1;
break;
}
LogToBuffer(
log_buffer, "[%s] Universal: file %lu[%d] being compacted, skipping",
version->cfd_->GetName().c_str(), (unsigned long)f->number, loop);
LogToBuffer(log_buffer,
"[%s] Universal: file %lu[%d] being compacted, skipping",
version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), loop);
f = nullptr;
}
// This file is not being compacted. Consider it as the
// first candidate to be compacted.
uint64_t candidate_size = f != nullptr? f->file_size : 0;
uint64_t candidate_size = f != nullptr ? f->fd.GetFileSize() : 0;
if (f != nullptr) {
LogToBuffer(
log_buffer, "[%s] Universal: Possible candidate file %lu[%d].",
version->cfd_->GetName().c_str(), (unsigned long)f->number, loop);
LogToBuffer(log_buffer,
"[%s] Universal: Possible candidate file %lu[%d].",
version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), loop);
}
// Check if the succeeding files need compaction.
@ -711,13 +714,13 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
// kCompactionStopStyleSimilarSize, it's simply the size of the last
// picked file.
uint64_t sz = (candidate_size * (100L + ratio)) /100;
if (sz < f->file_size) {
if (sz < f->fd.GetFileSize()) {
break;
}
if (options_->compaction_options_universal.stop_style == kCompactionStopStyleSimilarSize) {
// Similar-size stopping rule: also check the last picked file isn't
// far larger than the next candidate file.
sz = (f->file_size * (100L + ratio)) / 100;
sz = (f->fd.GetFileSize() * (100L + ratio)) / 100;
if (sz < candidate_size) {
// If the small file we've encountered begins a run of similar-size
// files, we'll pick them up on a future iteration of the outer
@ -725,9 +728,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
// by the last-resort read amp strategy which disregards size ratios.
break;
}
candidate_size = f->file_size;
candidate_size = f->fd.GetFileSize();
} else { // default kCompactionStopStyleTotalSize
candidate_size += f->file_size;
candidate_size += f->fd.GetFileSize();
}
candidate_count++;
}
@ -744,8 +747,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
FileMetaData* f = version->files_[level][index];
LogToBuffer(log_buffer,
"[%s] Universal: Skipping file %lu[%d] with size %lu %d\n",
version->cfd_->GetName().c_str(), (unsigned long)f->number,
i, (unsigned long)f->file_size, f->being_compacted);
version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), i,
(unsigned long)f->fd.GetFileSize(), f->being_compacted);
}
}
}
@ -763,7 +767,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
uint64_t older_file_size = 0;
for (unsigned int i = file_by_time.size() - 1; i >= first_index_after;
i--) {
older_file_size += version->files_[level][file_by_time[i]]->file_size;
older_file_size +=
version->files_[level][file_by_time[i]]->fd.GetFileSize();
if (older_file_size * 100L >= total_size * (long) ratio_to_compress) {
enable_compression = false;
break;
@ -779,10 +784,10 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
int index = file_by_time[i];
FileMetaData* f = c->input_version_->files_[level][index];
c->inputs_[0].push_back(f);
LogToBuffer(log_buffer,
"[%s] Universal: Picking file %lu[%d] with size %lu\n",
version->cfd_->GetName().c_str(), (unsigned long)f->number, i,
(unsigned long)f->file_size);
LogToBuffer(
log_buffer, "[%s] Universal: Picking file %lu[%d] with size %lu\n",
version->cfd_->GetName().c_str(), (unsigned long)f->fd.GetNumber(), i,
(unsigned long)f->fd.GetFileSize());
}
return c;
}
@ -818,10 +823,10 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
start_index = loop; // Consider this as the first candidate.
break;
}
LogToBuffer(log_buffer,
"[%s] Universal: skipping file %lu[%d] compacted %s",
version->cfd_->GetName().c_str(), (unsigned long)f->number,
loop, " cannot be a candidate to reduce size amp.\n");
LogToBuffer(
log_buffer, "[%s] Universal: skipping file %lu[%d] compacted %s",
version->cfd_->GetName().c_str(), (unsigned long)f->fd.GetNumber(),
loop, " cannot be a candidate to reduce size amp.\n");
f = nullptr;
}
if (f == nullptr) {
@ -829,8 +834,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
}
LogToBuffer(log_buffer, "[%s] Universal: First candidate file %lu[%d] %s",
version->cfd_->GetName().c_str(), (unsigned long)f->number,
start_index, " to reduce size amp.\n");
version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), start_index,
" to reduce size amp.\n");
// keep adding up all the remaining files
for (unsigned int loop = start_index; loop < file_by_time.size() - 1;
@ -840,11 +846,12 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
if (f->being_compacted) {
LogToBuffer(
log_buffer, "[%s] Universal: Possible candidate file %lu[%d] %s.",
version->cfd_->GetName().c_str(), (unsigned long)f->number, loop,
version->cfd_->GetName().c_str(), (unsigned long)f->fd.GetNumber(),
loop,
" is already being compacted. No size amp reduction possible.\n");
return nullptr;
}
candidate_size += f->file_size;
candidate_size += f->fd.GetFileSize();
candidate_count++;
}
if (candidate_count == 0) {
@ -853,7 +860,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
// size of earliest file
int index = file_by_time[file_by_time.size() - 1];
uint64_t earliest_file_size = version->files_[level][index]->file_size;
uint64_t earliest_file_size = version->files_[level][index]->fd.GetFileSize();
// size amplification = percentage of additional size
if (candidate_size * 100 < ratio * earliest_file_size) {
@ -885,8 +892,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
c->inputs_[0].push_back(f);
LogToBuffer(log_buffer,
"[%s] Universal: size amp picking file %lu[%d] with size %lu",
version->cfd_->GetName().c_str(), (unsigned long)f->number,
index, (unsigned long)f->file_size);
version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), index,
(unsigned long)f->fd.GetFileSize());
}
return c;
}
@ -896,7 +904,7 @@ Compaction* FIFOCompactionPicker::PickCompaction(Version* version,
assert(version->NumberLevels() == 1);
uint64_t total_size = 0;
for (const auto& file : version->files_[0]) {
total_size += file->file_size;
total_size += file->fd.GetFileSize();
}
if (total_size <= options_->compaction_options_fifo.max_table_files_size ||
@ -924,13 +932,13 @@ Compaction* FIFOCompactionPicker::PickCompaction(Version* version,
for (auto ritr = version->files_[0].rbegin();
ritr != version->files_[0].rend(); ++ritr) {
auto f = *ritr;
total_size -= f->file_size;
total_size -= f->fd.GetFileSize();
c->inputs_[0].push_back(f);
char tmp_fsize[16];
AppendHumanBytes(f->file_size, tmp_fsize, sizeof(tmp_fsize));
AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize));
LogToBuffer(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64
" with size %s for deletion",
version->cfd_->GetName().c_str(), f->number, tmp_fsize);
version->cfd_->GetName().c_str(), f->fd.GetNumber(), tmp_fsize);
if (total_size <= options_->compaction_options_fifo.max_table_files_size) {
break;
}

@ -644,8 +644,7 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
const char* kDumbDbName = "";
for (auto file : state.sst_delete_files) {
candidate_files.push_back(
TableFileName(kDumbDbName, file->number).substr(1)
);
TableFileName(kDumbDbName, file->fd.GetNumber()).substr(1));
delete file;
}
@ -1370,14 +1369,14 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
mutex_.AssertHeld();
const uint64_t start_micros = env_->NowMicros();
FileMetaData meta;
meta.number = versions_->NewFileNumber();
pending_outputs_.insert(meta.number);
meta.fd.number = versions_->NewFileNumber();
pending_outputs_.insert(meta.fd.GetNumber());
Iterator* iter = mem->NewIterator(ReadOptions(), true);
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
const SequenceNumber earliest_seqno_in_memtable =
mem->GetFirstSequenceNumber();
Log(options_.info_log, "[%s] Level-0 table #%lu: started",
cfd->GetName().c_str(), (unsigned long)meta.number);
cfd->GetName().c_str(), (unsigned long)meta.fd.GetNumber());
Status s;
{
@ -1391,27 +1390,28 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
}
Log(options_.info_log, "[%s] Level-0 table #%lu: %lu bytes %s",
cfd->GetName().c_str(), (unsigned long)meta.number,
(unsigned long)meta.file_size, s.ToString().c_str());
cfd->GetName().c_str(), (unsigned long)meta.fd.GetNumber(),
(unsigned long)meta.fd.GetFileSize(), s.ToString().c_str());
delete iter;
pending_outputs_.erase(meta.number);
pending_outputs_.erase(meta.fd.GetNumber());
// Note that if file_size is zero, the file has been deleted and
// should not be added to the manifest.
int level = 0;
if (s.ok() && meta.file_size > 0) {
edit->AddFile(level, meta.number, meta.file_size,
meta.smallest, meta.largest,
meta.smallest_seqno, meta.largest_seqno);
if (s.ok() && meta.fd.GetFileSize() > 0) {
edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetFileSize(),
meta.smallest, meta.largest, meta.smallest_seqno,
meta.largest_seqno);
}
InternalStats::CompactionStats stats;
stats.micros = env_->NowMicros() - start_micros;
stats.bytes_written = meta.file_size;
stats.bytes_written = meta.fd.GetFileSize();
stats.files_out_levelnp1 = 1;
cfd->internal_stats()->AddCompactionStats(level, stats);
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES, meta.file_size);
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
meta.fd.GetFileSize());
return s;
}
@ -1421,9 +1421,9 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
mutex_.AssertHeld();
const uint64_t start_micros = env_->NowMicros();
FileMetaData meta;
meta.number = versions_->NewFileNumber();
*filenumber = meta.number;
pending_outputs_.insert(meta.number);
meta.fd.number = versions_->NewFileNumber();
*filenumber = meta.fd.GetNumber();
pending_outputs_.insert(meta.fd.GetNumber());
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
const SequenceNumber earliest_seqno_in_memtable =
@ -1443,7 +1443,7 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
Iterator* iter = NewMergingIterator(&cfd->internal_comparator(),
&memtables[0], memtables.size());
Log(options_.info_log, "[%s] Level-0 flush table #%lu: started",
cfd->GetName().c_str(), (unsigned long)meta.number);
cfd->GetName().c_str(), (unsigned long)meta.fd.GetNumber());
s = BuildTable(dbname_, env_, *cfd->options(), storage_options_,
cfd->table_cache(), iter, &meta, cfd->internal_comparator(),
@ -1452,8 +1452,8 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
LogFlush(options_.info_log);
delete iter;
Log(options_.info_log, "[%s] Level-0 flush table #%lu: %lu bytes %s",
cfd->GetName().c_str(), (unsigned long)meta.number,
(unsigned long)meta.file_size, s.ToString().c_str());
cfd->GetName().c_str(), (unsigned long)meta.fd.GetFileSize(),
(unsigned long)meta.fd.GetFileSize(), s.ToString().c_str());
if (!options_.disableDataSync) {
db_directory_->Fsync();
@ -1477,7 +1477,7 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
// Note that if file_size is zero, the file has been deleted and
// should not be added to the manifest.
int level = 0;
if (s.ok() && meta.file_size > 0) {
if (s.ok() && meta.fd.GetFileSize() > 0) {
const Slice min_user_key = meta.smallest.user_key();
const Slice max_user_key = meta.largest.user_key();
// if we have more than 1 background thread, then we cannot
@ -1488,16 +1488,17 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
cfd->options()->compaction_style == kCompactionStyleLevel) {
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
}
edit->AddFile(level, meta.number, meta.file_size,
meta.smallest, meta.largest,
meta.smallest_seqno, meta.largest_seqno);
edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetFileSize(),
meta.smallest, meta.largest, meta.smallest_seqno,
meta.largest_seqno);
}
InternalStats::CompactionStats stats;
stats.micros = env_->NowMicros() - start_micros;
stats.bytes_written = meta.file_size;
stats.bytes_written = meta.fd.GetFileSize();
cfd->internal_stats()->AddCompactionStats(level, stats);
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES, meta.file_size);
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
meta.fd.GetFileSize());
return s;
}
@ -1688,9 +1689,10 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
VersionEdit edit;
edit.SetColumnFamily(cfd->GetID());
for (const auto& f : cfd->current()->files_[level]) {
edit.DeleteFile(level, f->number);
edit.AddFile(to_level, f->number, f->file_size, f->smallest, f->largest,
f->smallest_seqno, f->largest_seqno);
edit.DeleteFile(level, f->fd.GetNumber());
edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetFileSize(),
f->smallest, f->largest, f->smallest_seqno,
f->largest_seqno);
}
Log(options_.info_log, "[%s] Apply version edit:\n%s",
cfd->GetName().c_str(), edit.DebugString().data());
@ -2172,7 +2174,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
assert(c->column_family_data()->options()->compaction_style ==
kCompactionStyleFIFO);
for (const auto& f : *c->inputs(0)) {
c->edit()->DeleteFile(c->level(), f->number);
c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
}
status = versions_->LogAndApply(c->column_family_data(), c->edit(), &mutex_,
db_directory_.get());
@ -2186,21 +2188,21 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
// Move file to next level
assert(c->num_input_files(0) == 1);
FileMetaData* f = c->input(0, 0);
c->edit()->DeleteFile(c->level(), f->number);
c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
f->smallest, f->largest,
f->smallest_seqno, f->largest_seqno);
c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
c->edit()->AddFile(c->level() + 1, f->fd.GetNumber(), f->fd.GetFileSize(),
f->smallest, f->largest, f->smallest_seqno,
f->largest_seqno);
status = versions_->LogAndApply(c->column_family_data(), c->edit(), &mutex_,
db_directory_.get());
InstallSuperVersion(c->column_family_data(), deletion_state);
Version::LevelSummaryStorage tmp;
LogToBuffer(log_buffer, "[%s] Moved #%lld to level-%d %lld bytes %s: %s\n",
c->column_family_data()->GetName().c_str(),
static_cast<unsigned long long>(f->number), c->level() + 1,
static_cast<unsigned long long>(f->file_size),
status.ToString().c_str(),
c->input_version()->LevelSummary(&tmp));
LogToBuffer(
log_buffer, "[%s] Moved #%lld to level-%d %lld bytes %s: %s\n",
c->column_family_data()->GetName().c_str(),
static_cast<unsigned long long>(f->fd.GetNumber()), c->level() + 1,
static_cast<unsigned long long>(f->fd.GetFileSize()),
status.ToString().c_str(), c->input_version()->LevelSummary(&tmp));
c->ReleaseCompactionFiles(status);
*madeProgress = true;
} else {
@ -2394,7 +2396,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
if (s.ok() && current_entries > 0) {
// Verify that the table is usable
ColumnFamilyData* cfd = compact->compaction->column_family_data();
FileMetaData meta(output_number, current_bytes);
FileDescriptor meta(output_number, current_bytes);
Iterator* iter = cfd->table_cache()->NewIterator(
ReadOptions(), storage_options_, cfd->internal_comparator(), meta);
s = iter->status();
@ -3094,15 +3096,15 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
stats.files_out_levelnp1 = num_output_files;
for (int i = 0; i < compact->compaction->num_input_files(0); i++) {
stats.bytes_readn += compact->compaction->input(0, i)->file_size;
stats.bytes_readn += compact->compaction->input(0, i)->fd.GetFileSize();
RecordTick(options_.statistics.get(), COMPACT_READ_BYTES,
compact->compaction->input(0, i)->file_size);
compact->compaction->input(0, i)->fd.GetFileSize());
}
for (int i = 0; i < compact->compaction->num_input_files(1); i++) {
stats.bytes_readnp1 += compact->compaction->input(1, i)->file_size;
stats.bytes_readnp1 += compact->compaction->input(1, i)->fd.GetFileSize();
RecordTick(options_.statistics.get(), COMPACT_READ_BYTES,
compact->compaction->input(1, i)->file_size);
compact->compaction->input(1, i)->fd.GetFileSize());
}
for (int i = 0; i < num_output_files; i++) {

@ -39,7 +39,7 @@ class LevelIterator : public Iterator {
file_index_ = file_index;
file_iter_.reset(cfd_->table_cache()->NewIterator(
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
*(files_[file_index_]), nullptr /* table_reader_ptr */, false));
files_[file_index_]->fd, nullptr /* table_reader_ptr */, false));
}
valid_ = false;
}
@ -293,7 +293,7 @@ void ForwardIterator::RebuildIterators() {
l0_iters_.reserve(l0_files.size());
for (const auto* l0 : l0_files) {
l0_iters_.push_back(cfd_->table_cache()->NewIterator(
read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0));
read_options_, *cfd_->soptions(), cfd_->internal_comparator(), l0->fd));
}
level_iters_.reserve(sv_->current->NumberLevels() - 1);
for (int32_t level = 1; level < sv_->current->NumberLevels(); ++level) {

@ -84,7 +84,7 @@ class Repairer {
if (status.ok()) {
unsigned long long bytes = 0;
for (size_t i = 0; i < tables_.size(); i++) {
bytes += tables_[i].meta.file_size;
bytes += tables_[i].meta.fd.GetFileSize();
}
Log(options_.info_log,
"**** Repaired rocksdb %s; "
@ -230,7 +230,7 @@ class Repairer {
// Do not record a version edit for this conversion to a Table
// since ExtractMetaData() will also generate edits.
FileMetaData meta;
meta.number = next_file_number_++;
meta.fd.number = next_file_number_++;
ReadOptions ro;
Iterator* iter = mem->NewIterator(ro, true /* enforce_total_order */);
status = BuildTable(dbname_, env_, options_, storage_options_, table_cache_,
@ -240,22 +240,20 @@ class Repairer {
delete cf_mems_default;
mem = nullptr;
if (status.ok()) {
if (meta.file_size > 0) {
table_numbers_.push_back(meta.number);
if (meta.fd.GetFileSize() > 0) {
table_numbers_.push_back(meta.fd.GetNumber());
}
}
Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
(unsigned long long) log,
counter,
(unsigned long long) meta.number,
status.ToString().c_str());
(unsigned long long)log, counter,
(unsigned long long)meta.fd.GetNumber(), status.ToString().c_str());
return status;
}
void ExtractMetaData() {
for (size_t i = 0; i < table_numbers_.size(); i++) {
TableInfo t;
t.meta.number = table_numbers_[i];
t.meta.fd.number = table_numbers_[i];
Status status = ScanTable(&t);
if (!status.ok()) {
std::string fname = TableFileName(dbname_, table_numbers_[i]);
@ -270,13 +268,12 @@ class Repairer {
}
Status ScanTable(TableInfo* t) {
std::string fname = TableFileName(dbname_, t->meta.number);
std::string fname = TableFileName(dbname_, t->meta.fd.GetNumber());
int counter = 0;
Status status = env_->GetFileSize(fname, &t->meta.file_size);
Status status = env_->GetFileSize(fname, &t->meta.fd.file_size);
if (status.ok()) {
FileMetaData dummy_meta(t->meta.number, t->meta.file_size);
Iterator* iter = table_cache_->NewIterator(
ReadOptions(), storage_options_, icmp_, dummy_meta);
ReadOptions(), storage_options_, icmp_, t->meta.fd);
bool empty = true;
ParsedInternalKey parsed;
t->min_sequence = 0;
@ -285,7 +282,7 @@ class Repairer {
Slice key = iter->key();
if (!ParseInternalKey(key, &parsed)) {
Log(options_.info_log, "Table #%llu: unparsable key %s",
(unsigned long long) t->meta.number,
(unsigned long long)t->meta.fd.GetNumber(),
EscapeString(key).c_str());
continue;
}
@ -309,8 +306,7 @@ class Repairer {
delete iter;
}
Log(options_.info_log, "Table #%llu: %d entries %s",
(unsigned long long) t->meta.number,
counter,
(unsigned long long)t->meta.fd.GetNumber(), counter,
status.ToString().c_str());
return status;
}
@ -339,9 +335,9 @@ class Repairer {
for (size_t i = 0; i < tables_.size(); i++) {
// TODO(opt): separate out into multiple levels
const TableInfo& t = tables_[i];
edit_->AddFile(0, t.meta.number, t.meta.file_size,
t.meta.smallest, t.meta.largest,
t.min_sequence, t.max_sequence);
edit_->AddFile(0, t.meta.fd.GetNumber(), t.meta.fd.GetFileSize(),
t.meta.smallest, t.meta.largest, t.min_sequence,
t.max_sequence);
}
//fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());

@ -31,7 +31,7 @@ static void UnrefEntry(void* arg1, void* arg2) {
cache->Release(h);
}
static Slice GetSliceForFileNumber(uint64_t* file_number) {
static Slice GetSliceForFileNumber(const uint64_t* file_number) {
return Slice(reinterpret_cast<const char*>(file_number),
sizeof(*file_number));
}
@ -57,11 +57,10 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
Status TableCache::FindTable(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator,
uint64_t file_number, uint64_t file_size,
Cache::Handle** handle, bool* table_io,
const bool no_io) {
const FileDescriptor& fd, Cache::Handle** handle,
bool* table_io, const bool no_io) {
Status s;
Slice key = GetSliceForFileNumber(&file_number);
Slice key = GetSliceForFileNumber(&fd.number);
*handle = cache_->Lookup(key);
if (*handle == nullptr) {
if (no_io) { // Dont do IO and return a not-found status
@ -70,7 +69,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
if (table_io != nullptr) {
*table_io = true; // we had to do IO from storage
}
std::string fname = TableFileName(dbname_, file_number);
std::string fname = TableFileName(dbname_, fd.GetNumber());
unique_ptr<RandomAccessFile> file;
unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions);
@ -81,8 +80,8 @@ Status TableCache::FindTable(const EnvOptions& toptions,
}
StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS);
s = options_->table_factory->NewTableReader(
*options_, toptions, internal_comparator, std::move(file), file_size,
&table_reader);
*options_, toptions, internal_comparator, std::move(file),
fd.GetFileSize(), &table_reader);
}
if (!s.ok()) {
@ -101,18 +100,18 @@ Status TableCache::FindTable(const EnvOptions& toptions,
Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions,
const InternalKeyComparator& icomparator,
const FileMetaData& file_meta,
const FileDescriptor& fd,
TableReader** table_reader_ptr,
bool for_compaction, Arena* arena) {
if (table_reader_ptr != nullptr) {
*table_reader_ptr = nullptr;
}
TableReader* table_reader = file_meta.table_reader;
TableReader* table_reader = fd.table_reader;
Cache::Handle* handle = nullptr;
Status s;
if (table_reader == nullptr) {
s = FindTable(toptions, icomparator, file_meta.number, file_meta.file_size,
&handle, nullptr, options.read_tier == kBlockCacheTier);
s = FindTable(toptions, icomparator, fd, &handle, nullptr,
options.read_tier == kBlockCacheTier);
if (!s.ok()) {
return NewErrorIterator(s, arena);
}
@ -136,16 +135,15 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Status TableCache::Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const Slice& k, void* arg,
const FileDescriptor& fd, const Slice& k, void* arg,
bool (*saver)(void*, const ParsedInternalKey&,
const Slice&, bool),
bool* table_io, void (*mark_key_may_exist)(void*)) {
TableReader* t = file_meta.table_reader;
TableReader* t = fd.table_reader;
Status s;
Cache::Handle* handle = nullptr;
if (!t) {
s = FindTable(storage_options_, internal_comparator, file_meta.number,
file_meta.file_size, &handle, table_io,
s = FindTable(storage_options_, internal_comparator, fd, &handle, table_io,
options.read_tier == kBlockCacheTier);
if (s.ok()) {
t = GetTableReaderFromHandle(handle);
@ -165,11 +163,10 @@ Status TableCache::Get(const ReadOptions& options,
}
Status TableCache::GetTableProperties(
const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
std::shared_ptr<const TableProperties>* properties, bool no_io) {
Status s;
auto table_reader = file_meta.table_reader;
auto table_reader = fd.table_reader;
// table already been pre-loaded?
if (table_reader) {
*properties = table_reader->GetTableProperties();
@ -179,8 +176,8 @@ Status TableCache::GetTableProperties(
bool table_io;
Cache::Handle* table_handle = nullptr;
s = FindTable(toptions, internal_comparator, file_meta.number,
file_meta.file_size, &table_handle, &table_io, no_io);
s = FindTable(toptions, internal_comparator, fd, &table_handle, &table_io,
no_io);
if (!s.ok()) {
return s;
}

@ -24,10 +24,8 @@ namespace rocksdb {
class Env;
class Arena;
struct FileMetaData;
struct FileDescriptor;
// TODO(sdong): try to come up with a better API to pass the file information
// other than simply passing FileMetaData.
class TableCache {
public:
TableCache(const std::string& dbname, const Options* options,
@ -43,7 +41,7 @@ class TableCache {
// returned iterator is live.
Iterator* NewIterator(const ReadOptions& options, const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta,
const FileDescriptor& file_fd,
TableReader** table_reader_ptr = nullptr,
bool for_compaction = false, Arena* arena = nullptr);
@ -52,7 +50,7 @@ class TableCache {
// it returns false.
Status Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const Slice& k, void* arg,
const FileDescriptor& file_fd, const Slice& k, void* arg,
bool (*handle_result)(void*, const ParsedInternalKey&,
const Slice&, bool),
bool* table_io, void (*mark_key_may_exist)(void*) = nullptr);
@ -63,7 +61,7 @@ class TableCache {
// Find table reader
Status FindTable(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator,
uint64_t file_number, uint64_t file_size, Cache::Handle**,
const FileDescriptor& file_fd, Cache::Handle**,
bool* table_io = nullptr, const bool no_io = false);
// Get TableReader from a cache handle.
@ -77,7 +75,7 @@ class TableCache {
// we set `no_io` to be true.
Status GetTableProperties(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta,
const FileDescriptor& file_meta,
std::shared_ptr<const TableProperties>* properties,
bool no_io = false);

@ -95,8 +95,8 @@ void VersionEdit::EncodeTo(std::string* dst) const {
const FileMetaData& f = new_files_[i].second;
PutVarint32(dst, kNewFile2);
PutVarint32(dst, new_files_[i].first); // level
PutVarint64(dst, f.number);
PutVarint64(dst, f.file_size);
PutVarint64(dst, f.fd.GetNumber());
PutVarint64(dst, f.fd.GetFileSize());
PutLengthPrefixedSlice(dst, f.smallest.Encode());
PutLengthPrefixedSlice(dst, f.largest.Encode());
PutVarint64(dst, f.smallest_seqno);
@ -230,12 +230,14 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
}
break;
case kNewFile:
if (GetLevel(&input, &level, &msg) &&
GetVarint64(&input, &f.number) &&
GetVarint64(&input, &f.file_size) &&
case kNewFile: {
uint64_t number;
uint64_t file_size;
if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
GetVarint64(&input, &file_size) &&
GetInternalKey(&input, &f.smallest) &&
GetInternalKey(&input, &f.largest)) {
f.fd = FileDescriptor(number, file_size);
new_files_.push_back(std::make_pair(level, f));
} else {
if (!msg) {
@ -243,15 +245,17 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
}
}
break;
case kNewFile2:
if (GetLevel(&input, &level, &msg) &&
GetVarint64(&input, &f.number) &&
GetVarint64(&input, &f.file_size) &&
}
case kNewFile2: {
uint64_t number;
uint64_t file_size;
if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
GetVarint64(&input, &file_size) &&
GetInternalKey(&input, &f.smallest) &&
GetInternalKey(&input, &f.largest) &&
GetVarint64(&input, &f.smallest_seqno) &&
GetVarint64(&input, &f.largest_seqno) ) {
GetVarint64(&input, &f.largest_seqno)) {
f.fd = FileDescriptor(number, file_size);
new_files_.push_back(std::make_pair(level, f));
} else {
if (!msg) {
@ -259,6 +263,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
}
}
break;
}
case kColumnFamily:
if (!GetVarint32(&input, &column_family_)) {
@ -336,9 +341,9 @@ std::string VersionEdit::DebugString(bool hex_key) const {
r.append("\n AddFile: ");
AppendNumberTo(&r, new_files_[i].first);
r.append(" ");
AppendNumberTo(&r, f.number);
AppendNumberTo(&r, f.fd.GetNumber());
r.append(" ");
AppendNumberTo(&r, f.file_size);
AppendNumberTo(&r, f.fd.GetFileSize());
r.append(" ");
r.append(f.smallest.DebugString(hex_key));
r.append(" .. ");

@ -19,11 +19,28 @@ namespace rocksdb {
class VersionSet;
// A copyable structure holding the information needed to read data from an
// SST file: either a pointer to a table reader already opened for the file,
// or the file number and size, which can be used to create a new table reader
// for it.
// The behavior is undefined if a copy of this structure is used after the
// file is no longer part of any live version.
struct FileDescriptor {
  uint64_t number;     // File number
  uint64_t file_size;  // File size in bytes
  // Table reader opened for this file; starts out as nullptr.
  TableReader* table_reader;

  // Builds a descriptor for file `file_number` of `size_in_bytes` bytes with
  // no table reader attached yet.
  FileDescriptor(uint64_t file_number, uint64_t size_in_bytes)
      : number(file_number),
        file_size(size_in_bytes),
        table_reader(nullptr) {}

  // Read-only accessors for the file number and the file size in bytes.
  uint64_t GetNumber() const { return number; }
  uint64_t GetFileSize() const { return file_size; }
};
struct FileMetaData {
int refs;
FileDescriptor fd;
int allowed_seeks; // Seeks allowed until compaction
uint64_t number;
uint64_t file_size; // File size in bytes
InternalKey smallest; // Smallest internal key served by table
InternalKey largest; // Largest internal key served by table
bool being_compacted; // Is this file undergoing compaction?
@ -32,18 +49,13 @@ struct FileMetaData {
// Needs to be disposed when refs becomes 0.
Cache::Handle* table_reader_handle;
// Table reader in table_reader_handle
TableReader* table_reader;
FileMetaData(uint64_t number, uint64_t file_size)
FileMetaData()
: refs(0),
fd(0, 0),
allowed_seeks(1 << 30),
number(number),
file_size(file_size),
being_compacted(false),
table_reader_handle(nullptr),
table_reader(nullptr) {}
FileMetaData() : FileMetaData(0, 0) {}
table_reader_handle(nullptr) {}
};
class VersionEdit {
@ -89,8 +101,7 @@ class VersionEdit {
const SequenceNumber& largest_seqno) {
assert(smallest_seqno <= largest_seqno);
FileMetaData f;
f.number = file;
f.file_size = file_size;
f.fd = FileDescriptor(file, file_size);
f.smallest = smallest;
f.largest = largest;
f.smallest_seqno = smallest_seqno;

@ -42,7 +42,7 @@ namespace rocksdb {
// Accumulates the file sizes of the entries in `files` and returns the total.
// The loop ends at the end of the vector or at the first null entry,
// whichever comes first.
// NOTE(review): this listing shows both the direct `file_size` access and the
// `fd.GetFileSize()` access; presumably only the latter remains after the
// refactor -- confirm against the real file.
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
uint64_t sum = 0;
for (size_t i = 0; i < files.size() && files[i]; i++) {
sum += files[i]->file_size;
sum += files[i]->fd.GetFileSize();
}
return sum;
}
@ -150,18 +150,6 @@ bool SomeFileOverlapsRange(
return !BeforeFile(ucmp, largest_user_key, files[index]);
}
namespace {
// Used for LevelFileNumIterator to pass "block handle" value,
// which actually means file information in this iterator.
// It contains subset of fields of FileMetaData, that is sufficient
// for table cache to use.
struct EncodedFileMetaData {
uint64_t number; // file number
uint64_t file_size; // file size
TableReader* table_reader; // cached table reader
};
} // namespace
// An internal iterator. For a given version/level pair, yields
// information about the files in the level. For a given entry, key()
// is the largest key that occurs in the file, and value() is an
@ -173,7 +161,8 @@ class Version::LevelFileNumIterator : public Iterator {
const std::vector<FileMetaData*>* flist)
: icmp_(icmp),
flist_(flist),
index_(flist->size()) { // Marks as invalid
index_(flist->size()),
current_value_(0, 0) { // Marks as invalid
}
virtual bool Valid() const {
return index_ < flist_->size();
@ -204,18 +193,16 @@ class Version::LevelFileNumIterator : public Iterator {
// Returns the file information of the entry at index_ as a Slice over the
// raw bytes of current_value_. The Slice points at the member current_value_,
// so it refers to storage owned by this iterator. Requires Valid().
// NOTE(review): this listing interleaves the field-by-field copy into an
// EncodedFileMetaData with the single FileDescriptor assignment; presumably
// only the latter remains after the refactor -- confirm.
Slice value() const {
assert(Valid());
auto* file_meta = (*flist_)[index_];
current_value_.number = file_meta->number;
current_value_.file_size = file_meta->file_size;
current_value_.table_reader = file_meta->table_reader;
current_value_ = file_meta->fd;
return Slice(reinterpret_cast<const char*>(&current_value_),
sizeof(EncodedFileMetaData));
sizeof(FileDescriptor));
}
virtual Status status() const { return Status::OK(); }
private:
const InternalKeyComparator icmp_;
const std::vector<FileMetaData*>* const flist_;
uint32_t index_;
mutable EncodedFileMetaData current_value_;
mutable FileDescriptor current_value_;
};
class Version::LevelFileIteratorState : public TwoLevelIteratorState {
@ -230,17 +217,15 @@ class Version::LevelFileIteratorState : public TwoLevelIteratorState {
for_compaction_(for_compaction) {}
// Interprets `meta_handle` as the raw bytes of a file-information struct and
// asks table_cache_ for an iterator over that file. If the size of
// `meta_handle` does not match the expected struct size, returns an error
// iterator carrying Status::Corruption instead.
// NOTE(review): this listing interleaves the EncodedFileMetaData-based lines
// with the FileDescriptor-based ones; presumably only the latter remain after
// the refactor -- confirm.
Iterator* NewSecondaryIterator(const Slice& meta_handle) override {
if (meta_handle.size() != sizeof(EncodedFileMetaData)) {
if (meta_handle.size() != sizeof(FileDescriptor)) {
return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value"));
} else {
const EncodedFileMetaData* encoded_meta =
reinterpret_cast<const EncodedFileMetaData*>(meta_handle.data());
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
meta.table_reader = encoded_meta->table_reader;
return table_cache_->NewIterator(read_options_, env_options_,
icomparator_, meta, nullptr /* don't need reference to table*/,
for_compaction_);
const FileDescriptor* fd =
reinterpret_cast<const FileDescriptor*>(meta_handle.data());
return table_cache_->NewIterator(
read_options_, env_options_, icomparator_, *fd,
nullptr /* don't need reference to table*/, for_compaction_);
}
}
@ -261,12 +246,12 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
auto options = cfd_->options();
for (int level = 0; level < num_levels_; level++) {
for (const auto& file_meta : files_[level]) {
auto fname = TableFileName(vset_->dbname_, file_meta->number);
auto fname = TableFileName(vset_->dbname_, file_meta->fd.GetNumber());
// 1. If the table is already present in table cache, load table
// properties from there.
std::shared_ptr<const TableProperties> table_properties;
Status s = table_cache->GetTableProperties(
vset_->storage_options_, cfd_->internal_comparator(), *file_meta,
vset_->storage_options_, cfd_->internal_comparator(), file_meta->fd,
&table_properties, true /* no io */);
if (s.ok()) {
props->insert({fname, table_properties});
@ -292,7 +277,7 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
// By setting the magic number to kInvalidTableMagicNumber, we can
// bypass the magic number check in the footer.
s = ReadTableProperties(
file.get(), file_meta->file_size,
file.get(), file_meta->fd.GetFileSize(),
Footer::kInvalidTableMagicNumber /* table's magic number */,
vset_->env_, options->info_log.get(), &raw_table_properties);
if (!s.ok()) {
@ -315,7 +300,7 @@ void Version::AddIterators(const ReadOptions& read_options,
// Merge all level zero files together since they may overlap
for (const FileMetaData* file : files_[0]) {
iters->push_back(cfd_->table_cache()->NewIterator(
read_options, soptions, cfd_->internal_comparator(), *file));
read_options, soptions, cfd_->internal_comparator(), file->fd));
}
// For levels > 0, we can use a concatenating iterator that sequentially
@ -338,7 +323,7 @@ void Version::AddIterators(const ReadOptions& read_options,
// Merge all level zero files together since they may overlap
for (const FileMetaData* file : files_[0]) {
merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator(
read_options, soptions, cfd_->internal_comparator(), *file, nullptr,
read_options, soptions, cfd_->internal_comparator(), file->fd, nullptr,
false, merge_iter_builder->GetArena()));
}
@ -461,7 +446,7 @@ static bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
namespace {
// Comparator: returns true when `a` has a larger file number than `b`.
// NOTE(review): this listing shows both the direct `number` access and the
// `fd.GetNumber()` access; presumably only the latter remains after the
// refactor -- confirm.
bool NewestFirst(FileMetaData* a, FileMetaData* b) {
return a->number > b->number;
return a->fd.GetNumber() > b->fd.GetNumber();
}
bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
if (a->smallest_seqno != b->smallest_seqno) {
@ -480,7 +465,7 @@ bool BySmallestKey(FileMetaData* a, FileMetaData* b,
return (r < 0);
}
// Break ties by file number
return (a->number < b->number);
return (a->fd.GetNumber() < b->fd.GetNumber());
}
} // anonymous namespace
@ -571,7 +556,7 @@ void Version::Get(const ReadOptions& options,
// Prefetch table data to avoid cache miss if possible
if (level == 0) {
for (int i = 0; i < num_files; ++i) {
auto* r = files_[0][i]->table_reader;
auto* r = files_[0][i]->fd.table_reader;
if (r) {
r->Prepare(ikey);
}
@ -680,7 +665,7 @@ void Version::Get(const ReadOptions& options,
prev_file = f;
#endif
bool tableIO = false;
*status = table_cache_->Get(options, *internal_comparator_, *f, ikey,
*status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
&saver, SaveValue, &tableIO, MarkKeyMayExist);
// TODO: examine the behavior for corrupted key
if (!status->ok()) {
@ -793,7 +778,7 @@ void Version::ComputeCompactionScore(
uint64_t total_size = 0;
for (unsigned int i = 0; i < files_[level].size(); i++) {
if (!files_[level][i]->being_compacted) {
total_size += files_[level][i]->file_size;
total_size += files_[level][i]->fd.GetFileSize();
numfiles++;
}
}
@ -850,7 +835,7 @@ namespace {
// In normal mode: descending size
// Returns true when the file referenced by `first` is strictly larger than
// the file referenced by `second`.
// NOTE(review): this listing shows both the direct `file_size` comparison and
// the `fd.GetFileSize()` comparison; presumably only the latter remains after
// the refactor -- confirm.
bool CompareSizeDescending(const Version::Fsize& first,
const Version::Fsize& second) {
return (first.file->file_size > second.file->file_size);
return (first.file->fd.GetFileSize() > second.file->fd.GetFileSize());
}
// A static comparator used to sort files based on their seqno
// In universal style : descending seqno
@ -1245,10 +1230,10 @@ const char* Version::LevelFileSummary(FileSummaryStorage* scratch,
for (const auto& f : files_[level]) {
int sz = sizeof(scratch->buffer) - len;
char sztxt[16];
AppendHumanBytes(f->file_size, sztxt, 16);
AppendHumanBytes(f->fd.GetFileSize(), sztxt, 16);
int ret = snprintf(scratch->buffer + len, sz,
"#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", f->number,
f->smallest_seqno, sztxt,
"#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ",
f->fd.GetNumber(), f->smallest_seqno, sztxt,
static_cast<int>(f->being_compacted));
if (ret < 0 || ret >= sz)
break;
@ -1281,7 +1266,7 @@ void Version::AddLiveFiles(std::set<uint64_t>* live) {
for (int level = 0; level < NumberLevels(); level++) {
const std::vector<FileMetaData*>& files = files_[level];
for (const auto& file : files) {
live->insert(file->number);
live->insert(file->fd.GetNumber());
}
}
}
@ -1301,9 +1286,9 @@ std::string Version::DebugString(bool hex) const {
const std::vector<FileMetaData*>& files = files_[level];
for (size_t i = 0; i < files.size(); i++) {
r.push_back(' ');
AppendNumberTo(&r, files[i]->number);
AppendNumberTo(&r, files[i]->fd.GetNumber());
r.push_back(':');
AppendNumberTo(&r, files[i]->file_size);
AppendNumberTo(&r, files[i]->fd.GetFileSize());
r.append("[");
r.append(files[i]->smallest.DebugString(hex));
r.append(" .. ");
@ -1452,7 +1437,7 @@ class VersionSet::Builder {
const std::vector<FileMetaData*>& base_files = base_->files_[l];
for (unsigned int i = 0; i < base_files.size(); i++) {
FileMetaData* f = base_files[i];
if (f->number == number) {
if (f->fd.GetNumber() == number) {
found = true;
break;
}
@ -1466,7 +1451,7 @@ class VersionSet::Builder {
for (FileSet::const_iterator added_iter = added->begin();
added_iter != added->end(); ++added_iter) {
FileMetaData* f = *added_iter;
if (f->number == number) {
if (f->fd.GetNumber() == number) {
found = true;
break;
}
@ -1479,7 +1464,7 @@ class VersionSet::Builder {
for (FileSet::const_iterator added_iter = added->begin();
added_iter != added->end(); ++added_iter) {
FileMetaData* f = *added_iter;
if (f->number == number) {
if (f->fd.GetNumber() == number) {
found = true;
break;
}
@ -1521,10 +1506,10 @@ class VersionSet::Builder {
// same as the compaction of 40KB of data. We are a little
// conservative and allow approximately one seek for every 16KB
// of data before triggering a compaction.
f->allowed_seeks = (f->file_size / 16384);
f->allowed_seeks = (f->fd.GetFileSize() / 16384);
if (f->allowed_seeks < 100) f->allowed_seeks = 100;
levels_[level].deleted_files.erase(f->number);
levels_[level].deleted_files.erase(f->fd.GetNumber());
levels_[level].added_files->insert(f);
}
}
@ -1573,11 +1558,10 @@ class VersionSet::Builder {
bool table_io;
cfd_->table_cache()->FindTable(
base_->vset_->storage_options_, cfd_->internal_comparator(),
file_meta->number, file_meta->file_size,
&file_meta->table_reader_handle, &table_io, false);
file_meta->fd, &file_meta->table_reader_handle, &table_io, false);
if (file_meta->table_reader_handle != nullptr) {
// Load table_reader
file_meta->table_reader =
file_meta->fd.table_reader =
cfd_->table_cache()->GetTableReaderFromHandle(
file_meta->table_reader_handle);
}
@ -1586,7 +1570,7 @@ class VersionSet::Builder {
}
void MaybeAddFile(Version* v, int level, FileMetaData* f) {
if (levels_[level].deleted_files.count(f->number) > 0) {
if (levels_[level].deleted_files.count(f->fd.GetNumber()) > 0) {
// File is deleted: do nothing
} else {
auto* files = &v->files_[level];
@ -2592,12 +2576,8 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
for (int level = 0; level < cfd->NumberLevels(); level++) {
for (const auto& f : cfd->current()->files_[level]) {
edit.AddFile(level,
f->number,
f->file_size,
f->smallest,
f->largest,
f->smallest_seqno,
edit.AddFile(level, f->fd.GetNumber(), f->fd.GetFileSize(),
f->smallest, f->largest, f->smallest_seqno,
f->largest_seqno);
}
}
@ -2653,7 +2633,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
if (v->cfd_->internal_comparator().Compare(files[i]->largest, ikey) <=
0) {
// Entire file is before "ikey", so just add the file size
result += files[i]->file_size;
result += files[i]->fd.GetFileSize();
} else if (v->cfd_->internal_comparator().Compare(files[i]->smallest,
ikey) > 0) {
// Entire file is after "ikey", so ignore
@ -2669,7 +2649,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
TableReader* table_reader_ptr;
Iterator* iter = v->cfd_->table_cache()->NewIterator(
ReadOptions(), storage_options_, v->cfd_->internal_comparator(),
*(files[i]), &table_reader_ptr);
files[i]->fd, &table_reader_ptr);
if (table_reader_ptr != nullptr) {
result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode());
}
@ -2702,7 +2682,7 @@ void VersionSet::AddLiveFiles(std::vector<uint64_t>* live_list) {
v = v->next_) {
for (int level = 0; level < v->NumberLevels(); level++) {
for (const auto& f : v->files_[level]) {
live_list->push_back(f->number);
live_list->push_back(f->fd.GetNumber());
}
}
}
@ -2728,7 +2708,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
for (const auto& file : *c->inputs(which)) {
list[num++] = cfd->table_cache()->NewIterator(
read_options, storage_options_compactions_,
cfd->internal_comparator(), *file, nullptr,
cfd->internal_comparator(), file->fd, nullptr,
true /* for compaction */);
}
} else {
@ -2763,13 +2743,13 @@ bool VersionSet::VerifyCompactionFileConsistency(Compaction* c) {
// verify files in level
int level = c->level();
for (int i = 0; i < c->num_input_files(0); i++) {
uint64_t number = c->input(0,i)->number;
uint64_t number = c->input(0, i)->fd.GetNumber();
// look for this file in the current version
bool found = false;
for (unsigned int j = 0; j < version->files_[level].size(); j++) {
FileMetaData* f = version->files_[level][j];
if (f->number == number) {
if (f->fd.GetNumber() == number) {
found = true;
break;
}
@ -2781,13 +2761,13 @@ bool VersionSet::VerifyCompactionFileConsistency(Compaction* c) {
// verify level+1 files
level++;
for (int i = 0; i < c->num_input_files(1); i++) {
uint64_t number = c->input(1,i)->number;
uint64_t number = c->input(1, i)->fd.GetNumber();
// look for this file in the current version
bool found = false;
for (unsigned int j = 0; j < version->files_[level].size(); j++) {
FileMetaData* f = version->files_[level][j];
if (f->number == number) {
if (f->fd.GetNumber() == number) {
found = true;
break;
}
@ -2807,7 +2787,7 @@ Status VersionSet::GetMetadataForFile(uint64_t number, int* filelevel,
Version* version = cfd_iter->current();
for (int level = 0; level < version->NumberLevels(); level++) {
for (const auto& file : version->files_[level]) {
if (file->number == number) {
if (file->fd.GetNumber() == number) {
*meta = file;
*filelevel = level;
*cfd = cfd_iter;
@ -2825,9 +2805,9 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
for (const auto& file : cfd->current()->files_[level]) {
LiveFileMetaData filemetadata;
filemetadata.column_family_name = cfd->GetName();
filemetadata.name = TableFileName("", file->number);
filemetadata.name = TableFileName("", file->fd.GetNumber());
filemetadata.level = level;
filemetadata.size = file->file_size;
filemetadata.size = file->fd.GetFileSize();
filemetadata.smallestkey = file->smallest.user_key().ToString();
filemetadata.largestkey = file->largest.user_key().ToString();
filemetadata.smallest_seqno = file->smallest_seqno;

@ -31,7 +31,7 @@ class FindFileTest {
SequenceNumber smallest_seq = 100,
SequenceNumber largest_seq = 100) {
FileMetaData* f = new FileMetaData;
f->number = files_.size() + 1;
f->fd = FileDescriptor(files_.size() + 1, 0);
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
f->largest = InternalKey(largest, largest_seq, kTypeValue);
files_.push_back(f);

Loading…
Cancel
Save