Refactor: group metadata needed to open an SST file to a separate copyable struct

Summary:
We added multiple fields to FileMetaData recently and are planning to add more.
This refactoring separates out the minimum information needed to access the file into its own object. This object is copyable (FileMetaData is not copyable because of its reference counter). I hope this refactoring can enable further improvements:

(1) use it to design a more efficient data structure to speed up read queries.
(2) in the future, when we add storage-level information, we can easily encode it instead of enlarging this structure, which might expand the memory working set of the file metadata.

The definition is the same as the current EncodedFileMetaData used in the two-level iterator, so the logic in the two-level iterator is now easier to understand.

Test Plan: make all check

Reviewers: haobo, igor, ljin

Reviewed By: ljin

Subscribers: leveldb, dhruba, yhchiang

Differential Revision: https://reviews.facebook.net/D18933
main
sdong 10 years ago
parent 4d913cfbc3
commit cadc1adffa
  1. 12
      db/builder.cc
  2. 2
      db/builder.h
  3. 14
      db/compaction.cc
  4. 88
      db/compaction_picker.cc
  5. 92
      db/db_impl.cc
  6. 4
      db/forward_iterator.cc
  7. 34
      db/repair.cc
  8. 39
      db/table_cache.cc
  9. 12
      db/table_cache.h
  10. 33
      db/version_edit.cc
  11. 35
      db/version_edit.h
  12. 120
      db/version_set.cc
  13. 2
      db/version_set_test.cc

@ -42,7 +42,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
const SequenceNumber earliest_seqno_in_memtable, const SequenceNumber earliest_seqno_in_memtable,
const CompressionType compression) { const CompressionType compression) {
Status s; Status s;
meta->file_size = 0; meta->fd.file_size = 0;
meta->smallest_seqno = meta->largest_seqno = 0; meta->smallest_seqno = meta->largest_seqno = 0;
iter->SeekToFirst(); iter->SeekToFirst();
@ -54,7 +54,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
purge = false; purge = false;
} }
std::string fname = TableFileName(dbname, meta->number); std::string fname = TableFileName(dbname, meta->fd.GetNumber());
if (iter->Valid()) { if (iter->Valid()) {
unique_ptr<WritableFile> file; unique_ptr<WritableFile> file;
s = env->NewWritableFile(fname, &file, soptions); s = env->NewWritableFile(fname, &file, soptions);
@ -177,8 +177,8 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
if (s.ok()) { if (s.ok()) {
s = builder->Finish(); s = builder->Finish();
if (s.ok()) { if (s.ok()) {
meta->file_size = builder->FileSize(); meta->fd.file_size = builder->FileSize();
assert(meta->file_size > 0); assert(meta->fd.GetFileSize() > 0);
} }
} else { } else {
builder->Abandon(); builder->Abandon();
@ -202,7 +202,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
if (s.ok()) { if (s.ok()) {
// Verify that the table is usable // Verify that the table is usable
Iterator* it = table_cache->NewIterator(ReadOptions(), soptions, Iterator* it = table_cache->NewIterator(ReadOptions(), soptions,
internal_comparator, *meta); internal_comparator, meta->fd);
s = it->status(); s = it->status();
delete it; delete it;
} }
@ -213,7 +213,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
s = iter->status(); s = iter->status();
} }
if (s.ok() && meta->file_size > 0) { if (s.ok() && meta->fd.GetFileSize() > 0) {
// Keep it // Keep it
} else { } else {
env->DeleteFile(fname); env->DeleteFile(fname);

@ -29,7 +29,7 @@ extern TableBuilder* NewTableBuilder(
WritableFile* file, CompressionType compression_type); WritableFile* file, CompressionType compression_type);
// Build a Table file from the contents of *iter. The generated file // Build a Table file from the contents of *iter. The generated file
// will be named according to meta->number. On success, the rest of // will be named according to number specified in meta. On success, the rest of
// *meta will be filled with metadata about the generated table. // *meta will be filled with metadata about the generated table.
// If no data is present in *iter, meta->file_size will be set to // If no data is present in *iter, meta->file_size will be set to
// zero, and no Table file will be produced. // zero, and no Table file will be produced.

@ -21,7 +21,7 @@ namespace rocksdb {
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) { static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
uint64_t sum = 0; uint64_t sum = 0;
for (size_t i = 0; i < files.size() && files[i]; i++) { for (size_t i = 0; i < files.size() && files[i]; i++) {
sum += files[i]->file_size; sum += files[i]->fd.GetFileSize();
} }
return sum; return sum;
} }
@ -90,7 +90,7 @@ bool Compaction::IsDeletionCompaction() const { return deletion_compaction_; }
void Compaction::AddInputDeletions(VersionEdit* edit) { void Compaction::AddInputDeletions(VersionEdit* edit) {
for (int which = 0; which < 2; which++) { for (int which = 0; which < 2; which++) {
for (size_t i = 0; i < inputs_[which].size(); i++) { for (size_t i = 0; i < inputs_[which].size(); i++) {
edit->DeleteFile(level_ + which, inputs_[which][i]->number); edit->DeleteFile(level_ + which, inputs_[which][i]->fd.GetNumber());
} }
} }
} }
@ -127,7 +127,7 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) {
icmp->Compare(internal_key, icmp->Compare(internal_key,
grandparents_[grandparent_index_]->largest.Encode()) > 0) { grandparents_[grandparent_index_]->largest.Encode()) > 0) {
if (seen_key_) { if (seen_key_) {
overlapped_bytes_ += grandparents_[grandparent_index_]->file_size; overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
} }
assert(grandparent_index_ + 1 >= grandparents_.size() || assert(grandparent_index_ + 1 >= grandparents_.size() ||
icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(), icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
@ -212,9 +212,9 @@ int InputSummary(const std::vector<FileMetaData*>& files, char* output,
int sz = len - write; int sz = len - write;
int ret; int ret;
char sztxt[16]; char sztxt[16];
AppendHumanBytes(files.at(i)->file_size, sztxt, 16); AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ", files.at(i)->number, ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
sztxt); files.at(i)->fd.GetNumber(), sztxt);
if (ret < 0 || ret >= sz) break; if (ret < 0 || ret >= sz) break;
write += ret; write += ret;
} }
@ -258,7 +258,7 @@ uint64_t Compaction::OutputFilePreallocationSize() {
cfd_->compaction_picker()->MaxFileSizeForLevel(output_level()); cfd_->compaction_picker()->MaxFileSizeForLevel(output_level());
} else { } else {
for (const auto& f : inputs_[0]) { for (const auto& f : inputs_[0]) {
preallocation_size += f->file_size; preallocation_size += f->fd.GetFileSize();
} }
} }
// Over-estimate slightly so we don't end up just barely crossing // Over-estimate slightly so we don't end up just barely crossing

@ -22,7 +22,7 @@ namespace {
uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) { uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
uint64_t sum = 0; uint64_t sum = 0;
for (size_t i = 0; i < files.size() && files[i]; i++) { for (size_t i = 0; i < files.size() && files[i]; i++) {
sum += files[i]->file_size; sum += files[i]->fd.GetFileSize();
} }
return sum; return sum;
} }
@ -80,7 +80,7 @@ void CompactionPicker::SizeBeingCompacted(std::vector<uint64_t>& sizes) {
for (auto c : compactions_in_progress_[level]) { for (auto c : compactions_in_progress_[level]) {
assert(c->level() == level); assert(c->level() == level);
for (int i = 0; i < c->num_input_files(0); i++) { for (int i = 0; i < c->num_input_files(0); i++) {
total += c->input(0,i)->file_size; total += c->input(0, i)->fd.GetFileSize();
} }
} }
sizes[level] = total; sizes[level] = total;
@ -335,7 +335,7 @@ Compaction* CompactionPicker::CompactRange(Version* version, int input_level,
MaxFileSizeForLevel(input_level) * options_->source_compaction_factor; MaxFileSizeForLevel(input_level) * options_->source_compaction_factor;
uint64_t total = 0; uint64_t total = 0;
for (size_t i = 0; i + 1 < inputs.size(); ++i) { for (size_t i = 0; i + 1 < inputs.size(); ++i) {
uint64_t s = inputs[i]->file_size; uint64_t s = inputs[i]->fd.GetFileSize();
total += s; total += s;
if (total >= limit) { if (total >= limit) {
**compaction_end = inputs[i + 1]->smallest; **compaction_end = inputs[i + 1]->smallest;
@ -508,10 +508,11 @@ Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version,
FileMetaData* f = c->input_version_->files_[level][index]; FileMetaData* f = c->input_version_->files_[level][index];
// check to verify files are arranged in descending size // check to verify files are arranged in descending size
assert((i == file_size.size() - 1) || assert(
(i >= Version::number_of_files_to_sort_ - 1) || (i == file_size.size() - 1) ||
(f->file_size >= (i >= Version::number_of_files_to_sort_ - 1) ||
c->input_version_->files_[level][file_size[i + 1]]->file_size)); (f->fd.GetFileSize() >=
c->input_version_->files_[level][file_size[i + 1]]->fd.GetFileSize()));
// do not pick a file to compact if it is being compacted // do not pick a file to compact if it is being compacted
// from n-1 level. // from n-1 level.
@ -680,19 +681,21 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
candidate_count = 1; candidate_count = 1;
break; break;
} }
LogToBuffer( LogToBuffer(log_buffer,
log_buffer, "[%s] Universal: file %lu[%d] being compacted, skipping", "[%s] Universal: file %lu[%d] being compacted, skipping",
version->cfd_->GetName().c_str(), (unsigned long)f->number, loop); version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), loop);
f = nullptr; f = nullptr;
} }
// This file is not being compacted. Consider it as the // This file is not being compacted. Consider it as the
// first candidate to be compacted. // first candidate to be compacted.
uint64_t candidate_size = f != nullptr? f->file_size : 0; uint64_t candidate_size = f != nullptr ? f->fd.GetFileSize() : 0;
if (f != nullptr) { if (f != nullptr) {
LogToBuffer( LogToBuffer(log_buffer,
log_buffer, "[%s] Universal: Possible candidate file %lu[%d].", "[%s] Universal: Possible candidate file %lu[%d].",
version->cfd_->GetName().c_str(), (unsigned long)f->number, loop); version->cfd_->GetName().c_str(),
(unsigned long)f->fd.GetNumber(), loop);
} }
// Check if the suceeding files need compaction. // Check if the suceeding files need compaction.
@ -711,13 +714,13 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
// kCompactionStopStyleSimilarSize, it's simply the size of the last // kCompactionStopStyleSimilarSize, it's simply the size of the last
// picked file. // picked file.
uint64_t sz = (candidate_size * (100L + ratio)) /100; uint64_t sz = (candidate_size * (100L + ratio)) /100;
if (sz < f->file_size) { if (sz < f->fd.GetFileSize()) {
break; break;
} }
if (options_->compaction_options_universal.stop_style == kCompactionStopStyleSimilarSize) { if (options_->compaction_options_universal.stop_style == kCompactionStopStyleSimilarSize) {
// Similar-size stopping rule: also check the last picked file isn't // Similar-size stopping rule: also check the last picked file isn't
// far larger than the next candidate file. // far larger than the next candidate file.
sz = (f->file_size * (100L + ratio)) / 100; sz = (f->fd.GetFileSize() * (100L + ratio)) / 100;
if (sz < candidate_size) { if (sz < candidate_size) {
// If the small file we've encountered begins a run of similar-size // If the small file we've encountered begins a run of similar-size
// files, we'll pick them up on a future iteration of the outer // files, we'll pick them up on a future iteration of the outer
@ -725,9 +728,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
// by the last-resort read amp strategy which disregards size ratios. // by the last-resort read amp strategy which disregards size ratios.
break; break;
} }
candidate_size = f->file_size; candidate_size = f->fd.GetFileSize();
} else { // default kCompactionStopStyleTotalSize } else { // default kCompactionStopStyleTotalSize
candidate_size += f->file_size; candidate_size += f->fd.GetFileSize();
} }
candidate_count++; candidate_count++;
} }
@ -744,8 +747,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
FileMetaData* f = version->files_[level][index]; FileMetaData* f = version->files_[level][index];
LogToBuffer(log_buffer, LogToBuffer(log_buffer,
"[%s] Universal: Skipping file %lu[%d] with size %lu %d\n", "[%s] Universal: Skipping file %lu[%d] with size %lu %d\n",
version->cfd_->GetName().c_str(), (unsigned long)f->number, version->cfd_->GetName().c_str(),
i, (unsigned long)f->file_size, f->being_compacted); (unsigned long)f->fd.GetNumber(), i,
(unsigned long)f->fd.GetFileSize(), f->being_compacted);
} }
} }
} }
@ -763,7 +767,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
uint64_t older_file_size = 0; uint64_t older_file_size = 0;
for (unsigned int i = file_by_time.size() - 1; i >= first_index_after; for (unsigned int i = file_by_time.size() - 1; i >= first_index_after;
i--) { i--) {
older_file_size += version->files_[level][file_by_time[i]]->file_size; older_file_size +=
version->files_[level][file_by_time[i]]->fd.GetFileSize();
if (older_file_size * 100L >= total_size * (long) ratio_to_compress) { if (older_file_size * 100L >= total_size * (long) ratio_to_compress) {
enable_compression = false; enable_compression = false;
break; break;
@ -779,10 +784,10 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
int index = file_by_time[i]; int index = file_by_time[i];
FileMetaData* f = c->input_version_->files_[level][index]; FileMetaData* f = c->input_version_->files_[level][index];
c->inputs_[0].push_back(f); c->inputs_[0].push_back(f);
LogToBuffer(log_buffer, LogToBuffer(
"[%s] Universal: Picking file %lu[%d] with size %lu\n", log_buffer, "[%s] Universal: Picking file %lu[%d] with size %lu\n",
version->cfd_->GetName().c_str(), (unsigned long)f->number, i, version->cfd_->GetName().c_str(), (unsigned long)f->fd.GetNumber(), i,
(unsigned long)f->file_size); (unsigned long)f->fd.GetFileSize());
} }
return c; return c;
} }
@ -818,10 +823,10 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
start_index = loop; // Consider this as the first candidate. start_index = loop; // Consider this as the first candidate.
break; break;
} }
LogToBuffer(log_buffer, LogToBuffer(
"[%s] Universal: skipping file %lu[%d] compacted %s", log_buffer, "[%s] Universal: skipping file %lu[%d] compacted %s",
version->cfd_->GetName().c_str(), (unsigned long)f->number, version->cfd_->GetName().c_str(), (unsigned long)f->fd.GetNumber(),
loop, " cannot be a candidate to reduce size amp.\n"); loop, " cannot be a candidate to reduce size amp.\n");
f = nullptr; f = nullptr;
} }
if (f == nullptr) { if (f == nullptr) {
@ -829,8 +834,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
} }
LogToBuffer(log_buffer, "[%s] Universal: First candidate file %lu[%d] %s", LogToBuffer(log_buffer, "[%s] Universal: First candidate file %lu[%d] %s",
version->cfd_->GetName().c_str(), (unsigned long)f->number, version->cfd_->GetName().c_str(),
start_index, " to reduce size amp.\n"); (unsigned long)f->fd.GetNumber(), start_index,
" to reduce size amp.\n");
// keep adding up all the remaining files // keep adding up all the remaining files
for (unsigned int loop = start_index; loop < file_by_time.size() - 1; for (unsigned int loop = start_index; loop < file_by_time.size() - 1;
@ -840,11 +846,12 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
if (f->being_compacted) { if (f->being_compacted) {
LogToBuffer( LogToBuffer(
log_buffer, "[%s] Universal: Possible candidate file %lu[%d] %s.", log_buffer, "[%s] Universal: Possible candidate file %lu[%d] %s.",
version->cfd_->GetName().c_str(), (unsigned long)f->number, loop, version->cfd_->GetName().c_str(), (unsigned long)f->fd.GetNumber(),
loop,
" is already being compacted. No size amp reduction possible.\n"); " is already being compacted. No size amp reduction possible.\n");
return nullptr; return nullptr;
} }
candidate_size += f->file_size; candidate_size += f->fd.GetFileSize();
candidate_count++; candidate_count++;
} }
if (candidate_count == 0) { if (candidate_count == 0) {
@ -853,7 +860,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
// size of earliest file // size of earliest file
int index = file_by_time[file_by_time.size() - 1]; int index = file_by_time[file_by_time.size() - 1];
uint64_t earliest_file_size = version->files_[level][index]->file_size; uint64_t earliest_file_size = version->files_[level][index]->fd.GetFileSize();
// size amplification = percentage of additional size // size amplification = percentage of additional size
if (candidate_size * 100 < ratio * earliest_file_size) { if (candidate_size * 100 < ratio * earliest_file_size) {
@ -885,8 +892,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
c->inputs_[0].push_back(f); c->inputs_[0].push_back(f);
LogToBuffer(log_buffer, LogToBuffer(log_buffer,
"[%s] Universal: size amp picking file %lu[%d] with size %lu", "[%s] Universal: size amp picking file %lu[%d] with size %lu",
version->cfd_->GetName().c_str(), (unsigned long)f->number, version->cfd_->GetName().c_str(),
index, (unsigned long)f->file_size); (unsigned long)f->fd.GetNumber(), index,
(unsigned long)f->fd.GetFileSize());
} }
return c; return c;
} }
@ -896,7 +904,7 @@ Compaction* FIFOCompactionPicker::PickCompaction(Version* version,
assert(version->NumberLevels() == 1); assert(version->NumberLevels() == 1);
uint64_t total_size = 0; uint64_t total_size = 0;
for (const auto& file : version->files_[0]) { for (const auto& file : version->files_[0]) {
total_size += file->file_size; total_size += file->fd.GetFileSize();
} }
if (total_size <= options_->compaction_options_fifo.max_table_files_size || if (total_size <= options_->compaction_options_fifo.max_table_files_size ||
@ -924,13 +932,13 @@ Compaction* FIFOCompactionPicker::PickCompaction(Version* version,
for (auto ritr = version->files_[0].rbegin(); for (auto ritr = version->files_[0].rbegin();
ritr != version->files_[0].rend(); ++ritr) { ritr != version->files_[0].rend(); ++ritr) {
auto f = *ritr; auto f = *ritr;
total_size -= f->file_size; total_size -= f->fd.GetFileSize();
c->inputs_[0].push_back(f); c->inputs_[0].push_back(f);
char tmp_fsize[16]; char tmp_fsize[16];
AppendHumanBytes(f->file_size, tmp_fsize, sizeof(tmp_fsize)); AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize));
LogToBuffer(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 LogToBuffer(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64
" with size %s for deletion", " with size %s for deletion",
version->cfd_->GetName().c_str(), f->number, tmp_fsize); version->cfd_->GetName().c_str(), f->fd.GetNumber(), tmp_fsize);
if (total_size <= options_->compaction_options_fifo.max_table_files_size) { if (total_size <= options_->compaction_options_fifo.max_table_files_size) {
break; break;
} }

@ -644,8 +644,7 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
const char* kDumbDbName = ""; const char* kDumbDbName = "";
for (auto file : state.sst_delete_files) { for (auto file : state.sst_delete_files) {
candidate_files.push_back( candidate_files.push_back(
TableFileName(kDumbDbName, file->number).substr(1) TableFileName(kDumbDbName, file->fd.GetNumber()).substr(1));
);
delete file; delete file;
} }
@ -1370,14 +1369,14 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
mutex_.AssertHeld(); mutex_.AssertHeld();
const uint64_t start_micros = env_->NowMicros(); const uint64_t start_micros = env_->NowMicros();
FileMetaData meta; FileMetaData meta;
meta.number = versions_->NewFileNumber(); meta.fd.number = versions_->NewFileNumber();
pending_outputs_.insert(meta.number); pending_outputs_.insert(meta.fd.GetNumber());
Iterator* iter = mem->NewIterator(ReadOptions(), true); Iterator* iter = mem->NewIterator(ReadOptions(), true);
const SequenceNumber newest_snapshot = snapshots_.GetNewest(); const SequenceNumber newest_snapshot = snapshots_.GetNewest();
const SequenceNumber earliest_seqno_in_memtable = const SequenceNumber earliest_seqno_in_memtable =
mem->GetFirstSequenceNumber(); mem->GetFirstSequenceNumber();
Log(options_.info_log, "[%s] Level-0 table #%lu: started", Log(options_.info_log, "[%s] Level-0 table #%lu: started",
cfd->GetName().c_str(), (unsigned long)meta.number); cfd->GetName().c_str(), (unsigned long)meta.fd.GetNumber());
Status s; Status s;
{ {
@ -1391,27 +1390,28 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
} }
Log(options_.info_log, "[%s] Level-0 table #%lu: %lu bytes %s", Log(options_.info_log, "[%s] Level-0 table #%lu: %lu bytes %s",
cfd->GetName().c_str(), (unsigned long)meta.number, cfd->GetName().c_str(), (unsigned long)meta.fd.GetNumber(),
(unsigned long)meta.file_size, s.ToString().c_str()); (unsigned long)meta.fd.GetFileSize(), s.ToString().c_str());
delete iter; delete iter;
pending_outputs_.erase(meta.number); pending_outputs_.erase(meta.fd.GetNumber());
// Note that if file_size is zero, the file has been deleted and // Note that if file_size is zero, the file has been deleted and
// should not be added to the manifest. // should not be added to the manifest.
int level = 0; int level = 0;
if (s.ok() && meta.file_size > 0) { if (s.ok() && meta.fd.GetFileSize() > 0) {
edit->AddFile(level, meta.number, meta.file_size, edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetFileSize(),
meta.smallest, meta.largest, meta.smallest, meta.largest, meta.smallest_seqno,
meta.smallest_seqno, meta.largest_seqno); meta.largest_seqno);
} }
InternalStats::CompactionStats stats; InternalStats::CompactionStats stats;
stats.micros = env_->NowMicros() - start_micros; stats.micros = env_->NowMicros() - start_micros;
stats.bytes_written = meta.file_size; stats.bytes_written = meta.fd.GetFileSize();
stats.files_out_levelnp1 = 1; stats.files_out_levelnp1 = 1;
cfd->internal_stats()->AddCompactionStats(level, stats); cfd->internal_stats()->AddCompactionStats(level, stats);
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES, meta.file_size); RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
meta.fd.GetFileSize());
return s; return s;
} }
@ -1421,9 +1421,9 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
mutex_.AssertHeld(); mutex_.AssertHeld();
const uint64_t start_micros = env_->NowMicros(); const uint64_t start_micros = env_->NowMicros();
FileMetaData meta; FileMetaData meta;
meta.number = versions_->NewFileNumber(); meta.fd.number = versions_->NewFileNumber();
*filenumber = meta.number; *filenumber = meta.fd.GetNumber();
pending_outputs_.insert(meta.number); pending_outputs_.insert(meta.fd.GetNumber());
const SequenceNumber newest_snapshot = snapshots_.GetNewest(); const SequenceNumber newest_snapshot = snapshots_.GetNewest();
const SequenceNumber earliest_seqno_in_memtable = const SequenceNumber earliest_seqno_in_memtable =
@ -1443,7 +1443,7 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
Iterator* iter = NewMergingIterator(&cfd->internal_comparator(), Iterator* iter = NewMergingIterator(&cfd->internal_comparator(),
&memtables[0], memtables.size()); &memtables[0], memtables.size());
Log(options_.info_log, "[%s] Level-0 flush table #%lu: started", Log(options_.info_log, "[%s] Level-0 flush table #%lu: started",
cfd->GetName().c_str(), (unsigned long)meta.number); cfd->GetName().c_str(), (unsigned long)meta.fd.GetNumber());
s = BuildTable(dbname_, env_, *cfd->options(), storage_options_, s = BuildTable(dbname_, env_, *cfd->options(), storage_options_,
cfd->table_cache(), iter, &meta, cfd->internal_comparator(), cfd->table_cache(), iter, &meta, cfd->internal_comparator(),
@ -1452,8 +1452,8 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
LogFlush(options_.info_log); LogFlush(options_.info_log);
delete iter; delete iter;
Log(options_.info_log, "[%s] Level-0 flush table #%lu: %lu bytes %s", Log(options_.info_log, "[%s] Level-0 flush table #%lu: %lu bytes %s",
cfd->GetName().c_str(), (unsigned long)meta.number, cfd->GetName().c_str(), (unsigned long)meta.fd.GetFileSize(),
(unsigned long)meta.file_size, s.ToString().c_str()); (unsigned long)meta.fd.GetFileSize(), s.ToString().c_str());
if (!options_.disableDataSync) { if (!options_.disableDataSync) {
db_directory_->Fsync(); db_directory_->Fsync();
@ -1477,7 +1477,7 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
// Note that if file_size is zero, the file has been deleted and // Note that if file_size is zero, the file has been deleted and
// should not be added to the manifest. // should not be added to the manifest.
int level = 0; int level = 0;
if (s.ok() && meta.file_size > 0) { if (s.ok() && meta.fd.GetFileSize() > 0) {
const Slice min_user_key = meta.smallest.user_key(); const Slice min_user_key = meta.smallest.user_key();
const Slice max_user_key = meta.largest.user_key(); const Slice max_user_key = meta.largest.user_key();
// if we have more than 1 background thread, then we cannot // if we have more than 1 background thread, then we cannot
@ -1488,16 +1488,17 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
cfd->options()->compaction_style == kCompactionStyleLevel) { cfd->options()->compaction_style == kCompactionStyleLevel) {
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
} }
edit->AddFile(level, meta.number, meta.file_size, edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetFileSize(),
meta.smallest, meta.largest, meta.smallest, meta.largest, meta.smallest_seqno,
meta.smallest_seqno, meta.largest_seqno); meta.largest_seqno);
} }
InternalStats::CompactionStats stats; InternalStats::CompactionStats stats;
stats.micros = env_->NowMicros() - start_micros; stats.micros = env_->NowMicros() - start_micros;
stats.bytes_written = meta.file_size; stats.bytes_written = meta.fd.GetFileSize();
cfd->internal_stats()->AddCompactionStats(level, stats); cfd->internal_stats()->AddCompactionStats(level, stats);
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES, meta.file_size); RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
meta.fd.GetFileSize());
return s; return s;
} }
@ -1688,9 +1689,10 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
VersionEdit edit; VersionEdit edit;
edit.SetColumnFamily(cfd->GetID()); edit.SetColumnFamily(cfd->GetID());
for (const auto& f : cfd->current()->files_[level]) { for (const auto& f : cfd->current()->files_[level]) {
edit.DeleteFile(level, f->number); edit.DeleteFile(level, f->fd.GetNumber());
edit.AddFile(to_level, f->number, f->file_size, f->smallest, f->largest, edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetFileSize(),
f->smallest_seqno, f->largest_seqno); f->smallest, f->largest, f->smallest_seqno,
f->largest_seqno);
} }
Log(options_.info_log, "[%s] Apply version edit:\n%s", Log(options_.info_log, "[%s] Apply version edit:\n%s",
cfd->GetName().c_str(), edit.DebugString().data()); cfd->GetName().c_str(), edit.DebugString().data());
@ -2172,7 +2174,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
assert(c->column_family_data()->options()->compaction_style == assert(c->column_family_data()->options()->compaction_style ==
kCompactionStyleFIFO); kCompactionStyleFIFO);
for (const auto& f : *c->inputs(0)) { for (const auto& f : *c->inputs(0)) {
c->edit()->DeleteFile(c->level(), f->number); c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
} }
status = versions_->LogAndApply(c->column_family_data(), c->edit(), &mutex_, status = versions_->LogAndApply(c->column_family_data(), c->edit(), &mutex_,
db_directory_.get()); db_directory_.get());
@ -2186,21 +2188,21 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
// Move file to next level // Move file to next level
assert(c->num_input_files(0) == 1); assert(c->num_input_files(0) == 1);
FileMetaData* f = c->input(0, 0); FileMetaData* f = c->input(0, 0);
c->edit()->DeleteFile(c->level(), f->number); c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
c->edit()->AddFile(c->level() + 1, f->number, f->file_size, c->edit()->AddFile(c->level() + 1, f->fd.GetNumber(), f->fd.GetFileSize(),
f->smallest, f->largest, f->smallest, f->largest, f->smallest_seqno,
f->smallest_seqno, f->largest_seqno); f->largest_seqno);
status = versions_->LogAndApply(c->column_family_data(), c->edit(), &mutex_, status = versions_->LogAndApply(c->column_family_data(), c->edit(), &mutex_,
db_directory_.get()); db_directory_.get());
InstallSuperVersion(c->column_family_data(), deletion_state); InstallSuperVersion(c->column_family_data(), deletion_state);
Version::LevelSummaryStorage tmp; Version::LevelSummaryStorage tmp;
LogToBuffer(log_buffer, "[%s] Moved #%lld to level-%d %lld bytes %s: %s\n", LogToBuffer(
c->column_family_data()->GetName().c_str(), log_buffer, "[%s] Moved #%lld to level-%d %lld bytes %s: %s\n",
static_cast<unsigned long long>(f->number), c->level() + 1, c->column_family_data()->GetName().c_str(),
static_cast<unsigned long long>(f->file_size), static_cast<unsigned long long>(f->fd.GetNumber()), c->level() + 1,
status.ToString().c_str(), static_cast<unsigned long long>(f->fd.GetFileSize()),
c->input_version()->LevelSummary(&tmp)); status.ToString().c_str(), c->input_version()->LevelSummary(&tmp));
c->ReleaseCompactionFiles(status); c->ReleaseCompactionFiles(status);
*madeProgress = true; *madeProgress = true;
} else { } else {
@ -2394,7 +2396,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
if (s.ok() && current_entries > 0) { if (s.ok() && current_entries > 0) {
// Verify that the table is usable // Verify that the table is usable
ColumnFamilyData* cfd = compact->compaction->column_family_data(); ColumnFamilyData* cfd = compact->compaction->column_family_data();
FileMetaData meta(output_number, current_bytes); FileDescriptor meta(output_number, current_bytes);
Iterator* iter = cfd->table_cache()->NewIterator( Iterator* iter = cfd->table_cache()->NewIterator(
ReadOptions(), storage_options_, cfd->internal_comparator(), meta); ReadOptions(), storage_options_, cfd->internal_comparator(), meta);
s = iter->status(); s = iter->status();
@ -3094,15 +3096,15 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
stats.files_out_levelnp1 = num_output_files; stats.files_out_levelnp1 = num_output_files;
for (int i = 0; i < compact->compaction->num_input_files(0); i++) { for (int i = 0; i < compact->compaction->num_input_files(0); i++) {
stats.bytes_readn += compact->compaction->input(0, i)->file_size; stats.bytes_readn += compact->compaction->input(0, i)->fd.GetFileSize();
RecordTick(options_.statistics.get(), COMPACT_READ_BYTES, RecordTick(options_.statistics.get(), COMPACT_READ_BYTES,
compact->compaction->input(0, i)->file_size); compact->compaction->input(0, i)->fd.GetFileSize());
} }
for (int i = 0; i < compact->compaction->num_input_files(1); i++) { for (int i = 0; i < compact->compaction->num_input_files(1); i++) {
stats.bytes_readnp1 += compact->compaction->input(1, i)->file_size; stats.bytes_readnp1 += compact->compaction->input(1, i)->fd.GetFileSize();
RecordTick(options_.statistics.get(), COMPACT_READ_BYTES, RecordTick(options_.statistics.get(), COMPACT_READ_BYTES,
compact->compaction->input(1, i)->file_size); compact->compaction->input(1, i)->fd.GetFileSize());
} }
for (int i = 0; i < num_output_files; i++) { for (int i = 0; i < num_output_files; i++) {

@ -39,7 +39,7 @@ class LevelIterator : public Iterator {
file_index_ = file_index; file_index_ = file_index;
file_iter_.reset(cfd_->table_cache()->NewIterator( file_iter_.reset(cfd_->table_cache()->NewIterator(
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(), read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
*(files_[file_index_]), nullptr /* table_reader_ptr */, false)); files_[file_index_]->fd, nullptr /* table_reader_ptr */, false));
} }
valid_ = false; valid_ = false;
} }
@ -293,7 +293,7 @@ void ForwardIterator::RebuildIterators() {
l0_iters_.reserve(l0_files.size()); l0_iters_.reserve(l0_files.size());
for (const auto* l0 : l0_files) { for (const auto* l0 : l0_files) {
l0_iters_.push_back(cfd_->table_cache()->NewIterator( l0_iters_.push_back(cfd_->table_cache()->NewIterator(
read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0)); read_options_, *cfd_->soptions(), cfd_->internal_comparator(), l0->fd));
} }
level_iters_.reserve(sv_->current->NumberLevels() - 1); level_iters_.reserve(sv_->current->NumberLevels() - 1);
for (int32_t level = 1; level < sv_->current->NumberLevels(); ++level) { for (int32_t level = 1; level < sv_->current->NumberLevels(); ++level) {

@ -84,7 +84,7 @@ class Repairer {
if (status.ok()) { if (status.ok()) {
unsigned long long bytes = 0; unsigned long long bytes = 0;
for (size_t i = 0; i < tables_.size(); i++) { for (size_t i = 0; i < tables_.size(); i++) {
bytes += tables_[i].meta.file_size; bytes += tables_[i].meta.fd.GetFileSize();
} }
Log(options_.info_log, Log(options_.info_log,
"**** Repaired rocksdb %s; " "**** Repaired rocksdb %s; "
@ -230,7 +230,7 @@ class Repairer {
// Do not record a version edit for this conversion to a Table // Do not record a version edit for this conversion to a Table
// since ExtractMetaData() will also generate edits. // since ExtractMetaData() will also generate edits.
FileMetaData meta; FileMetaData meta;
meta.number = next_file_number_++; meta.fd.number = next_file_number_++;
ReadOptions ro; ReadOptions ro;
Iterator* iter = mem->NewIterator(ro, true /* enforce_total_order */); Iterator* iter = mem->NewIterator(ro, true /* enforce_total_order */);
status = BuildTable(dbname_, env_, options_, storage_options_, table_cache_, status = BuildTable(dbname_, env_, options_, storage_options_, table_cache_,
@ -240,22 +240,20 @@ class Repairer {
delete cf_mems_default; delete cf_mems_default;
mem = nullptr; mem = nullptr;
if (status.ok()) { if (status.ok()) {
if (meta.file_size > 0) { if (meta.fd.GetFileSize() > 0) {
table_numbers_.push_back(meta.number); table_numbers_.push_back(meta.fd.GetNumber());
} }
} }
Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s", Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
(unsigned long long) log, (unsigned long long)log, counter,
counter, (unsigned long long)meta.fd.GetNumber(), status.ToString().c_str());
(unsigned long long) meta.number,
status.ToString().c_str());
return status; return status;
} }
void ExtractMetaData() { void ExtractMetaData() {
for (size_t i = 0; i < table_numbers_.size(); i++) { for (size_t i = 0; i < table_numbers_.size(); i++) {
TableInfo t; TableInfo t;
t.meta.number = table_numbers_[i]; t.meta.fd.number = table_numbers_[i];
Status status = ScanTable(&t); Status status = ScanTable(&t);
if (!status.ok()) { if (!status.ok()) {
std::string fname = TableFileName(dbname_, table_numbers_[i]); std::string fname = TableFileName(dbname_, table_numbers_[i]);
@ -270,13 +268,12 @@ class Repairer {
} }
Status ScanTable(TableInfo* t) { Status ScanTable(TableInfo* t) {
std::string fname = TableFileName(dbname_, t->meta.number); std::string fname = TableFileName(dbname_, t->meta.fd.GetNumber());
int counter = 0; int counter = 0;
Status status = env_->GetFileSize(fname, &t->meta.file_size); Status status = env_->GetFileSize(fname, &t->meta.fd.file_size);
if (status.ok()) { if (status.ok()) {
FileMetaData dummy_meta(t->meta.number, t->meta.file_size);
Iterator* iter = table_cache_->NewIterator( Iterator* iter = table_cache_->NewIterator(
ReadOptions(), storage_options_, icmp_, dummy_meta); ReadOptions(), storage_options_, icmp_, t->meta.fd);
bool empty = true; bool empty = true;
ParsedInternalKey parsed; ParsedInternalKey parsed;
t->min_sequence = 0; t->min_sequence = 0;
@ -285,7 +282,7 @@ class Repairer {
Slice key = iter->key(); Slice key = iter->key();
if (!ParseInternalKey(key, &parsed)) { if (!ParseInternalKey(key, &parsed)) {
Log(options_.info_log, "Table #%llu: unparsable key %s", Log(options_.info_log, "Table #%llu: unparsable key %s",
(unsigned long long) t->meta.number, (unsigned long long)t->meta.fd.GetNumber(),
EscapeString(key).c_str()); EscapeString(key).c_str());
continue; continue;
} }
@ -309,8 +306,7 @@ class Repairer {
delete iter; delete iter;
} }
Log(options_.info_log, "Table #%llu: %d entries %s", Log(options_.info_log, "Table #%llu: %d entries %s",
(unsigned long long) t->meta.number, (unsigned long long)t->meta.fd.GetNumber(), counter,
counter,
status.ToString().c_str()); status.ToString().c_str());
return status; return status;
} }
@ -339,9 +335,9 @@ class Repairer {
for (size_t i = 0; i < tables_.size(); i++) { for (size_t i = 0; i < tables_.size(); i++) {
// TODO(opt): separate out into multiple levels // TODO(opt): separate out into multiple levels
const TableInfo& t = tables_[i]; const TableInfo& t = tables_[i];
edit_->AddFile(0, t.meta.number, t.meta.file_size, edit_->AddFile(0, t.meta.fd.GetNumber(), t.meta.fd.GetFileSize(),
t.meta.smallest, t.meta.largest, t.meta.smallest, t.meta.largest, t.min_sequence,
t.min_sequence, t.max_sequence); t.max_sequence);
} }
//fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());

@ -31,7 +31,7 @@ static void UnrefEntry(void* arg1, void* arg2) {
cache->Release(h); cache->Release(h);
} }
static Slice GetSliceForFileNumber(uint64_t* file_number) { static Slice GetSliceForFileNumber(const uint64_t* file_number) {
return Slice(reinterpret_cast<const char*>(file_number), return Slice(reinterpret_cast<const char*>(file_number),
sizeof(*file_number)); sizeof(*file_number));
} }
@ -57,11 +57,10 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
Status TableCache::FindTable(const EnvOptions& toptions, Status TableCache::FindTable(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
uint64_t file_number, uint64_t file_size, const FileDescriptor& fd, Cache::Handle** handle,
Cache::Handle** handle, bool* table_io, bool* table_io, const bool no_io) {
const bool no_io) {
Status s; Status s;
Slice key = GetSliceForFileNumber(&file_number); Slice key = GetSliceForFileNumber(&fd.number);
*handle = cache_->Lookup(key); *handle = cache_->Lookup(key);
if (*handle == nullptr) { if (*handle == nullptr) {
if (no_io) { // Dont do IO and return a not-found status if (no_io) { // Dont do IO and return a not-found status
@ -70,7 +69,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
if (table_io != nullptr) { if (table_io != nullptr) {
*table_io = true; // we had to do IO from storage *table_io = true; // we had to do IO from storage
} }
std::string fname = TableFileName(dbname_, file_number); std::string fname = TableFileName(dbname_, fd.GetNumber());
unique_ptr<RandomAccessFile> file; unique_ptr<RandomAccessFile> file;
unique_ptr<TableReader> table_reader; unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions); s = env_->NewRandomAccessFile(fname, &file, toptions);
@ -81,8 +80,8 @@ Status TableCache::FindTable(const EnvOptions& toptions,
} }
StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS); StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS);
s = options_->table_factory->NewTableReader( s = options_->table_factory->NewTableReader(
*options_, toptions, internal_comparator, std::move(file), file_size, *options_, toptions, internal_comparator, std::move(file),
&table_reader); fd.GetFileSize(), &table_reader);
} }
if (!s.ok()) { if (!s.ok()) {
@ -101,18 +100,18 @@ Status TableCache::FindTable(const EnvOptions& toptions,
Iterator* TableCache::NewIterator(const ReadOptions& options, Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions, const EnvOptions& toptions,
const InternalKeyComparator& icomparator, const InternalKeyComparator& icomparator,
const FileMetaData& file_meta, const FileDescriptor& fd,
TableReader** table_reader_ptr, TableReader** table_reader_ptr,
bool for_compaction, Arena* arena) { bool for_compaction, Arena* arena) {
if (table_reader_ptr != nullptr) { if (table_reader_ptr != nullptr) {
*table_reader_ptr = nullptr; *table_reader_ptr = nullptr;
} }
TableReader* table_reader = file_meta.table_reader; TableReader* table_reader = fd.table_reader;
Cache::Handle* handle = nullptr; Cache::Handle* handle = nullptr;
Status s; Status s;
if (table_reader == nullptr) { if (table_reader == nullptr) {
s = FindTable(toptions, icomparator, file_meta.number, file_meta.file_size, s = FindTable(toptions, icomparator, fd, &handle, nullptr,
&handle, nullptr, options.read_tier == kBlockCacheTier); options.read_tier == kBlockCacheTier);
if (!s.ok()) { if (!s.ok()) {
return NewErrorIterator(s, arena); return NewErrorIterator(s, arena);
} }
@ -136,16 +135,15 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Status TableCache::Get(const ReadOptions& options, Status TableCache::Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const Slice& k, void* arg, const FileDescriptor& fd, const Slice& k, void* arg,
bool (*saver)(void*, const ParsedInternalKey&, bool (*saver)(void*, const ParsedInternalKey&,
const Slice&, bool), const Slice&, bool),
bool* table_io, void (*mark_key_may_exist)(void*)) { bool* table_io, void (*mark_key_may_exist)(void*)) {
TableReader* t = file_meta.table_reader; TableReader* t = fd.table_reader;
Status s; Status s;
Cache::Handle* handle = nullptr; Cache::Handle* handle = nullptr;
if (!t) { if (!t) {
s = FindTable(storage_options_, internal_comparator, file_meta.number, s = FindTable(storage_options_, internal_comparator, fd, &handle, table_io,
file_meta.file_size, &handle, table_io,
options.read_tier == kBlockCacheTier); options.read_tier == kBlockCacheTier);
if (s.ok()) { if (s.ok()) {
t = GetTableReaderFromHandle(handle); t = GetTableReaderFromHandle(handle);
@ -165,11 +163,10 @@ Status TableCache::Get(const ReadOptions& options,
} }
Status TableCache::GetTableProperties( Status TableCache::GetTableProperties(
const EnvOptions& toptions, const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
const FileMetaData& file_meta,
std::shared_ptr<const TableProperties>* properties, bool no_io) { std::shared_ptr<const TableProperties>* properties, bool no_io) {
Status s; Status s;
auto table_reader = file_meta.table_reader; auto table_reader = fd.table_reader;
// table already been pre-loaded? // table already been pre-loaded?
if (table_reader) { if (table_reader) {
*properties = table_reader->GetTableProperties(); *properties = table_reader->GetTableProperties();
@ -179,8 +176,8 @@ Status TableCache::GetTableProperties(
bool table_io; bool table_io;
Cache::Handle* table_handle = nullptr; Cache::Handle* table_handle = nullptr;
s = FindTable(toptions, internal_comparator, file_meta.number, s = FindTable(toptions, internal_comparator, fd, &table_handle, &table_io,
file_meta.file_size, &table_handle, &table_io, no_io); no_io);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

@ -24,10 +24,8 @@ namespace rocksdb {
class Env; class Env;
class Arena; class Arena;
struct FileMetaData; struct FileDescriptor;
// TODO(sdong): try to come up with a better API to pass the file information
// other than simply passing FileMetaData.
class TableCache { class TableCache {
public: public:
TableCache(const std::string& dbname, const Options* options, TableCache(const std::string& dbname, const Options* options,
@ -43,7 +41,7 @@ class TableCache {
// returned iterator is live. // returned iterator is live.
Iterator* NewIterator(const ReadOptions& options, const EnvOptions& toptions, Iterator* NewIterator(const ReadOptions& options, const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const FileDescriptor& file_fd,
TableReader** table_reader_ptr = nullptr, TableReader** table_reader_ptr = nullptr,
bool for_compaction = false, Arena* arena = nullptr); bool for_compaction = false, Arena* arena = nullptr);
@ -52,7 +50,7 @@ class TableCache {
// it returns false. // it returns false.
Status Get(const ReadOptions& options, Status Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const Slice& k, void* arg, const FileDescriptor& file_fd, const Slice& k, void* arg,
bool (*handle_result)(void*, const ParsedInternalKey&, bool (*handle_result)(void*, const ParsedInternalKey&,
const Slice&, bool), const Slice&, bool),
bool* table_io, void (*mark_key_may_exist)(void*) = nullptr); bool* table_io, void (*mark_key_may_exist)(void*) = nullptr);
@ -63,7 +61,7 @@ class TableCache {
// Find table reader // Find table reader
Status FindTable(const EnvOptions& toptions, Status FindTable(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
uint64_t file_number, uint64_t file_size, Cache::Handle**, const FileDescriptor& file_fd, Cache::Handle**,
bool* table_io = nullptr, const bool no_io = false); bool* table_io = nullptr, const bool no_io = false);
// Get TableReader from a cache handle. // Get TableReader from a cache handle.
@ -77,7 +75,7 @@ class TableCache {
// we set `no_io` to be true. // we set `no_io` to be true.
Status GetTableProperties(const EnvOptions& toptions, Status GetTableProperties(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const FileDescriptor& file_meta,
std::shared_ptr<const TableProperties>* properties, std::shared_ptr<const TableProperties>* properties,
bool no_io = false); bool no_io = false);

@ -95,8 +95,8 @@ void VersionEdit::EncodeTo(std::string* dst) const {
const FileMetaData& f = new_files_[i].second; const FileMetaData& f = new_files_[i].second;
PutVarint32(dst, kNewFile2); PutVarint32(dst, kNewFile2);
PutVarint32(dst, new_files_[i].first); // level PutVarint32(dst, new_files_[i].first); // level
PutVarint64(dst, f.number); PutVarint64(dst, f.fd.GetNumber());
PutVarint64(dst, f.file_size); PutVarint64(dst, f.fd.GetFileSize());
PutLengthPrefixedSlice(dst, f.smallest.Encode()); PutLengthPrefixedSlice(dst, f.smallest.Encode());
PutLengthPrefixedSlice(dst, f.largest.Encode()); PutLengthPrefixedSlice(dst, f.largest.Encode());
PutVarint64(dst, f.smallest_seqno); PutVarint64(dst, f.smallest_seqno);
@ -230,12 +230,14 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
} }
break; break;
case kNewFile: case kNewFile: {
if (GetLevel(&input, &level, &msg) && uint64_t number;
GetVarint64(&input, &f.number) && uint64_t file_size;
GetVarint64(&input, &f.file_size) && if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
GetVarint64(&input, &file_size) &&
GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.smallest) &&
GetInternalKey(&input, &f.largest)) { GetInternalKey(&input, &f.largest)) {
f.fd = FileDescriptor(number, file_size);
new_files_.push_back(std::make_pair(level, f)); new_files_.push_back(std::make_pair(level, f));
} else { } else {
if (!msg) { if (!msg) {
@ -243,15 +245,17 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
} }
} }
break; break;
}
case kNewFile2: case kNewFile2: {
if (GetLevel(&input, &level, &msg) && uint64_t number;
GetVarint64(&input, &f.number) && uint64_t file_size;
GetVarint64(&input, &f.file_size) && if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) &&
GetVarint64(&input, &file_size) &&
GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.smallest) &&
GetInternalKey(&input, &f.largest) && GetInternalKey(&input, &f.largest) &&
GetVarint64(&input, &f.smallest_seqno) && GetVarint64(&input, &f.smallest_seqno) &&
GetVarint64(&input, &f.largest_seqno) ) { GetVarint64(&input, &f.largest_seqno)) {
f.fd = FileDescriptor(number, file_size);
new_files_.push_back(std::make_pair(level, f)); new_files_.push_back(std::make_pair(level, f));
} else { } else {
if (!msg) { if (!msg) {
@ -259,6 +263,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
} }
} }
break; break;
}
case kColumnFamily: case kColumnFamily:
if (!GetVarint32(&input, &column_family_)) { if (!GetVarint32(&input, &column_family_)) {
@ -336,9 +341,9 @@ std::string VersionEdit::DebugString(bool hex_key) const {
r.append("\n AddFile: "); r.append("\n AddFile: ");
AppendNumberTo(&r, new_files_[i].first); AppendNumberTo(&r, new_files_[i].first);
r.append(" "); r.append(" ");
AppendNumberTo(&r, f.number); AppendNumberTo(&r, f.fd.GetNumber());
r.append(" "); r.append(" ");
AppendNumberTo(&r, f.file_size); AppendNumberTo(&r, f.fd.GetFileSize());
r.append(" "); r.append(" ");
r.append(f.smallest.DebugString(hex_key)); r.append(f.smallest.DebugString(hex_key));
r.append(" .. "); r.append(" .. ");

@ -19,11 +19,28 @@ namespace rocksdb {
class VersionSet; class VersionSet;
// FileDescriptor: a small, copyable bundle of what is needed to read data
// from one SST file. It carries either a pointer to a table reader already
// opened for the file, or the file number and size from which a new table
// reader can be created.
//
// Behavior is undefined if a copy of this structure is used after the file
// is no longer part of any live version.
//
// NOTE: keep the member order and types stable. Elsewhere in this file set
// the struct is exposed as raw bytes (a Slice of sizeof(FileDescriptor)) and
// reinterpreted on the receiving side.
struct FileDescriptor {
  uint64_t number;     // File number
  uint64_t file_size;  // File size in bytes

  // Table reader in table_reader_handle; nullptr right after construction.
  TableReader* table_reader;

  // Builds a descriptor for file `number` of `file_size` bytes with no table
  // reader attached yet.
  FileDescriptor(uint64_t number, uint64_t file_size)
      : number(number),
        file_size(file_size),
        table_reader(nullptr) {}

  // Read-only accessors for the identifying fields.
  uint64_t GetNumber() const {
    return number;
  }

  uint64_t GetFileSize() const {
    return file_size;
  }
};
struct FileMetaData { struct FileMetaData {
int refs; int refs;
FileDescriptor fd;
int allowed_seeks; // Seeks allowed until compaction int allowed_seeks; // Seeks allowed until compaction
uint64_t number;
uint64_t file_size; // File size in bytes
InternalKey smallest; // Smallest internal key served by table InternalKey smallest; // Smallest internal key served by table
InternalKey largest; // Largest internal key served by table InternalKey largest; // Largest internal key served by table
bool being_compacted; // Is this file undergoing compaction? bool being_compacted; // Is this file undergoing compaction?
@ -32,18 +49,13 @@ struct FileMetaData {
// Needs to be disposed when refs becomes 0. // Needs to be disposed when refs becomes 0.
Cache::Handle* table_reader_handle; Cache::Handle* table_reader_handle;
// Table reader in table_reader_handle
TableReader* table_reader;
FileMetaData(uint64_t number, uint64_t file_size) FileMetaData()
: refs(0), : refs(0),
fd(0, 0),
allowed_seeks(1 << 30), allowed_seeks(1 << 30),
number(number),
file_size(file_size),
being_compacted(false), being_compacted(false),
table_reader_handle(nullptr), table_reader_handle(nullptr) {}
table_reader(nullptr) {}
FileMetaData() : FileMetaData(0, 0) {}
}; };
class VersionEdit { class VersionEdit {
@ -89,8 +101,7 @@ class VersionEdit {
const SequenceNumber& largest_seqno) { const SequenceNumber& largest_seqno) {
assert(smallest_seqno <= largest_seqno); assert(smallest_seqno <= largest_seqno);
FileMetaData f; FileMetaData f;
f.number = file; f.fd = FileDescriptor(file, file_size);
f.file_size = file_size;
f.smallest = smallest; f.smallest = smallest;
f.largest = largest; f.largest = largest;
f.smallest_seqno = smallest_seqno; f.smallest_seqno = smallest_seqno;

@ -42,7 +42,7 @@ namespace rocksdb {
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) { static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
uint64_t sum = 0; uint64_t sum = 0;
for (size_t i = 0; i < files.size() && files[i]; i++) { for (size_t i = 0; i < files.size() && files[i]; i++) {
sum += files[i]->file_size; sum += files[i]->fd.GetFileSize();
} }
return sum; return sum;
} }
@ -150,18 +150,6 @@ bool SomeFileOverlapsRange(
return !BeforeFile(ucmp, largest_user_key, files[index]); return !BeforeFile(ucmp, largest_user_key, files[index]);
} }
namespace {
// Used for LevelFileNumIterator to pass "block handle" value,
// which actually means file information in this iterator.
// It contains subset of fields of FileMetaData, that is sufficient
// for table cache to use.
struct EncodedFileMetaData {
uint64_t number; // file number
uint64_t file_size; // file size
TableReader* table_reader; // cached table reader
};
} // namespace
// An internal iterator. For a given version/level pair, yields // An internal iterator. For a given version/level pair, yields
// information about the files in the level. For a given entry, key() // information about the files in the level. For a given entry, key()
// is the largest key that occurs in the file, and value() is an // is the largest key that occurs in the file, and value() is an
@ -173,7 +161,8 @@ class Version::LevelFileNumIterator : public Iterator {
const std::vector<FileMetaData*>* flist) const std::vector<FileMetaData*>* flist)
: icmp_(icmp), : icmp_(icmp),
flist_(flist), flist_(flist),
index_(flist->size()) { // Marks as invalid index_(flist->size()),
current_value_(0, 0) { // Marks as invalid
} }
virtual bool Valid() const { virtual bool Valid() const {
return index_ < flist_->size(); return index_ < flist_->size();
@ -204,18 +193,16 @@ class Version::LevelFileNumIterator : public Iterator {
Slice value() const { Slice value() const {
assert(Valid()); assert(Valid());
auto* file_meta = (*flist_)[index_]; auto* file_meta = (*flist_)[index_];
current_value_.number = file_meta->number; current_value_ = file_meta->fd;
current_value_.file_size = file_meta->file_size;
current_value_.table_reader = file_meta->table_reader;
return Slice(reinterpret_cast<const char*>(&current_value_), return Slice(reinterpret_cast<const char*>(&current_value_),
sizeof(EncodedFileMetaData)); sizeof(FileDescriptor));
} }
virtual Status status() const { return Status::OK(); } virtual Status status() const { return Status::OK(); }
private: private:
const InternalKeyComparator icmp_; const InternalKeyComparator icmp_;
const std::vector<FileMetaData*>* const flist_; const std::vector<FileMetaData*>* const flist_;
uint32_t index_; uint32_t index_;
mutable EncodedFileMetaData current_value_; mutable FileDescriptor current_value_;
}; };
class Version::LevelFileIteratorState : public TwoLevelIteratorState { class Version::LevelFileIteratorState : public TwoLevelIteratorState {
@ -230,17 +217,15 @@ class Version::LevelFileIteratorState : public TwoLevelIteratorState {
for_compaction_(for_compaction) {} for_compaction_(for_compaction) {}
Iterator* NewSecondaryIterator(const Slice& meta_handle) override { Iterator* NewSecondaryIterator(const Slice& meta_handle) override {
if (meta_handle.size() != sizeof(EncodedFileMetaData)) { if (meta_handle.size() != sizeof(FileDescriptor)) {
return NewErrorIterator( return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value")); Status::Corruption("FileReader invoked with unexpected value"));
} else { } else {
const EncodedFileMetaData* encoded_meta = const FileDescriptor* fd =
reinterpret_cast<const EncodedFileMetaData*>(meta_handle.data()); reinterpret_cast<const FileDescriptor*>(meta_handle.data());
FileMetaData meta(encoded_meta->number, encoded_meta->file_size); return table_cache_->NewIterator(
meta.table_reader = encoded_meta->table_reader; read_options_, env_options_, icomparator_, *fd,
return table_cache_->NewIterator(read_options_, env_options_, nullptr /* don't need reference to table*/, for_compaction_);
icomparator_, meta, nullptr /* don't need reference to table*/,
for_compaction_);
} }
} }
@ -261,12 +246,12 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
auto options = cfd_->options(); auto options = cfd_->options();
for (int level = 0; level < num_levels_; level++) { for (int level = 0; level < num_levels_; level++) {
for (const auto& file_meta : files_[level]) { for (const auto& file_meta : files_[level]) {
auto fname = TableFileName(vset_->dbname_, file_meta->number); auto fname = TableFileName(vset_->dbname_, file_meta->fd.GetNumber());
// 1. If the table is already present in table cache, load table // 1. If the table is already present in table cache, load table
// properties from there. // properties from there.
std::shared_ptr<const TableProperties> table_properties; std::shared_ptr<const TableProperties> table_properties;
Status s = table_cache->GetTableProperties( Status s = table_cache->GetTableProperties(
vset_->storage_options_, cfd_->internal_comparator(), *file_meta, vset_->storage_options_, cfd_->internal_comparator(), file_meta->fd,
&table_properties, true /* no io */); &table_properties, true /* no io */);
if (s.ok()) { if (s.ok()) {
props->insert({fname, table_properties}); props->insert({fname, table_properties});
@ -292,7 +277,7 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
// By setting the magic number to kInvalidTableMagicNumber, we can by // By setting the magic number to kInvalidTableMagicNumber, we can by
// pass the magic number check in the footer. // pass the magic number check in the footer.
s = ReadTableProperties( s = ReadTableProperties(
file.get(), file_meta->file_size, file.get(), file_meta->fd.GetFileSize(),
Footer::kInvalidTableMagicNumber /* table's magic number */, Footer::kInvalidTableMagicNumber /* table's magic number */,
vset_->env_, options->info_log.get(), &raw_table_properties); vset_->env_, options->info_log.get(), &raw_table_properties);
if (!s.ok()) { if (!s.ok()) {
@ -315,7 +300,7 @@ void Version::AddIterators(const ReadOptions& read_options,
// Merge all level zero files together since they may overlap // Merge all level zero files together since they may overlap
for (const FileMetaData* file : files_[0]) { for (const FileMetaData* file : files_[0]) {
iters->push_back(cfd_->table_cache()->NewIterator( iters->push_back(cfd_->table_cache()->NewIterator(
read_options, soptions, cfd_->internal_comparator(), *file)); read_options, soptions, cfd_->internal_comparator(), file->fd));
} }
// For levels > 0, we can use a concatenating iterator that sequentially // For levels > 0, we can use a concatenating iterator that sequentially
@ -338,7 +323,7 @@ void Version::AddIterators(const ReadOptions& read_options,
// Merge all level zero files together since they may overlap // Merge all level zero files together since they may overlap
for (const FileMetaData* file : files_[0]) { for (const FileMetaData* file : files_[0]) {
merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator( merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator(
read_options, soptions, cfd_->internal_comparator(), *file, nullptr, read_options, soptions, cfd_->internal_comparator(), file->fd, nullptr,
false, merge_iter_builder->GetArena())); false, merge_iter_builder->GetArena()));
} }
@ -461,7 +446,7 @@ static bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
namespace { namespace {
bool NewestFirst(FileMetaData* a, FileMetaData* b) { bool NewestFirst(FileMetaData* a, FileMetaData* b) {
return a->number > b->number; return a->fd.GetNumber() > b->fd.GetNumber();
} }
bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) { bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
if (a->smallest_seqno != b->smallest_seqno) { if (a->smallest_seqno != b->smallest_seqno) {
@ -480,7 +465,7 @@ bool BySmallestKey(FileMetaData* a, FileMetaData* b,
return (r < 0); return (r < 0);
} }
// Break ties by file number // Break ties by file number
return (a->number < b->number); return (a->fd.GetNumber() < b->fd.GetNumber());
} }
} // anonymous namespace } // anonymous namespace
@ -571,7 +556,7 @@ void Version::Get(const ReadOptions& options,
// Prefetch table data to avoid cache miss if possible // Prefetch table data to avoid cache miss if possible
if (level == 0) { if (level == 0) {
for (int i = 0; i < num_files; ++i) { for (int i = 0; i < num_files; ++i) {
auto* r = files_[0][i]->table_reader; auto* r = files_[0][i]->fd.table_reader;
if (r) { if (r) {
r->Prepare(ikey); r->Prepare(ikey);
} }
@ -680,7 +665,7 @@ void Version::Get(const ReadOptions& options,
prev_file = f; prev_file = f;
#endif #endif
bool tableIO = false; bool tableIO = false;
*status = table_cache_->Get(options, *internal_comparator_, *f, ikey, *status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
&saver, SaveValue, &tableIO, MarkKeyMayExist); &saver, SaveValue, &tableIO, MarkKeyMayExist);
// TODO: examine the behavior for corrupted key // TODO: examine the behavior for corrupted key
if (!status->ok()) { if (!status->ok()) {
@ -793,7 +778,7 @@ void Version::ComputeCompactionScore(
uint64_t total_size = 0; uint64_t total_size = 0;
for (unsigned int i = 0; i < files_[level].size(); i++) { for (unsigned int i = 0; i < files_[level].size(); i++) {
if (!files_[level][i]->being_compacted) { if (!files_[level][i]->being_compacted) {
total_size += files_[level][i]->file_size; total_size += files_[level][i]->fd.GetFileSize();
numfiles++; numfiles++;
} }
} }
@ -850,7 +835,7 @@ namespace {
// In normal mode: descending size // In normal mode: descending size
bool CompareSizeDescending(const Version::Fsize& first, bool CompareSizeDescending(const Version::Fsize& first,
const Version::Fsize& second) { const Version::Fsize& second) {
return (first.file->file_size > second.file->file_size); return (first.file->fd.GetFileSize() > second.file->fd.GetFileSize());
} }
// A static compator used to sort files based on their seqno // A static compator used to sort files based on their seqno
// In universal style : descending seqno // In universal style : descending seqno
@ -1245,10 +1230,10 @@ const char* Version::LevelFileSummary(FileSummaryStorage* scratch,
for (const auto& f : files_[level]) { for (const auto& f : files_[level]) {
int sz = sizeof(scratch->buffer) - len; int sz = sizeof(scratch->buffer) - len;
char sztxt[16]; char sztxt[16];
AppendHumanBytes(f->file_size, sztxt, 16); AppendHumanBytes(f->fd.GetFileSize(), sztxt, 16);
int ret = snprintf(scratch->buffer + len, sz, int ret = snprintf(scratch->buffer + len, sz,
"#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", f->number, "#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ",
f->smallest_seqno, sztxt, f->fd.GetNumber(), f->smallest_seqno, sztxt,
static_cast<int>(f->being_compacted)); static_cast<int>(f->being_compacted));
if (ret < 0 || ret >= sz) if (ret < 0 || ret >= sz)
break; break;
@ -1281,7 +1266,7 @@ void Version::AddLiveFiles(std::set<uint64_t>* live) {
for (int level = 0; level < NumberLevels(); level++) { for (int level = 0; level < NumberLevels(); level++) {
const std::vector<FileMetaData*>& files = files_[level]; const std::vector<FileMetaData*>& files = files_[level];
for (const auto& file : files) { for (const auto& file : files) {
live->insert(file->number); live->insert(file->fd.GetNumber());
} }
} }
} }
@ -1301,9 +1286,9 @@ std::string Version::DebugString(bool hex) const {
const std::vector<FileMetaData*>& files = files_[level]; const std::vector<FileMetaData*>& files = files_[level];
for (size_t i = 0; i < files.size(); i++) { for (size_t i = 0; i < files.size(); i++) {
r.push_back(' '); r.push_back(' ');
AppendNumberTo(&r, files[i]->number); AppendNumberTo(&r, files[i]->fd.GetNumber());
r.push_back(':'); r.push_back(':');
AppendNumberTo(&r, files[i]->file_size); AppendNumberTo(&r, files[i]->fd.GetFileSize());
r.append("["); r.append("[");
r.append(files[i]->smallest.DebugString(hex)); r.append(files[i]->smallest.DebugString(hex));
r.append(" .. "); r.append(" .. ");
@ -1452,7 +1437,7 @@ class VersionSet::Builder {
const std::vector<FileMetaData*>& base_files = base_->files_[l]; const std::vector<FileMetaData*>& base_files = base_->files_[l];
for (unsigned int i = 0; i < base_files.size(); i++) { for (unsigned int i = 0; i < base_files.size(); i++) {
FileMetaData* f = base_files[i]; FileMetaData* f = base_files[i];
if (f->number == number) { if (f->fd.GetNumber() == number) {
found = true; found = true;
break; break;
} }
@ -1466,7 +1451,7 @@ class VersionSet::Builder {
for (FileSet::const_iterator added_iter = added->begin(); for (FileSet::const_iterator added_iter = added->begin();
added_iter != added->end(); ++added_iter) { added_iter != added->end(); ++added_iter) {
FileMetaData* f = *added_iter; FileMetaData* f = *added_iter;
if (f->number == number) { if (f->fd.GetNumber() == number) {
found = true; found = true;
break; break;
} }
@ -1479,7 +1464,7 @@ class VersionSet::Builder {
for (FileSet::const_iterator added_iter = added->begin(); for (FileSet::const_iterator added_iter = added->begin();
added_iter != added->end(); ++added_iter) { added_iter != added->end(); ++added_iter) {
FileMetaData* f = *added_iter; FileMetaData* f = *added_iter;
if (f->number == number) { if (f->fd.GetNumber() == number) {
found = true; found = true;
break; break;
} }
@ -1521,10 +1506,10 @@ class VersionSet::Builder {
// same as the compaction of 40KB of data. We are a little // same as the compaction of 40KB of data. We are a little
// conservative and allow approximately one seek for every 16KB // conservative and allow approximately one seek for every 16KB
// of data before triggering a compaction. // of data before triggering a compaction.
f->allowed_seeks = (f->file_size / 16384); f->allowed_seeks = (f->fd.GetFileSize() / 16384);
if (f->allowed_seeks < 100) f->allowed_seeks = 100; if (f->allowed_seeks < 100) f->allowed_seeks = 100;
levels_[level].deleted_files.erase(f->number); levels_[level].deleted_files.erase(f->fd.GetNumber());
levels_[level].added_files->insert(f); levels_[level].added_files->insert(f);
} }
} }
@ -1573,11 +1558,10 @@ class VersionSet::Builder {
bool table_io; bool table_io;
cfd_->table_cache()->FindTable( cfd_->table_cache()->FindTable(
base_->vset_->storage_options_, cfd_->internal_comparator(), base_->vset_->storage_options_, cfd_->internal_comparator(),
file_meta->number, file_meta->file_size, file_meta->fd, &file_meta->table_reader_handle, &table_io, false);
&file_meta->table_reader_handle, &table_io, false);
if (file_meta->table_reader_handle != nullptr) { if (file_meta->table_reader_handle != nullptr) {
// Load table_reader // Load table_reader
file_meta->table_reader = file_meta->fd.table_reader =
cfd_->table_cache()->GetTableReaderFromHandle( cfd_->table_cache()->GetTableReaderFromHandle(
file_meta->table_reader_handle); file_meta->table_reader_handle);
} }
@ -1586,7 +1570,7 @@ class VersionSet::Builder {
} }
void MaybeAddFile(Version* v, int level, FileMetaData* f) { void MaybeAddFile(Version* v, int level, FileMetaData* f) {
if (levels_[level].deleted_files.count(f->number) > 0) { if (levels_[level].deleted_files.count(f->fd.GetNumber()) > 0) {
// File is deleted: do nothing // File is deleted: do nothing
} else { } else {
auto* files = &v->files_[level]; auto* files = &v->files_[level];
@ -2592,12 +2576,8 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
for (int level = 0; level < cfd->NumberLevels(); level++) { for (int level = 0; level < cfd->NumberLevels(); level++) {
for (const auto& f : cfd->current()->files_[level]) { for (const auto& f : cfd->current()->files_[level]) {
edit.AddFile(level, edit.AddFile(level, f->fd.GetNumber(), f->fd.GetFileSize(),
f->number, f->smallest, f->largest, f->smallest_seqno,
f->file_size,
f->smallest,
f->largest,
f->smallest_seqno,
f->largest_seqno); f->largest_seqno);
} }
} }
@ -2653,7 +2633,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
if (v->cfd_->internal_comparator().Compare(files[i]->largest, ikey) <= if (v->cfd_->internal_comparator().Compare(files[i]->largest, ikey) <=
0) { 0) {
// Entire file is before "ikey", so just add the file size // Entire file is before "ikey", so just add the file size
result += files[i]->file_size; result += files[i]->fd.GetFileSize();
} else if (v->cfd_->internal_comparator().Compare(files[i]->smallest, } else if (v->cfd_->internal_comparator().Compare(files[i]->smallest,
ikey) > 0) { ikey) > 0) {
// Entire file is after "ikey", so ignore // Entire file is after "ikey", so ignore
@ -2669,7 +2649,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
TableReader* table_reader_ptr; TableReader* table_reader_ptr;
Iterator* iter = v->cfd_->table_cache()->NewIterator( Iterator* iter = v->cfd_->table_cache()->NewIterator(
ReadOptions(), storage_options_, v->cfd_->internal_comparator(), ReadOptions(), storage_options_, v->cfd_->internal_comparator(),
*(files[i]), &table_reader_ptr); files[i]->fd, &table_reader_ptr);
if (table_reader_ptr != nullptr) { if (table_reader_ptr != nullptr) {
result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode()); result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode());
} }
@ -2702,7 +2682,7 @@ void VersionSet::AddLiveFiles(std::vector<uint64_t>* live_list) {
v = v->next_) { v = v->next_) {
for (int level = 0; level < v->NumberLevels(); level++) { for (int level = 0; level < v->NumberLevels(); level++) {
for (const auto& f : v->files_[level]) { for (const auto& f : v->files_[level]) {
live_list->push_back(f->number); live_list->push_back(f->fd.GetNumber());
} }
} }
} }
@ -2728,7 +2708,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
for (const auto& file : *c->inputs(which)) { for (const auto& file : *c->inputs(which)) {
list[num++] = cfd->table_cache()->NewIterator( list[num++] = cfd->table_cache()->NewIterator(
read_options, storage_options_compactions_, read_options, storage_options_compactions_,
cfd->internal_comparator(), *file, nullptr, cfd->internal_comparator(), file->fd, nullptr,
true /* for compaction */); true /* for compaction */);
} }
} else { } else {
@ -2763,13 +2743,13 @@ bool VersionSet::VerifyCompactionFileConsistency(Compaction* c) {
// verify files in level // verify files in level
int level = c->level(); int level = c->level();
for (int i = 0; i < c->num_input_files(0); i++) { for (int i = 0; i < c->num_input_files(0); i++) {
uint64_t number = c->input(0,i)->number; uint64_t number = c->input(0, i)->fd.GetNumber();
// look for this file in the current version // look for this file in the current version
bool found = false; bool found = false;
for (unsigned int j = 0; j < version->files_[level].size(); j++) { for (unsigned int j = 0; j < version->files_[level].size(); j++) {
FileMetaData* f = version->files_[level][j]; FileMetaData* f = version->files_[level][j];
if (f->number == number) { if (f->fd.GetNumber() == number) {
found = true; found = true;
break; break;
} }
@ -2781,13 +2761,13 @@ bool VersionSet::VerifyCompactionFileConsistency(Compaction* c) {
// verify level+1 files // verify level+1 files
level++; level++;
for (int i = 0; i < c->num_input_files(1); i++) { for (int i = 0; i < c->num_input_files(1); i++) {
uint64_t number = c->input(1,i)->number; uint64_t number = c->input(1, i)->fd.GetNumber();
// look for this file in the current version // look for this file in the current version
bool found = false; bool found = false;
for (unsigned int j = 0; j < version->files_[level].size(); j++) { for (unsigned int j = 0; j < version->files_[level].size(); j++) {
FileMetaData* f = version->files_[level][j]; FileMetaData* f = version->files_[level][j];
if (f->number == number) { if (f->fd.GetNumber() == number) {
found = true; found = true;
break; break;
} }
@ -2807,7 +2787,7 @@ Status VersionSet::GetMetadataForFile(uint64_t number, int* filelevel,
Version* version = cfd_iter->current(); Version* version = cfd_iter->current();
for (int level = 0; level < version->NumberLevels(); level++) { for (int level = 0; level < version->NumberLevels(); level++) {
for (const auto& file : version->files_[level]) { for (const auto& file : version->files_[level]) {
if (file->number == number) { if (file->fd.GetNumber() == number) {
*meta = file; *meta = file;
*filelevel = level; *filelevel = level;
*cfd = cfd_iter; *cfd = cfd_iter;
@ -2825,9 +2805,9 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
for (const auto& file : cfd->current()->files_[level]) { for (const auto& file : cfd->current()->files_[level]) {
LiveFileMetaData filemetadata; LiveFileMetaData filemetadata;
filemetadata.column_family_name = cfd->GetName(); filemetadata.column_family_name = cfd->GetName();
filemetadata.name = TableFileName("", file->number); filemetadata.name = TableFileName("", file->fd.GetNumber());
filemetadata.level = level; filemetadata.level = level;
filemetadata.size = file->file_size; filemetadata.size = file->fd.GetFileSize();
filemetadata.smallestkey = file->smallest.user_key().ToString(); filemetadata.smallestkey = file->smallest.user_key().ToString();
filemetadata.largestkey = file->largest.user_key().ToString(); filemetadata.largestkey = file->largest.user_key().ToString();
filemetadata.smallest_seqno = file->smallest_seqno; filemetadata.smallest_seqno = file->smallest_seqno;

@ -31,7 +31,7 @@ class FindFileTest {
SequenceNumber smallest_seq = 100, SequenceNumber smallest_seq = 100,
SequenceNumber largest_seq = 100) { SequenceNumber largest_seq = 100) {
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData;
f->number = files_.size() + 1; f->fd = FileDescriptor(files_.size() + 1, 0);
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue); f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
f->largest = InternalKey(largest, largest_seq, kTypeValue); f->largest = InternalKey(largest, largest_seq, kTypeValue);
files_.push_back(f); files_.push_back(f);

Loading…
Cancel
Save