Refactor blob file creation logic (#6066)

Summary:
The patch refactors and cleans up the logic around creating new blob files
by moving the common code of `SelectBlobFile` and `SelectBlobFileTTL`
to a new helper method `CreateBlobFileAndWriter`, bringing the implementation
of `SelectBlobFile` and `SelectBlobFileTTL` into sync, and increasing encapsulation
by adding new constructors for `BlobFile` and `BlobLogHeader`.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6066

Test Plan:
Ran `make check` and used the BlobDB mode of `db_bench` to sanity test both
the TTL and the non-TTL code paths.

Differential Revision: D18646921

Pulled By: ltamasi

fbshipit-source-id: e5705a84807932e31dccab4f49b3e64369cea26d
main
Levi Tamasi 5 years ago committed by Facebook Github Bot
parent 771e1723c7
commit 72daa92d3a
  1. 193
      utilities/blob_db/blob_db_impl.cc
  2. 25
      utilities/blob_db/blob_db_impl.h
  3. 46
      utilities/blob_db/blob_file.cc
  4. 53
      utilities/blob_db/blob_file.h
  5. 10
      utilities/blob_db/blob_log_format.h

@ -84,7 +84,6 @@ BlobDBImpl::BlobDBImpl(const std::string& dbname,
statistics_(db_options_.statistics.get()), statistics_(db_options_.statistics.get()),
next_file_number_(1), next_file_number_(1),
flush_sequence_(0), flush_sequence_(0),
epoch_of_(0),
closed_(true), closed_(true),
open_file_count_(0), open_file_count_(0),
total_blob_size_(0), total_blob_size_(0),
@ -584,14 +583,24 @@ Status BlobDBImpl::GetBlobFileReader(
return s; return s;
} }
std::shared_ptr<BlobFile> BlobDBImpl::NewBlobFile(const std::string& reason) { std::shared_ptr<BlobFile> BlobDBImpl::NewBlobFile(
bool has_ttl, const ExpirationRange& expiration_range,
const std::string& reason) {
assert(has_ttl == (expiration_range.first || expiration_range.second));
uint64_t file_num = next_file_number_++; uint64_t file_num = next_file_number_++;
auto bfile = std::make_shared<BlobFile>(this, blob_dir_, file_num,
db_options_.info_log.get()); const uint32_t column_family_id =
static_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())->GetID();
auto blob_file = std::make_shared<BlobFile>(
this, blob_dir_, file_num, db_options_.info_log.get(), column_family_id,
bdb_options_.compression, has_ttl, expiration_range);
ROCKS_LOG_DEBUG(db_options_.info_log, "New blob file created: %s reason='%s'", ROCKS_LOG_DEBUG(db_options_.info_log, "New blob file created: %s reason='%s'",
bfile->PathName().c_str(), reason.c_str()); blob_file->PathName().c_str(), reason.c_str());
LogFlush(db_options_.info_log); LogFlush(db_options_.info_log);
return bfile;
return blob_file;
} }
Status BlobDBImpl::CreateWriterLocked(const std::shared_ptr<BlobFile>& bfile) { Status BlobDBImpl::CreateWriterLocked(const std::shared_ptr<BlobFile>& bfile) {
@ -687,47 +696,29 @@ Status BlobDBImpl::CheckOrCreateWriterLocked(
return s; return s;
} }
Status BlobDBImpl::SelectBlobFile(std::shared_ptr<BlobFile>* blob_file) { Status BlobDBImpl::CreateBlobFileAndWriter(
assert(blob_file != nullptr); bool has_ttl, const ExpirationRange& expiration_range,
{ const std::string& reason, std::shared_ptr<BlobFile>* blob_file,
ReadLock rl(&mutex_); std::shared_ptr<Writer>* writer) {
if (open_non_ttl_file_ != nullptr) { assert(has_ttl == (expiration_range.first || expiration_range.second));
*blob_file = open_non_ttl_file_; assert(blob_file);
return Status::OK(); assert(writer);
}
}
// CHECK again
WriteLock wl(&mutex_);
if (open_non_ttl_file_ != nullptr) {
*blob_file = open_non_ttl_file_;
return Status::OK();
}
*blob_file = NewBlobFile("SelectBlobFile"); *blob_file = NewBlobFile(has_ttl, expiration_range, reason);
assert(*blob_file != nullptr); assert(*blob_file);
// file not visible, hence no lock // file not visible, hence no lock
std::shared_ptr<Writer> writer; Status s = CheckOrCreateWriterLocked(*blob_file, writer);
Status s = CheckOrCreateWriterLocked(*blob_file, &writer);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_ERROR(db_options_.info_log, ROCKS_LOG_ERROR(db_options_.info_log,
"Failed to get writer from blob file: %s, error: %s", "Failed to get writer for blob file: %s, error: %s",
(*blob_file)->PathName().c_str(), s.ToString().c_str()); (*blob_file)->PathName().c_str(), s.ToString().c_str());
return s; return s;
} }
(*blob_file)->file_size_ = BlobLogHeader::kSize; assert(*writer);
(*blob_file)->header_.compression = bdb_options_.compression;
(*blob_file)->header_.has_ttl = false;
(*blob_file)->header_.column_family_id =
reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())->GetID();
(*blob_file)->header_valid_ = true;
(*blob_file)->SetColumnFamilyId((*blob_file)->header_.column_family_id);
(*blob_file)->SetHasTTL(false);
(*blob_file)->SetCompression(bdb_options_.compression);
s = writer->WriteHeader((*blob_file)->header_); s = (*writer)->WriteHeader((*blob_file)->header_);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_ERROR(db_options_.info_log, ROCKS_LOG_ERROR(db_options_.info_log,
"Failed to write header to new blob file: %s" "Failed to write header to new blob file: %s"
@ -736,93 +727,92 @@ Status BlobDBImpl::SelectBlobFile(std::shared_ptr<BlobFile>* blob_file) {
return s; return s;
} }
blob_files_.insert( (*blob_file)->SetFileSize(BlobLogHeader::kSize);
std::make_pair((*blob_file)->BlobFileNumber(), *blob_file));
open_non_ttl_file_ = *blob_file;
total_blob_size_ += BlobLogHeader::kSize; total_blob_size_ += BlobLogHeader::kSize;
return s; return s;
} }
Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration, Status BlobDBImpl::SelectBlobFile(std::shared_ptr<BlobFile>* blob_file) {
std::shared_ptr<BlobFile>* blob_file) { assert(blob_file);
assert(blob_file != nullptr);
assert(expiration != kNoExpiration);
uint64_t epoch_read = 0;
{ {
ReadLock rl(&mutex_); ReadLock rl(&mutex_);
*blob_file = FindBlobFileLocked(expiration);
epoch_read = epoch_of_.load();
}
if (*blob_file != nullptr) { if (open_non_ttl_file_) {
assert(!(*blob_file)->Immutable()); assert(!open_non_ttl_file_->Immutable());
return Status::OK(); *blob_file = open_non_ttl_file_;
return Status::OK();
}
} }
uint64_t exp_low = // Check again
(expiration / bdb_options_.ttl_range_secs) * bdb_options_.ttl_range_secs; WriteLock wl(&mutex_);
uint64_t exp_high = exp_low + bdb_options_.ttl_range_secs;
ExpirationRange expiration_range = std::make_pair(exp_low, exp_high);
*blob_file = NewBlobFile("SelectBlobFileTTL");
assert(*blob_file != nullptr);
ROCKS_LOG_INFO(db_options_.info_log, if (open_non_ttl_file_) {
"New blob file TTL range: %s %" PRIu64 " %" PRIu64, assert(!open_non_ttl_file_->Immutable());
(*blob_file)->PathName().c_str(), exp_low, exp_high); *blob_file = open_non_ttl_file_;
LogFlush(db_options_.info_log); return Status::OK();
}
// we don't need to take lock as no other thread is seeing bfile yet
std::shared_ptr<Writer> writer; std::shared_ptr<Writer> writer;
Status s = CheckOrCreateWriterLocked(*blob_file, &writer); const Status s = CreateBlobFileAndWriter(
/* has_ttl */ false, ExpirationRange(),
/* reason */ "SelectBlobFile", blob_file, &writer);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_ERROR(
db_options_.info_log,
"Failed to get writer from blob file with TTL: %s, error: %s",
(*blob_file)->PathName().c_str(), s.ToString().c_str());
return s; return s;
} }
(*blob_file)->header_.expiration_range = expiration_range; blob_files_.insert(std::map<uint64_t, std::shared_ptr<BlobFile>>::value_type(
(*blob_file)->header_.compression = bdb_options_.compression; (*blob_file)->BlobFileNumber(), *blob_file));
(*blob_file)->header_.has_ttl = true; open_non_ttl_file_ = *blob_file;
(*blob_file)->header_.column_family_id =
reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())->GetID();
(*blob_file)->header_valid_ = true;
(*blob_file)->SetColumnFamilyId((*blob_file)->header_.column_family_id);
(*blob_file)->SetHasTTL(true);
(*blob_file)->SetCompression(bdb_options_.compression);
(*blob_file)->file_size_ = BlobLogHeader::kSize;
// set the first value of the range, since that is return s;
// concrete at this time. also necessary to add to open_ttl_files_ }
(*blob_file)->expiration_range_ = expiration_range;
WriteLock wl(&mutex_); Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration,
// in case the epoch has shifted in the interim, then check std::shared_ptr<BlobFile>* blob_file) {
// check condition again - should be rare. assert(blob_file);
if (epoch_of_.load() != epoch_read) { assert(expiration != kNoExpiration);
std::shared_ptr<BlobFile> blob_file2 = FindBlobFileLocked(expiration);
if (blob_file2 != nullptr) { {
*blob_file = std::move(blob_file2); ReadLock rl(&mutex_);
*blob_file = FindBlobFileLocked(expiration);
if (*blob_file != nullptr) {
assert(!(*blob_file)->Immutable());
return Status::OK(); return Status::OK();
} }
} }
s = writer->WriteHeader((*blob_file)->header_); // Check again
WriteLock wl(&mutex_);
*blob_file = FindBlobFileLocked(expiration);
if (*blob_file != nullptr) {
assert(!(*blob_file)->Immutable());
return Status::OK();
}
const uint64_t exp_low =
(expiration / bdb_options_.ttl_range_secs) * bdb_options_.ttl_range_secs;
const uint64_t exp_high = exp_low + bdb_options_.ttl_range_secs;
const ExpirationRange expiration_range(exp_low, exp_high);
std::ostringstream oss;
oss << "SelectBlobFileTTL range: [" << exp_low << ',' << exp_high << ')';
std::shared_ptr<Writer> writer;
const Status s =
CreateBlobFileAndWriter(/* has_ttl */ true, expiration_range,
/* reason */ oss.str(), blob_file, &writer);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_ERROR(db_options_.info_log,
"Failed to write header to new blob file: %s"
" status: '%s'",
(*blob_file)->PathName().c_str(), s.ToString().c_str());
return s; return s;
} }
blob_files_.insert( blob_files_.insert(std::map<uint64_t, std::shared_ptr<BlobFile>>::value_type(
std::make_pair((*blob_file)->BlobFileNumber(), *blob_file)); (*blob_file)->BlobFileNumber(), *blob_file));
open_ttl_files_.insert(*blob_file); open_ttl_files_.insert(*blob_file);
total_blob_size_ += BlobLogHeader::kSize;
epoch_of_++;
return s; return s;
} }
@ -1954,7 +1944,7 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
// new file // new file
std::string reason("GC of "); std::string reason("GC of ");
reason += bfptr->PathName(); reason += bfptr->PathName();
newfile = NewBlobFile(reason); newfile = NewBlobFile(bfptr->HasTTL(), bfptr->expiration_range_, reason);
s = CheckOrCreateWriterLocked(newfile, &new_writer); s = CheckOrCreateWriterLocked(newfile, &new_writer);
if (!s.ok()) { if (!s.ok()) {
@ -1963,14 +1953,7 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
newfile->PathName().c_str(), s.ToString().c_str()); newfile->PathName().c_str(), s.ToString().c_str());
break; break;
} }
// Can't use header beyond this point
newfile->header_ = std::move(header);
newfile->header_valid_ = true;
newfile->file_size_ = BlobLogHeader::kSize; newfile->file_size_ = BlobLogHeader::kSize;
newfile->SetColumnFamilyId(bfptr->column_family_id());
newfile->SetHasTTL(bfptr->HasTTL());
newfile->SetCompression(bfptr->compression());
newfile->expiration_range_ = bfptr->expiration_range_;
s = new_writer->WriteHeader(newfile->header_); s = new_writer->WriteHeader(newfile->header_);
if (!s.ok()) { if (!s.ok()) {

@ -264,14 +264,22 @@ class BlobDBImpl : public BlobDB {
const Slice& value, uint64_t expiration, const Slice& value, uint64_t expiration,
std::string* index_entry); std::string* index_entry);
// find an existing blob log file based on the expiration unix epoch // Create a new blob file and associated writer.
// if such a file does not exist, return nullptr Status CreateBlobFileAndWriter(bool has_ttl,
const ExpirationRange& expiration_range,
const std::string& reason,
std::shared_ptr<BlobFile>* blob_file,
std::shared_ptr<Writer>* writer);
// Get the open non-TTL blob log file, or create a new one if no such file
// exists.
Status SelectBlobFile(std::shared_ptr<BlobFile>* blob_file);
// Get the open TTL blob log file for a certain expiration, or create a new
// one if no such file exists.
Status SelectBlobFileTTL(uint64_t expiration, Status SelectBlobFileTTL(uint64_t expiration,
std::shared_ptr<BlobFile>* blob_file); std::shared_ptr<BlobFile>* blob_file);
// find an existing blob log file to append the value to
Status SelectBlobFile(std::shared_ptr<BlobFile>* blob_file);
std::shared_ptr<BlobFile> FindBlobFileLocked(uint64_t expiration) const; std::shared_ptr<BlobFile> FindBlobFileLocked(uint64_t expiration) const;
// periodic sanity check. Bunch of checks // periodic sanity check. Bunch of checks
@ -300,7 +308,9 @@ class BlobDBImpl : public BlobDB {
void StartBackgroundTasks(); void StartBackgroundTasks();
// add a new Blob File // add a new Blob File
std::shared_ptr<BlobFile> NewBlobFile(const std::string& reason); std::shared_ptr<BlobFile> NewBlobFile(bool has_ttl,
const ExpirationRange& expiration_range,
const std::string& reason);
// collect all the blob log files from the blob directory // collect all the blob log files from the blob directory
Status GetAllBlobFiles(std::set<uint64_t>* file_numbers); Status GetAllBlobFiles(std::set<uint64_t>* file_numbers);
@ -434,9 +444,6 @@ class BlobDBImpl : public BlobDB {
// The largest sequence number that has been flushed. // The largest sequence number that has been flushed.
SequenceNumber flush_sequence_; SequenceNumber flush_sequence_;
// epoch or version of the open files.
std::atomic<uint64_t> epoch_of_;
// opened non-TTL blob file. // opened non-TTL blob file.
std::shared_ptr<BlobFile> open_non_ttl_file_; std::shared_ptr<BlobFile> open_non_ttl_file_;

@ -10,7 +10,6 @@
#include <cinttypes> #include <cinttypes>
#include <algorithm> #include <algorithm>
#include <limits>
#include <memory> #include <memory>
#include "db/column_family.h" #include "db/column_family.h"
@ -25,45 +24,24 @@ namespace rocksdb {
namespace blob_db { namespace blob_db {
BlobFile::BlobFile()
: parent_(nullptr),
file_number_(0),
info_log_(nullptr),
column_family_id_(std::numeric_limits<uint32_t>::max()),
compression_(kNoCompression),
has_ttl_(false),
blob_count_(0),
file_size_(0),
closed_(false),
immutable_sequence_(0),
obsolete_(false),
obsolete_sequence_(0),
expiration_range_({0, 0}),
last_access_(-1),
last_fsync_(0),
header_valid_(false),
footer_valid_(false) {}
BlobFile::BlobFile(const BlobDBImpl* p, const std::string& bdir, uint64_t fn, BlobFile::BlobFile(const BlobDBImpl* p, const std::string& bdir, uint64_t fn,
Logger* info_log) Logger* info_log)
: parent_(p), path_to_dir_(bdir), file_number_(fn), info_log_(info_log) {}
BlobFile::BlobFile(const BlobDBImpl* p, const std::string& bdir, uint64_t fn,
Logger* info_log, uint32_t column_family_id,
CompressionType compression, bool has_ttl,
const ExpirationRange& expiration_range)
: parent_(p), : parent_(p),
path_to_dir_(bdir), path_to_dir_(bdir),
file_number_(fn), file_number_(fn),
info_log_(info_log), info_log_(info_log),
column_family_id_(std::numeric_limits<uint32_t>::max()), column_family_id_(column_family_id),
compression_(kNoCompression), compression_(compression),
has_ttl_(false), has_ttl_(has_ttl),
blob_count_(0), expiration_range_(expiration_range),
file_size_(0), header_(column_family_id, compression, has_ttl, expiration_range),
closed_(false), header_valid_(true) {}
immutable_sequence_(0),
obsolete_(false),
obsolete_sequence_(0),
expiration_range_({0, 0}),
last_access_(-1),
last_fsync_(0),
header_valid_(false),
footer_valid_(false) {}
BlobFile::~BlobFile() { BlobFile::~BlobFile() {
if (obsolete_) { if (obsolete_) {

@ -6,6 +6,7 @@
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
#include <atomic> #include <atomic>
#include <limits>
#include <memory> #include <memory>
#include <unordered_set> #include <unordered_set>
@ -29,7 +30,7 @@ class BlobFile {
private: private:
// access to parent // access to parent
const BlobDBImpl* parent_; const BlobDBImpl* parent_{nullptr};
// path to blob directory // path to blob directory
std::string path_to_dir_; std::string path_to_dir_;
@ -37,49 +38,50 @@ class BlobFile {
// the id of the file. // the id of the file.
// the above 2 are created during file creation and never changed // the above 2 are created during file creation and never changed
// after that // after that
uint64_t file_number_; uint64_t file_number_{0};
// The file numbers of the SST files whose oldest blob file reference // The file numbers of the SST files whose oldest blob file reference
// points to this blob file. // points to this blob file.
std::unordered_set<uint64_t> linked_sst_files_; std::unordered_set<uint64_t> linked_sst_files_;
// Info log. // Info log.
Logger* info_log_; Logger* info_log_{nullptr};
// Column family id. // Column family id.
uint32_t column_family_id_; uint32_t column_family_id_{std::numeric_limits<uint32_t>::max()};
// Compression type of blobs in the file // Compression type of blobs in the file
CompressionType compression_; CompressionType compression_{kNoCompression};
// If true, the keys in this file all has TTL. Otherwise all keys don't // If true, the keys in this file all has TTL. Otherwise all keys don't
// have TTL. // have TTL.
bool has_ttl_; bool has_ttl_{false};
// TTL range of blobs in the file.
ExpirationRange expiration_range_;
// number of blobs in the file // number of blobs in the file
std::atomic<uint64_t> blob_count_; std::atomic<uint64_t> blob_count_{0};
// size of the file // size of the file
std::atomic<uint64_t> file_size_; std::atomic<uint64_t> file_size_{0};
BlobLogHeader header_; BlobLogHeader header_;
// closed_ = true implies the file is no more mutable // closed_ = true implies the file is no more mutable
// no more blobs will be appended and the footer has been written out // no more blobs will be appended and the footer has been written out
std::atomic<bool> closed_; std::atomic<bool> closed_{false};
// The latest sequence number when the file was closed/made immutable. // The latest sequence number when the file was closed/made immutable.
SequenceNumber immutable_sequence_; SequenceNumber immutable_sequence_{0};
// has a pass of garbage collection successfully finished on this file // has a pass of garbage collection successfully finished on this file
// obsolete_ still needs to do iterator/snapshot checks // obsolete_ still needs to do iterator/snapshot checks
std::atomic<bool> obsolete_; std::atomic<bool> obsolete_{false};
// The last sequence number by the time the file marked as obsolete. // The last sequence number by the time the file marked as obsolete.
// Data in this file is visible to a snapshot taken before the sequence. // Data in this file is visible to a snapshot taken before the sequence.
SequenceNumber obsolete_sequence_; SequenceNumber obsolete_sequence_{0};
ExpirationRange expiration_range_;
// Sequential/Append writer for blobs // Sequential/Append writer for blobs
std::shared_ptr<Writer> log_writer_; std::shared_ptr<Writer> log_writer_;
@ -92,29 +94,30 @@ class BlobFile {
mutable port::RWMutex mutex_; mutable port::RWMutex mutex_;
// time when the random access reader was last created. // time when the random access reader was last created.
std::atomic<std::int64_t> last_access_; std::atomic<std::int64_t> last_access_{-1};
// last time file was fsync'd/fdatasyncd // last time file was fsync'd/fdatasyncd
std::atomic<uint64_t> last_fsync_; std::atomic<uint64_t> last_fsync_{0};
bool header_valid_; bool header_valid_{false};
bool footer_valid_; bool footer_valid_{false};
public: public:
BlobFile(); BlobFile() = default;
BlobFile(const BlobDBImpl* parent, const std::string& bdir, uint64_t fnum, BlobFile(const BlobDBImpl* parent, const std::string& bdir, uint64_t fnum,
Logger* info_log); Logger* info_log);
BlobFile(const BlobDBImpl* parent, const std::string& bdir, uint64_t fnum,
Logger* info_log, uint32_t column_family_id,
CompressionType compression, bool has_ttl,
const ExpirationRange& expiration_range);
~BlobFile(); ~BlobFile();
uint32_t column_family_id() const; uint32_t column_family_id() const;
void SetColumnFamilyId(uint32_t cf_id) {
column_family_id_ = cf_id;
}
// Returns log file's absolute pathname. // Returns log file's absolute pathname.
std::string PathName() const; std::string PathName() const;
@ -203,10 +206,6 @@ class BlobFile {
CompressionType compression() const { return compression_; } CompressionType compression() const { return compression_; }
void SetCompression(CompressionType c) {
compression_ = c;
}
std::shared_ptr<Writer> GetWriter() const { return log_writer_; } std::shared_ptr<Writer> GetWriter() const { return log_writer_; }
// Read blob file header and footer. Return corruption if file header is // Read blob file header and footer. Return corruption if file header is

@ -43,11 +43,19 @@ using ExpirationRange = std::pair<uint64_t, uint64_t>;
struct BlobLogHeader { struct BlobLogHeader {
static constexpr size_t kSize = 30; static constexpr size_t kSize = 30;
BlobLogHeader() = default;
BlobLogHeader(uint32_t _column_family_id, CompressionType _compression,
bool _has_ttl, const ExpirationRange& _expiration_range)
: column_family_id(_column_family_id),
compression(_compression),
has_ttl(_has_ttl),
expiration_range(_expiration_range) {}
uint32_t version = kVersion1; uint32_t version = kVersion1;
uint32_t column_family_id = 0; uint32_t column_family_id = 0;
CompressionType compression = kNoCompression; CompressionType compression = kNoCompression;
bool has_ttl = false; bool has_ttl = false;
ExpirationRange expiration_range = std::make_pair(0, 0); ExpirationRange expiration_range;
void EncodeTo(std::string* dst); void EncodeTo(std::string* dst);

Loading…
Cancel
Save