|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#include "db/compacted_db_impl.h"
|
|
|
|
#include "db/db_impl.h"
|
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "table/get_context.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
extern void MarkKeyMayExist(void* arg);
|
|
|
|
extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
|
|
|
|
const Slice& v, bool hit_and_return);
|
|
|
|
|
|
|
|
// Forwards `options` and `dbname` to the DBImpl base and leaves the cached
// column family, version and comparator pointers null. They are assigned
// later by Init().
CompactedDBImpl::CompactedDBImpl(const DBOptions& options,
                                 const std::string& dbname)
    : DBImpl(options, dbname),
      cfd_(nullptr),
      version_(nullptr),
      user_comparator_(nullptr) {}
|
|
|
|
|
|
|
|
// Nothing to release here beyond what the members and the base class
// destructor already handle.
CompactedDBImpl::~CompactedDBImpl() = default;
|
|
|
|
|
|
|
|
size_t CompactedDBImpl::FindFile(const Slice& key) {
|
|
|
|
size_t right = files_.num_files - 1;
|
|
|
|
auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool {
|
|
|
|
return user_comparator_->Compare(ExtractUserKey(f.largest_key), k) < 0;
|
|
|
|
};
|
|
|
|
return static_cast<size_t>(std::lower_bound(files_.files,
|
|
|
|
files_.files + right, key, cmp) - files_.files);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Point lookup of `key`. The column family handle argument is ignored.
//
// The candidate file is chosen with FindFile() and queried through its table
// reader at kMaxSequenceNumber.
//
// Returns:
//   - Status::OK() when the lookup ends in GetContext::kFound; the value is
//     delivered through `value` by the GetContext.
//   - Any non-OK, non-NotFound status reported by the table reader, so read
//     errors are not silently turned into NotFound.
//   - Status::NotFound() otherwise.
Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*,
                            const Slice& key, PinnableSlice* value) {
  const FdWithKeyRange& f = files_.files[FindFile(key)];
  // Same short-circuit MultiGet() applies: a key that sorts before the
  // file's smallest key cannot be in it, so skip the table read.
  if (user_comparator_->Compare(key, ExtractUserKey(f.smallest_key)) < 0) {
    return Status::NotFound();
  }

  GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
                         GetContext::kNotFound, key, value, nullptr, nullptr,
                         nullptr, nullptr);
  LookupKey lkey(key, kMaxSequenceNumber);
  Status s = f.fd.table_reader->Get(options, lkey.internal_key(),
                                    &get_context, nullptr);
  // Propagate real failures instead of discarding the reader's status.
  if (!s.ok() && !s.IsNotFound()) {
    return s;
  }
  if (get_context.State() == GetContext::kFound) {
    return Status::OK();
  }
  return Status::NotFound();
}
|
|
|
|
|
|
|
|
std::vector<Status> CompactedDBImpl::MultiGet(const ReadOptions& options,
|
|
|
|
const std::vector<ColumnFamilyHandle*>&,
|
|
|
|
const std::vector<Slice>& keys, std::vector<std::string>* values) {
|
|
|
|
autovector<TableReader*, 16> reader_list;
|
|
|
|
for (const auto& key : keys) {
|
|
|
|
const FdWithKeyRange& f = files_.files[FindFile(key)];
|
|
|
|
if (user_comparator_->Compare(key, ExtractUserKey(f.smallest_key)) < 0) {
|
|
|
|
reader_list.push_back(nullptr);
|
|
|
|
} else {
|
|
|
|
LookupKey lkey(key, kMaxSequenceNumber);
|
|
|
|
f.fd.table_reader->Prepare(lkey.internal_key());
|
|
|
|
reader_list.push_back(f.fd.table_reader);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::vector<Status> statuses(keys.size(), Status::NotFound());
|
|
|
|
values->resize(keys.size());
|
|
|
|
int idx = 0;
|
|
|
|
for (auto* r : reader_list) {
|
|
|
|
if (r != nullptr) {
|
|
|
|
PinnableSlice pinnable_val;
|
|
|
|
std::string& value = (*values)[idx];
|
|
|
|
GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
|
|
|
|
GetContext::kNotFound, keys[idx], &pinnable_val,
|
|
|
|
nullptr, nullptr, nullptr, nullptr);
|
|
|
|
LookupKey lkey(keys[idx], kMaxSequenceNumber);
|
|
|
|
r->Get(options, lkey.internal_key(), &get_context, nullptr);
|
|
|
|
value.assign(pinnable_val.data(), pinnable_val.size());
|
|
|
|
if (get_context.State() == GetContext::kFound) {
|
|
|
|
statuses[idx] = Status::OK();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++idx;
|
|
|
|
}
|
|
|
|
return statuses;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recovers the default column family and selects the single level whose
// files will serve reads, storing it in files_.
//
// Returns the Recover() status if recovery fails. Otherwise returns
// Status::NotSupported when the file layout is not one of the two accepted
// shapes:
//   - exactly one file in L0 and no other non-empty level, or
//   - no file in L0 and files only in the last non-empty level.
Status CompactedDBImpl::Init(const Options& options) {
  SuperVersionContext sv_context(/* create_superversion */ true);

  mutex_.Lock();
  ColumnFamilyDescriptor cf(kDefaultColumnFamilyName,
                            ColumnFamilyOptions(options));
  Status s = Recover({cf}, true /* read only */, false, true);
  if (s.ok()) {
    cfd_ = reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())
               ->cfd();
    cfd_->InstallSuperVersion(&sv_context, &mutex_);
  }
  mutex_.Unlock();
  // Cleaned only after the mutex has been released, as in the sequence above.
  sv_context.Clean();
  if (!s.ok()) {
    return s;
  }

  NewThreadStatusCfInfo(cfd_);
  version_ = cfd_->GetSuperVersion()->current;
  user_comparator_ = cfd_->user_comparator();

  auto* vstorage = version_->storage_info();
  const int num_levels = vstorage->num_non_empty_levels();
  if (num_levels == 0) {
    return Status::NotSupported("no file exists");
  }

  // L0 may hold at most one file, and only if it is the sole non-empty level.
  const LevelFilesBrief& level0 = vstorage->LevelFilesBrief(0);
  if (level0.num_files > 1) {
    return Status::NotSupported("L0 contain more than 1 file");
  }
  if (level0.num_files == 1) {
    if (num_levels > 1) {
      return Status::NotSupported("Both L0 and other level contain files");
    }
    files_ = level0;
    return Status::OK();
  }

  // With L0 empty, every level strictly between L0 and the last non-empty
  // level must be empty as well.
  const int bottom_level = num_levels - 1;
  for (int lvl = 1; lvl < bottom_level; ++lvl) {
    if (vstorage->LevelFilesBrief(lvl).num_files > 0) {
      return Status::NotSupported("Other levels also contain files");
    }
  }

  const LevelFilesBrief& bottom = vstorage->LevelFilesBrief(bottom_level);
  if (bottom.num_files == 0) {
    return Status::NotSupported("no file exists");
  }
  files_ = bottom;
  return Status::OK();
}
|
|
|
|
|
|
|
|
// Opens `dbname` as a CompactedDBImpl.
//
// `*dbptr` is set to nullptr up front and only receives the new instance when
// Init() succeeds; ownership then passes to the caller. Returns
// Status::InvalidArgument when options.max_open_files is not -1 or when a
// merge operator is configured, and otherwise whatever Init() returns.
Status CompactedDBImpl::Open(const Options& options, const std::string& dbname,
                             DB** dbptr) {
  *dbptr = nullptr;

  if (options.max_open_files != -1) {
    return Status::InvalidArgument("require max_open_files = -1");
  }
  if (options.merge_operator != nullptr) {
    return Status::InvalidArgument("merge operator is not supported");
  }

  DBOptions db_options(options);
  std::unique_ptr<CompactedDBImpl> db(new CompactedDBImpl(db_options, dbname));
  Status s = db->Init(options);
  if (!s.ok()) {
    // `db` is destroyed on return; *dbptr stays null.
    return s;
  }

  db->StartTimedTasks();
  const auto& info_log = db->immutable_db_options_.info_log;
  ROCKS_LOG_INFO(info_log, "Opened the db as fully compacted mode");
  LogFlush(info_log);
  *dbptr = db.release();
  return s;
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
#endif // ROCKSDB_LITE
|