commit
ba6d660f6d
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,62 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "db/flush_scheduler.h" |
||||||
|
|
||||||
|
#include <cassert> |
||||||
|
|
||||||
|
#include "db/column_family.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
void FlushScheduler::ScheduleFlush(ColumnFamilyData* cfd) { |
||||||
|
#ifndef NDEBUG |
||||||
|
assert(column_families_set_.find(cfd) == column_families_set_.end()); |
||||||
|
column_families_set_.insert(cfd); |
||||||
|
#endif // NDEBUG
|
||||||
|
cfd->Ref(); |
||||||
|
column_families_.push_back(cfd); |
||||||
|
} |
||||||
|
|
||||||
|
ColumnFamilyData* FlushScheduler::GetNextColumnFamily() { |
||||||
|
ColumnFamilyData* cfd = nullptr; |
||||||
|
while (column_families_.size() > 0) { |
||||||
|
cfd = column_families_.front(); |
||||||
|
column_families_.pop_front(); |
||||||
|
if (cfd->IsDropped()) { |
||||||
|
if (cfd->Unref()) { |
||||||
|
delete cfd; |
||||||
|
} |
||||||
|
} else { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
#ifndef NDEBUG |
||||||
|
if (cfd != nullptr) { |
||||||
|
auto itr = column_families_set_.find(cfd); |
||||||
|
assert(itr != column_families_set_.end()); |
||||||
|
column_families_set_.erase(itr); |
||||||
|
} |
||||||
|
#endif // NDEBUG
|
||||||
|
return cfd; |
||||||
|
} |
||||||
|
|
||||||
|
bool FlushScheduler::Empty() { return column_families_.empty(); } |
||||||
|
|
||||||
|
void FlushScheduler::Clear() { |
||||||
|
for (auto cfd : column_families_) { |
||||||
|
#ifndef NDEBUG |
||||||
|
auto itr = column_families_set_.find(cfd); |
||||||
|
assert(itr != column_families_set_.end()); |
||||||
|
column_families_set_.erase(itr); |
||||||
|
#endif // NDEBUG
|
||||||
|
if (cfd->Unref()) { |
||||||
|
delete cfd; |
||||||
|
} |
||||||
|
} |
||||||
|
column_families_.clear(); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,39 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <deque> |
||||||
|
#include <set> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class ColumnFamilyData; |
||||||
|
|
||||||
|
// This class is thread-compatible. It's should only be accessed from single
|
||||||
|
// write thread (between BeginWrite() and EndWrite())
|
||||||
|
class FlushScheduler { |
||||||
|
public: |
||||||
|
FlushScheduler() = default; |
||||||
|
~FlushScheduler() = default; |
||||||
|
|
||||||
|
void ScheduleFlush(ColumnFamilyData* cfd); |
||||||
|
// Returns Ref()-ed column family. Client needs to Unref()
|
||||||
|
ColumnFamilyData* GetNextColumnFamily(); |
||||||
|
|
||||||
|
bool Empty(); |
||||||
|
|
||||||
|
void Clear(); |
||||||
|
|
||||||
|
private: |
||||||
|
std::deque<ColumnFamilyData*> column_families_; |
||||||
|
#ifndef NDEBUG |
||||||
|
std::set<ColumnFamilyData*> column_families_set_; |
||||||
|
#endif // NDEBUG
|
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,37 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "db/write_controller.h" |
||||||
|
|
||||||
|
#include <cassert> |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
std::unique_ptr<WriteControllerToken> WriteController::GetStopToken() { |
||||||
|
++total_stopped_; |
||||||
|
return std::unique_ptr<WriteControllerToken>(new StopWriteToken(this)); |
||||||
|
} |
||||||
|
|
||||||
|
std::unique_ptr<WriteControllerToken> WriteController::GetDelayToken( |
||||||
|
uint64_t delay_us) { |
||||||
|
total_delay_us_ += delay_us; |
||||||
|
return std::unique_ptr<WriteControllerToken>( |
||||||
|
new DelayWriteToken(this, delay_us)); |
||||||
|
} |
||||||
|
|
||||||
|
bool WriteController::IsStopped() const { return total_stopped_ > 0; } |
||||||
|
uint64_t WriteController::GetDelay() const { return total_delay_us_; } |
||||||
|
|
||||||
|
StopWriteToken::~StopWriteToken() { |
||||||
|
assert(controller_->total_stopped_ >= 1); |
||||||
|
--controller_->total_stopped_; |
||||||
|
} |
||||||
|
|
||||||
|
DelayWriteToken::~DelayWriteToken() { |
||||||
|
assert(controller_->total_delay_us_ >= delay_us_); |
||||||
|
controller_->total_delay_us_ -= delay_us_; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,78 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include <memory> |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class WriteControllerToken; |
||||||
|
|
||||||
|
// WriteController is controlling write stalls in our write code-path. Write
|
||||||
|
// stalls happen when compaction can't keep up with write rate.
|
||||||
|
// All of the methods here (including WriteControllerToken's destructors) need
|
||||||
|
// to be called while holding DB mutex
|
||||||
|
class WriteController { |
||||||
|
public: |
||||||
|
WriteController() : total_stopped_(0), total_delay_us_(0) {} |
||||||
|
~WriteController() = default; |
||||||
|
|
||||||
|
// When an actor (column family) requests a stop token, all writes will be
|
||||||
|
// stopped until the stop token is released (deleted)
|
||||||
|
std::unique_ptr<WriteControllerToken> GetStopToken(); |
||||||
|
// When an actor (column family) requests a delay token, total delay for all
|
||||||
|
// writes will be increased by delay_us. The delay will last until delay token
|
||||||
|
// is released
|
||||||
|
std::unique_ptr<WriteControllerToken> GetDelayToken(uint64_t delay_us); |
||||||
|
|
||||||
|
// these two metods are querying the state of the WriteController
|
||||||
|
bool IsStopped() const; |
||||||
|
uint64_t GetDelay() const; |
||||||
|
|
||||||
|
private: |
||||||
|
friend class WriteControllerToken; |
||||||
|
friend class StopWriteToken; |
||||||
|
friend class DelayWriteToken; |
||||||
|
|
||||||
|
int total_stopped_; |
||||||
|
uint64_t total_delay_us_; |
||||||
|
}; |
||||||
|
|
||||||
|
class WriteControllerToken { |
||||||
|
public: |
||||||
|
explicit WriteControllerToken(WriteController* controller) |
||||||
|
: controller_(controller) {} |
||||||
|
virtual ~WriteControllerToken() {} |
||||||
|
|
||||||
|
protected: |
||||||
|
WriteController* controller_; |
||||||
|
|
||||||
|
private: |
||||||
|
// no copying allowed
|
||||||
|
WriteControllerToken(const WriteControllerToken&) = delete; |
||||||
|
void operator=(const WriteControllerToken&) = delete; |
||||||
|
}; |
||||||
|
|
||||||
|
class StopWriteToken : public WriteControllerToken { |
||||||
|
public: |
||||||
|
explicit StopWriteToken(WriteController* controller) |
||||||
|
: WriteControllerToken(controller) {} |
||||||
|
virtual ~StopWriteToken(); |
||||||
|
}; |
||||||
|
|
||||||
|
class DelayWriteToken : public WriteControllerToken { |
||||||
|
public: |
||||||
|
DelayWriteToken(WriteController* controller, uint64_t delay_us) |
||||||
|
: WriteControllerToken(controller), delay_us_(delay_us) {} |
||||||
|
virtual ~DelayWriteToken(); |
||||||
|
|
||||||
|
private: |
||||||
|
uint64_t delay_us_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,40 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
#include "db/write_controller.h" |
||||||
|
|
||||||
|
#include "util/testharness.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class WriteControllerTest {}; |
||||||
|
|
||||||
|
TEST(WriteControllerTest, SanityTest) { |
||||||
|
WriteController controller; |
||||||
|
auto stop_token_1 = controller.GetStopToken(); |
||||||
|
auto stop_token_2 = controller.GetStopToken(); |
||||||
|
|
||||||
|
ASSERT_EQ(true, controller.IsStopped()); |
||||||
|
stop_token_1.reset(); |
||||||
|
ASSERT_EQ(true, controller.IsStopped()); |
||||||
|
stop_token_2.reset(); |
||||||
|
ASSERT_EQ(false, controller.IsStopped()); |
||||||
|
|
||||||
|
auto delay_token_1 = controller.GetDelayToken(5); |
||||||
|
ASSERT_EQ(static_cast<uint64_t>(5), controller.GetDelay()); |
||||||
|
auto delay_token_2 = controller.GetDelayToken(8); |
||||||
|
ASSERT_EQ(static_cast<uint64_t>(13), controller.GetDelay()); |
||||||
|
|
||||||
|
delay_token_2.reset(); |
||||||
|
ASSERT_EQ(static_cast<uint64_t>(5), controller.GetDelay()); |
||||||
|
delay_token_1.reset(); |
||||||
|
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay()); |
||||||
|
delay_token_1.reset(); |
||||||
|
ASSERT_EQ(false, controller.IsStopped()); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); } |
@ -0,0 +1,147 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "db/write_thread.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
Status WriteThread::EnterWriteThread(WriteThread::Writer* w, |
||||||
|
uint64_t expiration_time) { |
||||||
|
// the following code block pushes the current writer "w" into the writer
|
||||||
|
// queue "writers_" and wait until one of the following conditions met:
|
||||||
|
// 1. the job of "w" has been done by some other writers.
|
||||||
|
// 2. "w" becomes the first writer in "writers_"
|
||||||
|
// 3. "w" timed-out.
|
||||||
|
writers_.push_back(w); |
||||||
|
|
||||||
|
bool timed_out = false; |
||||||
|
while (!w->done && w != writers_.front()) { |
||||||
|
if (expiration_time == 0) { |
||||||
|
w->cv.Wait(); |
||||||
|
} else if (w->cv.TimedWait(expiration_time)) { |
||||||
|
if (w->in_batch_group) { |
||||||
|
// then it means the front writer is currently doing the
|
||||||
|
// write on behalf of this "timed-out" writer. Then it
|
||||||
|
// should wait until the write completes.
|
||||||
|
expiration_time = 0; |
||||||
|
} else { |
||||||
|
timed_out = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (timed_out) { |
||||||
|
#ifndef NDEBUG |
||||||
|
bool found = false; |
||||||
|
#endif |
||||||
|
for (auto iter = writers_.begin(); iter != writers_.end(); iter++) { |
||||||
|
if (*iter == w) { |
||||||
|
writers_.erase(iter); |
||||||
|
#ifndef NDEBUG |
||||||
|
found = true; |
||||||
|
#endif |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
#ifndef NDEBUG |
||||||
|
assert(found); |
||||||
|
#endif |
||||||
|
// writers_.front() might still be in cond_wait without a time-out.
|
||||||
|
// As a result, we need to signal it to wake it up. Otherwise no
|
||||||
|
// one else will wake him up, and RocksDB will hang.
|
||||||
|
if (!writers_.empty()) { |
||||||
|
writers_.front()->cv.Signal(); |
||||||
|
} |
||||||
|
return Status::TimedOut(); |
||||||
|
} |
||||||
|
return Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
void WriteThread::ExitWriteThread(WriteThread::Writer* w, |
||||||
|
WriteThread::Writer* last_writer, |
||||||
|
Status status) { |
||||||
|
// Pop out the current writer and all writers being pushed before the
|
||||||
|
// current writer from the writer queue.
|
||||||
|
while (!writers_.empty()) { |
||||||
|
Writer* ready = writers_.front(); |
||||||
|
writers_.pop_front(); |
||||||
|
if (ready != w) { |
||||||
|
ready->status = status; |
||||||
|
ready->done = true; |
||||||
|
ready->cv.Signal(); |
||||||
|
} |
||||||
|
if (ready == last_writer) break; |
||||||
|
} |
||||||
|
|
||||||
|
// Notify new head of write queue
|
||||||
|
if (!writers_.empty()) { |
||||||
|
writers_.front()->cv.Signal(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// This function will be called only when the first writer succeeds.
|
||||||
|
// All writers in the to-be-built batch group will be processed.
|
||||||
|
//
|
||||||
|
// REQUIRES: Writer list must be non-empty
|
||||||
|
// REQUIRES: First writer must have a non-nullptr batch
|
||||||
|
void WriteThread::BuildBatchGroup(WriteThread::Writer** last_writer, |
||||||
|
autovector<WriteBatch*>* write_batch_group) { |
||||||
|
assert(!writers_.empty()); |
||||||
|
Writer* first = writers_.front(); |
||||||
|
assert(first->batch != nullptr); |
||||||
|
|
||||||
|
size_t size = WriteBatchInternal::ByteSize(first->batch); |
||||||
|
write_batch_group->push_back(first->batch); |
||||||
|
|
||||||
|
// Allow the group to grow up to a maximum size, but if the
|
||||||
|
// original write is small, limit the growth so we do not slow
|
||||||
|
// down the small write too much.
|
||||||
|
size_t max_size = 1 << 20; |
||||||
|
if (size <= (128<<10)) { |
||||||
|
max_size = size + (128<<10); |
||||||
|
} |
||||||
|
|
||||||
|
*last_writer = first; |
||||||
|
std::deque<Writer*>::iterator iter = writers_.begin(); |
||||||
|
++iter; // Advance past "first"
|
||||||
|
for (; iter != writers_.end(); ++iter) { |
||||||
|
Writer* w = *iter; |
||||||
|
if (w->sync && !first->sync) { |
||||||
|
// Do not include a sync write into a batch handled by a non-sync write.
|
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (!w->disableWAL && first->disableWAL) { |
||||||
|
// Do not include a write that needs WAL into a batch that has
|
||||||
|
// WAL disabled.
|
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (w->timeout_hint_us < first->timeout_hint_us) { |
||||||
|
// Do not include those writes with shorter timeout. Otherwise, we might
|
||||||
|
// execute a write that should instead be aborted because of timeout.
|
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (w->batch == nullptr) { |
||||||
|
// Do not include those writes with nullptr batch. Those are not writes,
|
||||||
|
// those are something else. They want to be alone
|
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
size += WriteBatchInternal::ByteSize(w->batch); |
||||||
|
if (size > max_size) { |
||||||
|
// Do not make batch too big
|
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
write_batch_group->push_back(w->batch); |
||||||
|
w->in_batch_group = true; |
||||||
|
*last_writer = w; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,80 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <deque> |
||||||
|
#include <limits> |
||||||
|
#include "rocksdb/status.h" |
||||||
|
#include "db/write_batch_internal.h" |
||||||
|
#include "util/autovector.h" |
||||||
|
#include "port/port.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class WriteThread { |
||||||
|
public: |
||||||
|
static const uint64_t kNoTimeOut = std::numeric_limits<uint64_t>::max(); |
||||||
|
// Information kept for every waiting writer
|
||||||
|
struct Writer { |
||||||
|
Status status; |
||||||
|
WriteBatch* batch; |
||||||
|
bool sync; |
||||||
|
bool disableWAL; |
||||||
|
bool in_batch_group; |
||||||
|
bool done; |
||||||
|
uint64_t timeout_hint_us; |
||||||
|
port::CondVar cv; |
||||||
|
|
||||||
|
explicit Writer(port::Mutex* mu) |
||||||
|
: batch(nullptr), |
||||||
|
sync(false), |
||||||
|
disableWAL(false), |
||||||
|
in_batch_group(false), |
||||||
|
done(false), |
||||||
|
timeout_hint_us(kNoTimeOut), |
||||||
|
cv(mu) {} |
||||||
|
}; |
||||||
|
|
||||||
|
WriteThread() = default; |
||||||
|
~WriteThread() = default; |
||||||
|
|
||||||
|
// Before applying write operation (such as DBImpl::Write, DBImpl::Flush)
|
||||||
|
// thread should grab the mutex_ and be the first on writers queue.
|
||||||
|
// EnterWriteThread is used for it.
|
||||||
|
// Be aware! Writer's job can be done by other thread (see DBImpl::Write
|
||||||
|
// for examples), so check it via w.done before applying changes.
|
||||||
|
//
|
||||||
|
// Writer* w: writer to be placed in the queue
|
||||||
|
// uint64_t expiration_time: maximum time to be in the queue
|
||||||
|
// See also: ExitWriteThread
|
||||||
|
// REQUIRES: db mutex held
|
||||||
|
Status EnterWriteThread(Writer* w, uint64_t expiration_time); |
||||||
|
|
||||||
|
// After doing write job, we need to remove already used writers from
|
||||||
|
// writers_ queue and notify head of the queue about it.
|
||||||
|
// ExitWriteThread is used for this.
|
||||||
|
//
|
||||||
|
// Writer* w: Writer, that was added by EnterWriteThread function
|
||||||
|
// Writer* last_writer: Since we can join a few Writers (as DBImpl::Write
|
||||||
|
// does)
|
||||||
|
// we should pass last_writer as a parameter to
|
||||||
|
// ExitWriteThread
|
||||||
|
// (if you don't touch other writers, just pass w)
|
||||||
|
// Status status: Status of write operation
|
||||||
|
// See also: EnterWriteThread
|
||||||
|
// REQUIRES: db mutex held
|
||||||
|
void ExitWriteThread(Writer* w, Writer* last_writer, Status status); |
||||||
|
|
||||||
|
void BuildBatchGroup(Writer** last_writer, |
||||||
|
autovector<WriteBatch*>* write_batch_group); |
||||||
|
|
||||||
|
private: |
||||||
|
// Queue of writers.
|
||||||
|
std::deque<Writer*> writers_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,84 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <vector> |
||||||
|
#include "rocksdb/options.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// ImmutableCFOptions is a data struct used by RocksDB internal. It contains a
|
||||||
|
// subset of Options that should not be changed during the entire lifetime
|
||||||
|
// of DB. You shouldn't need to access this data structure unless you are
|
||||||
|
// implementing a new TableFactory. Raw pointers defined in this struct do
|
||||||
|
// not have ownership to the data they point to. Options contains shared_ptr
|
||||||
|
// to these data.
|
||||||
|
struct ImmutableCFOptions { |
||||||
|
explicit ImmutableCFOptions(const Options& options); |
||||||
|
|
||||||
|
CompactionStyle compaction_style; |
||||||
|
|
||||||
|
CompactionOptionsUniversal compaction_options_universal; |
||||||
|
|
||||||
|
const SliceTransform* prefix_extractor; |
||||||
|
|
||||||
|
const Comparator* comparator; |
||||||
|
|
||||||
|
MergeOperator* merge_operator; |
||||||
|
|
||||||
|
const CompactionFilter* compaction_filter; |
||||||
|
|
||||||
|
CompactionFilterFactory* compaction_filter_factory; |
||||||
|
|
||||||
|
CompactionFilterFactoryV2* compaction_filter_factory_v2; |
||||||
|
|
||||||
|
Logger* info_log; |
||||||
|
|
||||||
|
Statistics* statistics; |
||||||
|
|
||||||
|
InfoLogLevel info_log_level; |
||||||
|
|
||||||
|
Env* env; |
||||||
|
|
||||||
|
// Allow the OS to mmap file for reading sst tables. Default: false
|
||||||
|
bool allow_mmap_reads; |
||||||
|
|
||||||
|
// Allow the OS to mmap file for writing. Default: false
|
||||||
|
bool allow_mmap_writes; |
||||||
|
|
||||||
|
std::vector<DbPath> db_paths; |
||||||
|
|
||||||
|
MemTableRepFactory* memtable_factory; |
||||||
|
|
||||||
|
TableFactory* table_factory; |
||||||
|
|
||||||
|
Options::TablePropertiesCollectorFactories |
||||||
|
table_properties_collector_factories; |
||||||
|
|
||||||
|
bool advise_random_on_open; |
||||||
|
|
||||||
|
// This options is required by PlainTableReader. May need to move it
|
||||||
|
// to PlainTalbeOptions just like bloom_bits_per_key
|
||||||
|
uint32_t bloom_locality; |
||||||
|
|
||||||
|
bool purge_redundant_kvs_while_flush; |
||||||
|
|
||||||
|
uint32_t min_partial_merge_operands; |
||||||
|
|
||||||
|
bool disable_data_sync; |
||||||
|
|
||||||
|
bool use_fsync; |
||||||
|
|
||||||
|
CompressionType compression; |
||||||
|
|
||||||
|
std::vector<CompressionType> compression_per_level; |
||||||
|
|
||||||
|
CompressionOptions compression_opts; |
||||||
|
|
||||||
|
Options::AccessHint access_hint_on_compaction_start; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,105 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// A WriteBatchWithIndex with a binary searchable index built for all the keys
|
||||||
|
// inserted.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include "rocksdb/comparator.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/status.h" |
||||||
|
#include "rocksdb/write_batch.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class ColumnFamilyHandle; |
||||||
|
struct SliceParts; |
||||||
|
class Comparator; |
||||||
|
|
||||||
|
enum WriteType { kPutRecord, kMergeRecord, kDeleteRecord, kLogDataRecord }; |
||||||
|
|
||||||
|
// an entry for Put, Merge or Delete entry for write batches. Used in
|
||||||
|
// WBWIIterator.
|
||||||
|
struct WriteEntry { |
||||||
|
WriteType type; |
||||||
|
Slice key; |
||||||
|
Slice value; |
||||||
|
}; |
||||||
|
|
||||||
|
// Iterator of one column family out of a WriteBatchWithIndex.
|
||||||
|
class WBWIIterator { |
||||||
|
public: |
||||||
|
virtual ~WBWIIterator() {} |
||||||
|
|
||||||
|
virtual bool Valid() const = 0; |
||||||
|
|
||||||
|
virtual void Seek(const Slice& key) = 0; |
||||||
|
|
||||||
|
virtual void Next() = 0; |
||||||
|
|
||||||
|
virtual const WriteEntry& Entry() const = 0; |
||||||
|
|
||||||
|
virtual Status status() const = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
// A WriteBatchWithIndex with a binary searchable index built for all the keys
|
||||||
|
// inserted.
|
||||||
|
// In Put(), Merge() or Delete(), the same function of the wrapped will be
|
||||||
|
// called. At the same time, indexes will be built.
|
||||||
|
// By calling GetWriteBatch(), a user will get the WriteBatch for the data
|
||||||
|
// they inserted, which can be used for DB::Write().
|
||||||
|
// A user can call NewIterator() to create an iterator.
|
||||||
|
class WriteBatchWithIndex { |
||||||
|
public: |
||||||
|
// backup_index_comparator: the backup comparator used to compare keys
|
||||||
|
// within the same column family, if column family is not given in the
|
||||||
|
// interface, or we can't find a column family from the column family handle
|
||||||
|
// passed in, backup_index_comparator will be used for the column family.
|
||||||
|
// reserved_bytes: reserved bytes in underlying WriteBatch
|
||||||
|
explicit WriteBatchWithIndex( |
||||||
|
const Comparator* backup_index_comparator = BytewiseComparator(), |
||||||
|
size_t reserved_bytes = 0); |
||||||
|
virtual ~WriteBatchWithIndex(); |
||||||
|
|
||||||
|
WriteBatch* GetWriteBatch(); |
||||||
|
|
||||||
|
virtual void Put(ColumnFamilyHandle* column_family, const Slice& key, |
||||||
|
const Slice& value); |
||||||
|
|
||||||
|
virtual void Put(const Slice& key, const Slice& value); |
||||||
|
|
||||||
|
virtual void Merge(ColumnFamilyHandle* column_family, const Slice& key, |
||||||
|
const Slice& value); |
||||||
|
|
||||||
|
virtual void Merge(const Slice& key, const Slice& value); |
||||||
|
|
||||||
|
virtual void PutLogData(const Slice& blob); |
||||||
|
|
||||||
|
virtual void Delete(ColumnFamilyHandle* column_family, const Slice& key); |
||||||
|
virtual void Delete(const Slice& key); |
||||||
|
|
||||||
|
virtual void Delete(ColumnFamilyHandle* column_family, const SliceParts& key); |
||||||
|
|
||||||
|
virtual void Delete(const SliceParts& key); |
||||||
|
|
||||||
|
// Create an iterator of a column family. User can call iterator.Seek() to
|
||||||
|
// search to the next entry of or after a key. Keys will be iterated in the
|
||||||
|
// order given by index_comparator. For multiple updates on the same key,
|
||||||
|
// each update will be returned as a separate entry, in the order of update
|
||||||
|
// time.
|
||||||
|
virtual WBWIIterator* NewIterator(ColumnFamilyHandle* column_family); |
||||||
|
// Create an iterator of the default column family.
|
||||||
|
virtual WBWIIterator* NewIterator(); |
||||||
|
|
||||||
|
private: |
||||||
|
struct Rep; |
||||||
|
Rep* rep; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,210 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
package org.rocksdb; |
||||||
|
|
||||||
|
/** |
||||||
|
* The config for plain table sst format. |
||||||
|
* |
||||||
|
* BlockBasedTable is a RocksDB's default SST file format. |
||||||
|
*/ |
||||||
|
public class BlockBasedTableConfig extends TableFormatConfig { |
||||||
|
|
||||||
|
public BlockBasedTableConfig() { |
||||||
|
noBlockCache_ = false; |
||||||
|
blockCacheSize_ = 8 * 1024 * 1024; |
||||||
|
blockSize_ = 4 * 1024; |
||||||
|
blockSizeDeviation_ =10; |
||||||
|
blockRestartInterval_ =16; |
||||||
|
wholeKeyFiltering_ = true; |
||||||
|
bitsPerKey_ = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Disable block cache. If this is set to true, |
||||||
|
* then no block cache should be used, and the block_cache should |
||||||
|
* point to a nullptr object. |
||||||
|
* Default: false |
||||||
|
* |
||||||
|
* @param noBlockCache if use block cache |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setNoBlockCache(boolean noBlockCache) { |
||||||
|
noBlockCache_ = noBlockCache; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return if block cache is disabled |
||||||
|
*/ |
||||||
|
public boolean noBlockCache() { |
||||||
|
return noBlockCache_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Set the amount of cache in bytes that will be used by RocksDB. |
||||||
|
* If cacheSize is non-positive, then cache will not be used. |
||||||
|
* DEFAULT: 8M |
||||||
|
* |
||||||
|
* @param blockCacheSize block cache size in bytes |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setBlockCacheSize(long blockCacheSize) { |
||||||
|
blockCacheSize_ = blockCacheSize; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return block cache size in bytes |
||||||
|
*/ |
||||||
|
public long blockCacheSize() { |
||||||
|
return blockCacheSize_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Controls the number of shards for the block cache. |
||||||
|
* This is applied only if cacheSize is set to non-negative. |
||||||
|
* |
||||||
|
* @param numShardBits the number of shard bits. The resulting |
||||||
|
* number of shards would be 2 ^ numShardBits. Any negative |
||||||
|
* number means use default settings." |
||||||
|
* @return the reference to the current option. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setCacheNumShardBits(int numShardBits) { |
||||||
|
numShardBits_ = numShardBits; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Returns the number of shard bits used in the block cache. |
||||||
|
* The resulting number of shards would be 2 ^ (returned value). |
||||||
|
* Any negative number means use default settings. |
||||||
|
* |
||||||
|
* @return the number of shard bits used in the block cache. |
||||||
|
*/ |
||||||
|
public int cacheNumShardBits() { |
||||||
|
return numShardBits_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Approximate size of user data packed per block. Note that the |
||||||
|
* block size specified here corresponds to uncompressed data. The |
||||||
|
* actual size of the unit read from disk may be smaller if |
||||||
|
* compression is enabled. This parameter can be changed dynamically. |
||||||
|
* Default: 4K |
||||||
|
* |
||||||
|
* @param blockSize block size in bytes |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setBlockSize(long blockSize) { |
||||||
|
blockSize_ = blockSize; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return block size in bytes |
||||||
|
*/ |
||||||
|
public long blockSize() { |
||||||
|
return blockSize_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* This is used to close a block before it reaches the configured |
||||||
|
* 'block_size'. If the percentage of free space in the current block is less |
||||||
|
* than this specified number and adding a new record to the block will |
||||||
|
* exceed the configured block size, then this block will be closed and the |
||||||
|
* new record will be written to the next block. |
||||||
|
* Default is 10. |
||||||
|
* |
||||||
|
* @param blockSizeDeviation the deviation to block size allowed |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setBlockSizeDeviation(int blockSizeDeviation) { |
||||||
|
blockSizeDeviation_ = blockSizeDeviation; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return the hash table ratio. |
||||||
|
*/ |
||||||
|
public int blockSizeDeviation() { |
||||||
|
return blockSizeDeviation_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Set block restart interval |
||||||
|
* |
||||||
|
* @param restartInterval block restart interval. |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setBlockRestartInterval(int restartInterval) { |
||||||
|
blockRestartInterval_ = restartInterval; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return block restart interval |
||||||
|
*/ |
||||||
|
public int blockRestartInterval() { |
||||||
|
return blockRestartInterval_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* If true, place whole keys in the filter (not just prefixes). |
||||||
|
* This must generally be true for gets to be efficient. |
||||||
|
* Default: true |
||||||
|
* |
||||||
|
* @param wholeKeyFiltering if enable whole key filtering |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setWholeKeyFiltering(boolean wholeKeyFiltering) { |
||||||
|
wholeKeyFiltering_ = wholeKeyFiltering; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return if whole key filtering is enabled |
||||||
|
*/ |
||||||
|
public boolean wholeKeyFiltering() { |
||||||
|
return wholeKeyFiltering_; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Use the specified filter policy to reduce disk reads. |
||||||
|
* |
||||||
|
* Filter should not be disposed before options instances using this filter is |
||||||
|
* disposed. If dispose() function is not called, then filter object will be |
||||||
|
* GC'd automatically. |
||||||
|
* |
||||||
|
* Filter instance can be re-used in multiple options instances. |
||||||
|
* |
||||||
|
* @param Filter policy java instance. |
||||||
|
* @return the reference to the current config. |
||||||
|
*/ |
||||||
|
public BlockBasedTableConfig setFilterBitsPerKey(int bitsPerKey) { |
||||||
|
bitsPerKey_ = bitsPerKey; |
||||||
|
return this; |
||||||
|
} |
||||||
|
|
||||||
|
@Override protected long newTableFactoryHandle() { |
||||||
|
return newTableFactoryHandle(noBlockCache_, blockCacheSize_, numShardBits_, |
||||||
|
blockSize_, blockSizeDeviation_, blockRestartInterval_, |
||||||
|
wholeKeyFiltering_, bitsPerKey_); |
||||||
|
} |
||||||
|
|
||||||
|
private native long newTableFactoryHandle( |
||||||
|
boolean noBlockCache, long blockCacheSize, int numShardbits, |
||||||
|
long blockSize, int blockSizeDeviation, int blockRestartInterval, |
||||||
|
boolean wholeKeyFiltering, int bitsPerKey); |
||||||
|
|
||||||
|
private boolean noBlockCache_; |
||||||
|
private long blockCacheSize_; |
||||||
|
private int numShardBits_; |
||||||
|
private long shard; |
||||||
|
private long blockSize_; |
||||||
|
private int blockSizeDeviation_; |
||||||
|
private int blockRestartInterval_; |
||||||
|
private boolean wholeKeyFiltering_; |
||||||
|
private int bitsPerKey_; |
||||||
|
} |
@ -0,0 +1,36 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
package org.rocksdb; |
||||||
|
|
||||||
|
/** |
||||||
|
* Config for rate limiter, which is used to control write rate of flush and |
||||||
|
* compaction. |
||||||
|
*/ |
||||||
|
public class GenericRateLimiterConfig extends RateLimiterConfig { |
||||||
|
private static final long DEFAULT_REFILL_PERIOD_MICROS = (100 * 1000); |
||||||
|
private static final int DEFAULT_FAIRNESS = 10; |
||||||
|
|
||||||
|
public GenericRateLimiterConfig(long rateBytesPerSecond, |
||||||
|
long refillPeriodMicros, int fairness) { |
||||||
|
rateBytesPerSecond_ = rateBytesPerSecond; |
||||||
|
refillPeriodMicros_ = refillPeriodMicros; |
||||||
|
fairness_ = fairness; |
||||||
|
} |
||||||
|
|
||||||
|
public GenericRateLimiterConfig(long rateBytesPerSecond) { |
||||||
|
this(rateBytesPerSecond, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS); |
||||||
|
} |
||||||
|
|
||||||
|
@Override protected long newRateLimiterHandle() { |
||||||
|
return newRateLimiterHandle(rateBytesPerSecond_, refillPeriodMicros_, |
||||||
|
fairness_); |
||||||
|
} |
||||||
|
|
||||||
|
private native long newRateLimiterHandle(long rateBytesPerSecond, |
||||||
|
long refillPeriodMicros, int fairness); |
||||||
|
private final long rateBytesPerSecond_; |
||||||
|
private final long refillPeriodMicros_; |
||||||
|
private final int fairness_; |
||||||
|
} |
@ -0,0 +1,20 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
package org.rocksdb; |
||||||
|
|
||||||
|
/** |
||||||
|
* Config for rate limiter, which is used to control write rate of flush and |
||||||
|
* compaction. |
||||||
|
*/ |
||||||
|
public abstract class RateLimiterConfig { |
||||||
|
/** |
||||||
|
* This function should only be called by Options.setRateLimiter(), |
||||||
|
* which will create a c++ shared-pointer to the c++ RateLimiter |
||||||
|
* that is associated with the Java RateLimtierConifg. |
||||||
|
* |
||||||
|
* @see Options.setRateLimiter() |
||||||
|
*/ |
||||||
|
abstract protected long newRateLimiterHandle(); |
||||||
|
} |
@ -0,0 +1,24 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// This file implements the "bridge" between Java and C++ for RateLimiter.
|
||||||
|
|
||||||
|
#include "rocksjni/portal.h" |
||||||
|
#include "include/org_rocksdb_GenericRateLimiterConfig.h" |
||||||
|
#include "rocksdb/rate_limiter.h" |
||||||
|
|
||||||
|
/*
|
||||||
|
* Class: org_rocksdb_GenericRateLimiterConfig |
||||||
|
* Method: newRateLimiterHandle |
||||||
|
* Signature: (JJI)J |
||||||
|
*/ |
||||||
|
jlong Java_org_rocksdb_GenericRateLimiterConfig_newRateLimiterHandle( |
||||||
|
JNIEnv* env, jobject jobj, jlong jrate_bytes_per_second, |
||||||
|
jlong jrefill_period_micros, jint jfairness) { |
||||||
|
return reinterpret_cast<jlong>(rocksdb::NewGenericRateLimiter( |
||||||
|
rocksdb::jlong_to_size_t(jrate_bytes_per_second), |
||||||
|
rocksdb::jlong_to_size_t(jrefill_period_micros), |
||||||
|
static_cast<int32_t>(jfairness))); |
||||||
|
} |
@ -0,0 +1,101 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// A filter block is stored near the end of a Table file. It contains
|
||||||
|
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||||
|
// into a single filter block.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <stddef.h> |
||||||
|
#include <stdint.h> |
||||||
|
#include <string> |
||||||
|
#include <memory> |
||||||
|
#include <vector> |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "table/filter_block.h" |
||||||
|
#include "util/hash.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
|
||||||
|
// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a
|
||||||
|
// particular Table. It generates a single string which is stored as
|
||||||
|
// a special block in the Table.
|
||||||
|
//
|
||||||
|
// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp:
|
||||||
|
// (StartBlock Add*)* Finish
|
||||||
|
class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { |
||||||
|
public: |
||||||
|
BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor, |
||||||
|
const BlockBasedTableOptions& table_opt); |
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return true; } |
||||||
|
virtual void StartBlock(uint64_t block_offset) override; |
||||||
|
virtual void Add(const Slice& key) override; |
||||||
|
virtual Slice Finish() override; |
||||||
|
|
||||||
|
private: |
||||||
|
void AddKey(const Slice& key); |
||||||
|
void AddPrefix(const Slice& key); |
||||||
|
void GenerateFilter(); |
||||||
|
|
||||||
|
// important: all of these might point to invalid addresses
|
||||||
|
// at the time of destruction of this filter block. destructor
|
||||||
|
// should NOT dereference them.
|
||||||
|
const FilterPolicy* policy_; |
||||||
|
const SliceTransform* prefix_extractor_; |
||||||
|
bool whole_key_filtering_; |
||||||
|
|
||||||
|
std::string entries_; // Flattened entry contents
|
||||||
|
std::vector<size_t> start_; // Starting index in entries_ of each entry
|
||||||
|
uint32_t added_to_start_; // To indicate if key is added
|
||||||
|
std::string result_; // Filter data computed so far
|
||||||
|
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
|
||||||
|
std::vector<uint32_t> filter_offsets_; |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&); |
||||||
|
void operator=(const BlockBasedFilterBlockBuilder&); |
||||||
|
}; |
||||||
|
|
||||||
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||||
|
class BlockBasedFilterBlockReader : public FilterBlockReader { |
||||||
|
public: |
||||||
|
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
||||||
|
BlockBasedFilterBlockReader(const SliceTransform* prefix_extractor, |
||||||
|
const BlockBasedTableOptions& table_opt, |
||||||
|
BlockContents&& contents); |
||||||
|
virtual bool IsBlockBased() override { return true; } |
||||||
|
virtual bool KeyMayMatch(const Slice& key, |
||||||
|
uint64_t block_offset = kNotValid) override; |
||||||
|
virtual bool PrefixMayMatch(const Slice& prefix, |
||||||
|
uint64_t block_offset = kNotValid) override; |
||||||
|
virtual size_t ApproximateMemoryUsage() const override; |
||||||
|
|
||||||
|
private: |
||||||
|
const FilterPolicy* policy_; |
||||||
|
const SliceTransform* prefix_extractor_; |
||||||
|
bool whole_key_filtering_; |
||||||
|
const char* data_; // Pointer to filter data (at block-start)
|
||||||
|
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
||||||
|
size_t num_; // Number of entries in offset array
|
||||||
|
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
||||||
|
BlockContents contents_; |
||||||
|
|
||||||
|
bool MayMatch(const Slice& entry, uint64_t block_offset); |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&); |
||||||
|
void operator=(const BlockBasedFilterBlockReader&); |
||||||
|
}; |
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,242 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/block_based_filter_block.h" |
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "util/coding.h" |
||||||
|
#include "util/hash.h" |
||||||
|
#include "util/logging.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// For testing: emit an array with one hash value per key
|
||||||
|
class TestHashFilter : public FilterPolicy { |
||||||
|
public: |
||||||
|
virtual const char* Name() const { |
||||||
|
return "TestHashFilter"; |
||||||
|
} |
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
||||||
|
for (int i = 0; i < n; i++) { |
||||||
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
||||||
|
PutFixed32(dst, h); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
||||||
|
uint32_t h = Hash(key.data(), key.size(), 1); |
||||||
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
||||||
|
if (h == DecodeFixed32(filter.data() + i)) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
return false; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
class FilterBlockTest { |
||||||
|
public: |
||||||
|
TestHashFilter policy_; |
||||||
|
BlockBasedTableOptions table_options_; |
||||||
|
|
||||||
|
FilterBlockTest() { |
||||||
|
table_options_.filter_policy.reset(new TestHashFilter()); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST(FilterBlockTest, EmptyBuilder) { |
||||||
|
BlockBasedFilterBlockBuilder builder(nullptr, table_options_); |
||||||
|
BlockContents block(builder.Finish(), false, kNoCompression); |
||||||
|
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data)); |
||||||
|
BlockBasedFilterBlockReader reader(nullptr, table_options_, std::move(block)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("foo", 0)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("foo", 100000)); |
||||||
|
} |
||||||
|
|
||||||
|
TEST(FilterBlockTest, SingleChunk) { |
||||||
|
BlockBasedFilterBlockBuilder builder(nullptr, table_options_); |
||||||
|
builder.StartBlock(100); |
||||||
|
builder.Add("foo"); |
||||||
|
builder.Add("bar"); |
||||||
|
builder.Add("box"); |
||||||
|
builder.StartBlock(200); |
||||||
|
builder.Add("box"); |
||||||
|
builder.StartBlock(300); |
||||||
|
builder.Add("hello"); |
||||||
|
BlockContents block(builder.Finish(), false, kNoCompression); |
||||||
|
BlockBasedFilterBlockReader reader(nullptr, table_options_, std::move(block)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("foo", 100)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("bar", 100)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("box", 100)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("hello", 100)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("foo", 100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("missing", 100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("other", 100)); |
||||||
|
} |
||||||
|
|
||||||
|
TEST(FilterBlockTest, MultiChunk) { |
||||||
|
BlockBasedFilterBlockBuilder builder(nullptr, table_options_); |
||||||
|
|
||||||
|
// First filter
|
||||||
|
builder.StartBlock(0); |
||||||
|
builder.Add("foo"); |
||||||
|
builder.StartBlock(2000); |
||||||
|
builder.Add("bar"); |
||||||
|
|
||||||
|
// Second filter
|
||||||
|
builder.StartBlock(3100); |
||||||
|
builder.Add("box"); |
||||||
|
|
||||||
|
// Third filter is empty
|
||||||
|
|
||||||
|
// Last filter
|
||||||
|
builder.StartBlock(9000); |
||||||
|
builder.Add("box"); |
||||||
|
builder.Add("hello"); |
||||||
|
|
||||||
|
BlockContents block(builder.Finish(), false, kNoCompression); |
||||||
|
BlockBasedFilterBlockReader reader(nullptr, table_options_, std::move(block)); |
||||||
|
|
||||||
|
// Check first filter
|
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("foo", 0)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("bar", 2000)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("box", 0)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("hello", 0)); |
||||||
|
|
||||||
|
// Check second filter
|
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("box", 3100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("foo", 3100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("bar", 3100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("hello", 3100)); |
||||||
|
|
||||||
|
// Check third filter (empty)
|
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("foo", 4100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("bar", 4100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("box", 4100)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("hello", 4100)); |
||||||
|
|
||||||
|
// Check last filter
|
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("box", 9000)); |
||||||
|
ASSERT_TRUE(reader.KeyMayMatch("hello", 9000)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("foo", 9000)); |
||||||
|
ASSERT_TRUE(!reader.KeyMayMatch("bar", 9000)); |
||||||
|
} |
||||||
|
|
||||||
|
// Test for block based filter block
|
||||||
|
// use new interface in FilterPolicy to create filter builder/reader
|
||||||
|
class BlockBasedFilterBlockTest { |
||||||
|
public: |
||||||
|
BlockBasedTableOptions table_options_; |
||||||
|
|
||||||
|
BlockBasedFilterBlockTest() { |
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10)); |
||||||
|
} |
||||||
|
|
||||||
|
~BlockBasedFilterBlockTest() {} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { |
||||||
|
FilterBlockBuilder* builder = new BlockBasedFilterBlockBuilder( |
||||||
|
nullptr, table_options_); |
||||||
|
BlockContents block(builder->Finish(), false, kNoCompression); |
||||||
|
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data)); |
||||||
|
FilterBlockReader* reader = new BlockBasedFilterBlockReader( |
||||||
|
nullptr, table_options_, std::move(block)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("foo", 0)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("foo", 100000)); |
||||||
|
|
||||||
|
delete builder; |
||||||
|
delete reader; |
||||||
|
} |
||||||
|
|
||||||
|
TEST(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { |
||||||
|
FilterBlockBuilder* builder = new BlockBasedFilterBlockBuilder( |
||||||
|
nullptr, table_options_); |
||||||
|
builder->StartBlock(100); |
||||||
|
builder->Add("foo"); |
||||||
|
builder->Add("bar"); |
||||||
|
builder->Add("box"); |
||||||
|
builder->StartBlock(200); |
||||||
|
builder->Add("box"); |
||||||
|
builder->StartBlock(300); |
||||||
|
builder->Add("hello"); |
||||||
|
BlockContents block(builder->Finish(), false, kNoCompression); |
||||||
|
FilterBlockReader* reader = new BlockBasedFilterBlockReader( |
||||||
|
nullptr, table_options_, std::move(block)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("foo", 100)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("bar", 100)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("box", 100)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("hello", 100)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("foo", 100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("missing", 100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("other", 100)); |
||||||
|
|
||||||
|
delete builder; |
||||||
|
delete reader; |
||||||
|
} |
||||||
|
|
||||||
|
TEST(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { |
||||||
|
FilterBlockBuilder* builder = new BlockBasedFilterBlockBuilder( |
||||||
|
nullptr, table_options_); |
||||||
|
|
||||||
|
// First filter
|
||||||
|
builder->StartBlock(0); |
||||||
|
builder->Add("foo"); |
||||||
|
builder->StartBlock(2000); |
||||||
|
builder->Add("bar"); |
||||||
|
|
||||||
|
// Second filter
|
||||||
|
builder->StartBlock(3100); |
||||||
|
builder->Add("box"); |
||||||
|
|
||||||
|
// Third filter is empty
|
||||||
|
|
||||||
|
// Last filter
|
||||||
|
builder->StartBlock(9000); |
||||||
|
builder->Add("box"); |
||||||
|
builder->Add("hello"); |
||||||
|
|
||||||
|
BlockContents block(builder->Finish(), false, kNoCompression); |
||||||
|
FilterBlockReader* reader = new BlockBasedFilterBlockReader( |
||||||
|
nullptr, table_options_, std::move(block)); |
||||||
|
|
||||||
|
// Check first filter
|
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("foo", 0)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("bar", 2000)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("box", 0)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("hello", 0)); |
||||||
|
|
||||||
|
// Check second filter
|
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("box", 3100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("foo", 3100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("bar", 3100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("hello", 3100)); |
||||||
|
|
||||||
|
// Check third filter (empty)
|
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("foo", 4100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("bar", 4100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("box", 4100)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("hello", 4100)); |
||||||
|
|
||||||
|
// Check last filter
|
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("box", 9000)); |
||||||
|
ASSERT_TRUE(reader->KeyMayMatch("hello", 9000)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("foo", 9000)); |
||||||
|
ASSERT_TRUE(!reader->KeyMayMatch("bar", 9000)); |
||||||
|
|
||||||
|
delete builder; |
||||||
|
delete reader; |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); } |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue