|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <queue>
|
|
|
|
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/iterator.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "db/dbformat.h"
|
|
|
|
#include "util/arena.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class DBImpl;
|
|
|
|
class Env;
|
|
|
|
struct SuperVersion;
|
|
|
|
class ColumnFamilyData;
|
|
|
|
class LevelIterator;
|
|
|
|
struct FileMetaData;
|
|
|
|
|
|
|
|
class MinIterComparator {
|
|
|
|
public:
|
|
|
|
explicit MinIterComparator(const Comparator* comparator) :
|
|
|
|
comparator_(comparator) {}
|
|
|
|
|
|
|
|
bool operator()(Iterator* a, Iterator* b) {
|
|
|
|
return comparator_->Compare(a->key(), b->key()) > 0;
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
const Comparator* comparator_;
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef std::priority_queue<Iterator*,
|
|
|
|
std::vector<Iterator*>,
|
|
|
|
MinIterComparator> MinIterHeap;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ForwardIterator is a special type of iterator that only supports Seek()
|
|
|
|
* and Next(). It is expected to perform better than TailingIterator by
|
|
|
|
* removing the encapsulation and making all information accessible within
|
|
|
|
* the iterator. At the current implementation, snapshot is taken at the
|
|
|
|
* time Seek() is called. The Next() followed do not see new values after.
|
|
|
|
*/
|
|
|
|
class ForwardIterator : public Iterator {
|
|
|
|
public:
|
|
|
|
ForwardIterator(DBImpl* db, const ReadOptions& read_options,
|
|
|
|
ColumnFamilyData* cfd, SuperVersion* current_sv = nullptr);
|
|
|
|
virtual ~ForwardIterator();
|
|
|
|
|
|
|
|
void SeekToLast() override {
|
|
|
|
status_ = Status::NotSupported("ForwardIterator::SeekToLast()");
|
|
|
|
valid_ = false;
|
|
|
|
}
|
|
|
|
void Prev() override {
|
|
|
|
status_ = Status::NotSupported("ForwardIterator::Prev");
|
|
|
|
valid_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual bool Valid() const override;
|
|
|
|
void SeekToFirst() override;
|
|
|
|
virtual void Seek(const Slice& target) override;
|
|
|
|
virtual void Next() override;
|
|
|
|
virtual Slice key() const override;
|
|
|
|
virtual Slice value() const override;
|
|
|
|
virtual Status status() const override;
|
|
|
|
bool TEST_CheckDeletedIters();
|
|
|
|
|
|
|
|
private:
|
|
|
|
void Cleanup(bool release_sv);
|
|
|
|
void RebuildIterators(bool refresh_sv);
|
|
|
|
void ResetIncompleteIterators();
|
|
|
|
void SeekInternal(const Slice& internal_key, bool seek_to_first);
|
|
|
|
void UpdateCurrent();
|
|
|
|
bool NeedToSeekImmutable(const Slice& internal_key);
|
|
|
|
void DeleteCurrentIter();
|
|
|
|
uint32_t FindFileInRange(
|
|
|
|
const std::vector<FileMetaData*>& files, const Slice& internal_key,
|
|
|
|
uint32_t left, uint32_t right);
|
|
|
|
|
|
|
|
bool IsOverUpperBound(const Slice& internal_key) const;
|
|
|
|
|
|
|
|
DBImpl* const db_;
|
|
|
|
const ReadOptions read_options_;
|
|
|
|
ColumnFamilyData* const cfd_;
|
|
|
|
const SliceTransform* const prefix_extractor_;
|
|
|
|
const Comparator* user_comparator_;
|
|
|
|
MinIterHeap immutable_min_heap_;
|
|
|
|
|
|
|
|
SuperVersion* sv_;
|
|
|
|
Iterator* mutable_iter_;
|
|
|
|
std::vector<Iterator*> imm_iters_;
|
|
|
|
std::vector<Iterator*> l0_iters_;
|
|
|
|
std::vector<LevelIterator*> level_iters_;
|
|
|
|
Iterator* current_;
|
|
|
|
bool valid_;
|
|
|
|
|
|
|
|
// Internal iterator status; set only by one of the unsupported methods.
|
|
|
|
Status status_;
|
|
|
|
// Status of immutable iterators, maintained here to avoid iterating over
|
|
|
|
// all of them in status().
|
|
|
|
Status immutable_status_;
|
|
|
|
// Indicates that at least one of the immutable iterators pointed to a key
|
|
|
|
// larger than iterate_upper_bound and was therefore destroyed. Seek() may
|
|
|
|
// need to rebuild such iterators.
|
|
|
|
bool has_iter_trimmed_for_upper_bound_;
|
|
|
|
// Is current key larger than iterate_upper_bound? If so, makes Valid()
|
|
|
|
// return false.
|
|
|
|
bool current_over_upper_bound_;
|
|
|
|
|
|
|
|
// Left endpoint of the range of keys that immutable iterators currently
|
|
|
|
// cover. When Seek() is called with a key that's within that range, immutable
|
|
|
|
// iterators don't need to be moved; see NeedToSeekImmutable(). This key is
|
|
|
|
// included in the range after a Seek(), but excluded when advancing the
|
|
|
|
// iterator using Next().
|
|
|
|
IterKey prev_key_;
|
|
|
|
bool is_prev_set_;
|
ForwardIterator: update prev_key_ only if prefix hasn't changed
Summary:
Since ForwardIterator is on a level below DBIter, the latter may call Next() on
it (e.g. in order to skip deletion markers). Since this also updates
`prev_key_`, it may prevent the Seek() optimization.
For example, assume that there's only one SST file and it contains the following
entries: 0101, 0201 (`ValueType::kTypeDeletion`, i.e. a tombstone record), 0201
(`kTypeValue`), 0202. Memtable is empty. `Seek(0102)` will result in `prev_key_`
being set to `0201` instead of `0102`, since `DBIter::Seek()` will call
`ForwardIterator::Next()` to skip record 0201. Therefore, when `Seek(0102)` is
called again, `NeedToSeekImmutable()` will return true.
This fix relies on `prefix_extractor_` to detect prefix changes. `prev_key_` is
only set to `current_->key()` as long as they have the same prefix.
I also made a small change to `NeedToSeekImmutable()` so it no longer returns
true when the db is empty (i.e. there's nothing but a memtable).
Test Plan:
$ TEST_TMPDIR=/dev/shm/rocksdbtest ROCKSDB_TESTS=TailingIterator ./db_test
Reviewers: sdong, igor, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D23823
10 years ago
|
|
|
bool is_prev_inclusive_;
|
|
|
|
|
|
|
|
Arena arena_;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
#endif // ROCKSDB_LITE
|