Merge remote-tracking branch 'upstream/master'

main
wankai 11 years ago
commit 4c2b1f097b
  1. 3
      .travis.yml
  2. 11
      HISTORY.md
  3. 5
      Makefile
  4. 34
      build_tools/regression_build_test.sh
  5. 40
      db/builder.cc
  6. 11
      db/builder.h
  7. 7
      db/c.cc
  8. 15
      db/column_family.cc
  9. 13
      db/column_family.h
  10. 3
      db/compaction.cc
  11. 3
      db/compaction_picker.cc
  12. 26
      db/cuckoo_table_db_test.cc
  13. 6
      db/db_bench.cc
  14. 23
      db/db_filesnapshot.cc
  15. 520
      db/db_impl.cc
  16. 41
      db/db_impl.h
  17. 32
      db/db_impl_debug.cc
  18. 2
      db/db_impl_readonly.cc
  19. 113
      db/db_iter.cc
  20. 5
      db/db_iter.h
  21. 290
      db/db_test.cc
  22. 2
      db/dbformat.h
  23. 3
      db/filename.cc
  24. 10
      db/forward_iterator.cc
  25. 2
      db/forward_iterator.h
  26. 4
      db/internal_stats.cc
  27. 10
      db/memtable.cc
  28. 3
      db/memtable.h
  29. 5
      db/memtable_list.cc
  30. 2
      db/memtable_list.h
  31. 22
      db/plain_table_db_test.cc
  32. 37
      db/repair.cc
  33. 69
      db/simple_table_db_test.cc
  34. 2
      db/snapshot.h
  35. 43
      db/table_cache.cc
  36. 11
      db/table_cache.h
  37. 13
      db/table_properties_collector_test.cc
  38. 6
      db/version_edit.h
  39. 28
      db/version_set.cc
  40. 2
      db/version_set.h
  41. 5
      db/write_batch_test.cc
  42. 4
      include/rocksdb/c.h
  43. 62
      include/rocksdb/immutable_options.h
  44. 32
      include/rocksdb/options.h
  45. 23
      include/rocksdb/table.h
  46. 3
      include/rocksdb/utilities/backupable_db.h
  47. 6
      java/rocksjni/write_batch.cc
  48. 18
      table/adaptive_table_factory.cc
  49. 24
      table/adaptive_table_factory.h
  50. 49
      table/block_based_table_builder.cc
  51. 6
      table/block_based_table_builder.h
  52. 13
      table/block_based_table_factory.cc
  53. 17
      table/block_based_table_factory.h
  54. 67
      table/block_based_table_reader.cc
  55. 5
      table/block_based_table_reader.h
  56. 4
      table/block_prefix_index.cc
  57. 2
      table/block_prefix_index.h
  58. 5
      table/cuckoo_table_builder.cc
  59. 41
      table/cuckoo_table_builder_test.cc
  60. 18
      table/cuckoo_table_factory.cc
  61. 7
      table/cuckoo_table_factory.h
  62. 6
      table/cuckoo_table_reader.cc
  63. 5
      table/cuckoo_table_reader.h
  64. 18
      table/cuckoo_table_reader_test.cc
  65. 9
      table/filter_block.cc
  66. 5
      table/filter_block.h
  67. 18
      table/filter_block_test.cc
  68. 29
      table/plain_table_builder.cc
  69. 4
      table/plain_table_builder.h
  70. 14
      table/plain_table_factory.cc
  71. 21
      table/plain_table_factory.h
  72. 8
      table/plain_table_index.cc
  73. 8
      table/plain_table_index.h
  74. 51
      table/plain_table_reader.cc
  75. 10
      table/plain_table_reader.h
  76. 8
      table/table_reader_bench.cc
  77. 139
      table/table_test.cc
  78. 6
      tools/sst_dump.cc
  79. 257
      util/cache_bench.cc
  80. 3
      util/db_info_dummper.cc
  81. 3
      util/dynamic_bloom_test.cc
  82. 8
      util/env_test.cc
  83. 2
      util/histogram.cc
  84. 6
      util/histogram.h
  85. 4
      util/ldb_cmd.cc
  86. 3
      util/logging.cc
  87. 24
      util/options.cc
  88. 3
      util/options_test.cc
  89. 3
      util/rate_limiter_test.cc
  90. 28
      util/scoped_arena_iterator.h
  91. 3
      util/statistics.cc
  92. 2
      utilities/backupable/backupable_db.cc
  93. 4
      utilities/backupable/backupable_db_test.cc
  94. 1
      utilities/document/document_db.cc
  95. 3
      utilities/document/json_document.cc
  96. 2
      utilities/geodb/geodb_impl.cc
  97. 3
      utilities/spatialdb/spatial_db.cc

@ -14,7 +14,6 @@ before_install:
- sudo dpkg -i libgflags-dev_2.0-1_amd64.deb
# Lousy hack to disable use and testing of fallocate, which doesn't behave quite
# as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment.
- sed -i "s/fallocate(/HACK_NO_fallocate(/" build_tools/build_detect_platform
script: make check -j8
script: OPT=-DTRAVIS make check -j8
notifications:
email: false

@ -1,6 +1,10 @@
# Rocksdb Change Log
### Unreleased
## Unreleased
----- Past Releases -----
## 3.5.0 (9/3/2014)
### New Features
* Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it.
* Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include:
@ -11,15 +15,12 @@
### Public API changes
* The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key.
----- Past Releases -----
## 3.4.0 (8/18/2014)
### New Features
* Support Multiple DB paths in universal style compactions
* Add feature of storing plain table index and bloom filter in SST file.
* CompactRange() will never output compacted files to level 0. This used to be the case when all the compaction input files were at level 0.
* Added iterate_upper_bound to define the extent up to which the forward iterator will return entries. This will prevent iterating over delete markers and overwritten entries for edge cases where you want to break out of the iterator anyway. This may improve performance in case there are a large number of delete markers or overwritten entries.
### Public API changes
* DBOptions.db_paths is now a vector of DBPath structures, each of which indicates both a path and a target size

@ -132,7 +132,7 @@ TOOLS = \
options_test \
blob_store_bench
PROGRAMS = db_bench signal_test table_reader_bench log_and_apply_bench $(TOOLS)
PROGRAMS = db_bench signal_test table_reader_bench log_and_apply_bench cache_bench $(TOOLS)
# The library name is configurable since we are maintaining libraries of both
# debug/release mode.
@ -264,6 +264,9 @@ $(LIBRARY): $(LIBOBJECTS)
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
cache_bench: util/cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) util/cache_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
block_hash_index_test: table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

@ -344,6 +344,38 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--threads=32 \
--writes_per_second=81920 > ${STAT_FILE}.seekwhilewriting_in_ram
# measure fillseq with bunch of column families
./db_bench \
--benchmarks=fillseq \
--num_column_families=500 \
--write_buffer_size=1048576 \
--db=$DATA_DIR \
--use_existing_db=0 \
--num=$NUM \
--writes=$NUM \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 > ${STAT_FILE}.fillseq_lots_column_families
# measure overwrite performance with bunch of column families
./db_bench \
--benchmarks=overwrite \
--num_column_families=500 \
--write_buffer_size=1048576 \
--db=$DATA_DIR \
--use_existing_db=1 \
--num=$NUM \
--writes=$((NUM / 10)) \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=8 > ${STAT_FILE}.overwrite_lots_column_families
# send data to ods
function send_to_ods {
@ -392,3 +424,5 @@ send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadr
send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting
send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram
send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram
send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families
send_benchmark_to_ods overwrite overwrite_lots_column_families ${STAT_FILE}.overwrite_lots_column_families

@ -26,21 +26,24 @@ namespace rocksdb {
class TableFactory;
TableBuilder* NewTableBuilder(const Options& options,
TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file,
CompressionType compression_type) {
return options.table_factory->NewTableBuilder(options, internal_comparator,
file, compression_type);
const CompressionType compression_type,
const CompressionOptions& compression_opts) {
return ioptions.table_factory->NewTableBuilder(
ioptions, internal_comparator, file, compression_type, compression_opts);
}
Status BuildTable(const std::string& dbname, Env* env, const Options& options,
const EnvOptions& soptions, TableCache* table_cache,
Status BuildTable(const std::string& dbname, Env* env,
const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, TableCache* table_cache,
Iterator* iter, FileMetaData* meta,
const InternalKeyComparator& internal_comparator,
const SequenceNumber newest_snapshot,
const SequenceNumber earliest_seqno_in_memtable,
const CompressionType compression,
const CompressionOptions& compression_opts,
const Env::IOPriority io_priority) {
Status s;
meta->fd.file_size = 0;
@ -50,23 +53,24 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
// If the sequence number of the smallest entry in the memtable is
// smaller than the most recent snapshot, then we do not trigger
// removal of duplicate/deleted keys as part of this builder.
bool purge = options.purge_redundant_kvs_while_flush;
bool purge = ioptions.purge_redundant_kvs_while_flush;
if (earliest_seqno_in_memtable <= newest_snapshot) {
purge = false;
}
std::string fname = TableFileName(options.db_paths, meta->fd.GetNumber(),
std::string fname = TableFileName(ioptions.db_paths, meta->fd.GetNumber(),
meta->fd.GetPathId());
if (iter->Valid()) {
unique_ptr<WritableFile> file;
s = env->NewWritableFile(fname, &file, soptions);
s = env->NewWritableFile(fname, &file, env_options);
if (!s.ok()) {
return s;
}
file->SetIOPriority(io_priority);
TableBuilder* builder =
NewTableBuilder(options, internal_comparator, file.get(), compression);
TableBuilder* builder = NewTableBuilder(
ioptions, internal_comparator, file.get(),
compression, compression_opts);
// the first key is the smallest key
Slice key = iter->key();
@ -75,8 +79,8 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
meta->largest_seqno = meta->smallest_seqno;
MergeHelper merge(internal_comparator.user_comparator(),
options.merge_operator.get(), options.info_log.get(),
options.min_partial_merge_operands,
ioptions.merge_operator, ioptions.info_log,
ioptions.min_partial_merge_operands,
true /* internal key corruption is not ok */);
if (purge) {
@ -196,12 +200,12 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
delete builder;
// Finish and check for file errors
if (s.ok() && !options.disableDataSync) {
if (options.use_fsync) {
StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS);
if (s.ok() && !ioptions.disable_data_sync) {
if (ioptions.use_fsync) {
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
s = file->Fsync();
} else {
StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS);
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
s = file->Sync();
}
}
@ -211,7 +215,7 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
if (s.ok()) {
// Verify that the table is usable
Iterator* it = table_cache->NewIterator(ReadOptions(), soptions,
Iterator* it = table_cache->NewIterator(ReadOptions(), env_options,
internal_comparator, meta->fd);
s = it->status();
delete it;

@ -11,6 +11,7 @@
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/options.h"
#include "rocksdb/immutable_options.h"
namespace rocksdb {
@ -26,8 +27,10 @@ class TableBuilder;
class WritableFile;
extern TableBuilder* NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type);
const ImmutableCFOptions& options,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts);
// Build a Table file from the contents of *iter. The generated file
// will be named according to number specified in meta. On success, the rest of
@ -35,13 +38,15 @@ extern TableBuilder* NewTableBuilder(
// If no data is present in *iter, meta->file_size will be set to
// zero, and no Table file will be produced.
extern Status BuildTable(const std::string& dbname, Env* env,
const Options& options, const EnvOptions& soptions,
const ImmutableCFOptions& options,
const EnvOptions& env_options,
TableCache* table_cache, Iterator* iter,
FileMetaData* meta,
const InternalKeyComparator& internal_comparator,
const SequenceNumber newest_snapshot,
const SequenceNumber earliest_seqno_in_memtable,
const CompressionType compression,
const CompressionOptions& compression_opts,
const Env::IOPriority io_priority = Env::IO_HIGH);
} // namespace rocksdb

@ -1844,6 +1844,13 @@ void rocksdb_readoptions_set_snapshot(
opt->rep.snapshot = (snap ? snap->rep : nullptr);
}
// C API: sets the iterate_upper_bound read option on `opt` from the byte
// range described by (key, keylen).
//
// NOTE(review): `prefix` below is a local Slice and its ADDRESS is what gets
// stored in opt->rep.iterate_upper_bound. The local is destroyed when this
// function returns, so the stored pointer dangles for every later use of
// `opt`. A correct fix needs storage that outlives this call (for example a
// Slice member kept alongside `rep` in rocksdb_readoptions_t); that struct is
// not visible from here, so confirm and fix at its definition.
void rocksdb_readoptions_set_iterate_upper_bound(
rocksdb_readoptions_t* opt,
const char* key, size_t keylen) {
Slice prefix = Slice(key, keylen);
// Stores a pointer to the stack-local Slice above (see NOTE(review)).
opt->rep.iterate_upper_bound = &prefix;
}
void rocksdb_readoptions_set_read_tier(
rocksdb_readoptions_t* opt, int v) {
opt->rep.read_tier = static_cast<rocksdb::ReadTier>(v);

@ -178,7 +178,7 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name,
Version* dummy_versions, Cache* table_cache,
const ColumnFamilyOptions& options,
const DBOptions* db_options,
const EnvOptions& storage_options,
const EnvOptions& env_options,
ColumnFamilySet* column_family_set)
: id_(id),
name_(name),
@ -188,6 +188,7 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name,
dropped_(false),
internal_comparator_(options.comparator),
options_(*db_options, SanitizeOptions(&internal_comparator_, options)),
ioptions_(options_),
mem_(nullptr),
imm_(options_.min_write_buffer_number_to_merge),
super_version_(nullptr),
@ -204,7 +205,7 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name,
if (dummy_versions != nullptr) {
internal_stats_.reset(
new InternalStats(options_.num_levels, db_options->env, this));
table_cache_.reset(new TableCache(&options_, storage_options, table_cache));
table_cache_.reset(new TableCache(ioptions_, env_options, table_cache));
if (options_.compaction_style == kCompactionStyleUniversal) {
compaction_picker_.reset(
new UniversalCompactionPicker(&options_, &internal_comparator_));
@ -306,7 +307,7 @@ void ColumnFamilyData::RecalculateWriteStallRateLimitsConditions() {
}
const EnvOptions* ColumnFamilyData::soptions() const {
return &(column_family_set_->storage_options_);
return &(column_family_set_->env_options_);
}
void ColumnFamilyData::SetCurrent(Version* current) {
@ -462,16 +463,16 @@ void ColumnFamilyData::ResetThreadLocalSuperVersions() {
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
const DBOptions* db_options,
const EnvOptions& storage_options,
const EnvOptions& env_options,
Cache* table_cache)
: max_column_family_(0),
dummy_cfd_(new ColumnFamilyData(0, "", nullptr, nullptr,
ColumnFamilyOptions(), db_options,
storage_options_, nullptr)),
env_options_, nullptr)),
default_cfd_cache_(nullptr),
db_name_(dbname),
db_options_(db_options),
storage_options_(storage_options),
env_options_(env_options),
table_cache_(table_cache),
spin_lock_(ATOMIC_FLAG_INIT) {
// initialize linked list
@ -537,7 +538,7 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
assert(column_families_.find(name) == column_families_.end());
ColumnFamilyData* new_cfd =
new ColumnFamilyData(id, name, dummy_versions, table_cache_, options,
db_options_, storage_options_, this);
db_options_, env_options_, this);
Lock();
column_families_.insert({name, id});
column_family_data_.insert({id, new_cfd});

@ -165,9 +165,11 @@ class ColumnFamilyData {
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
uint64_t GetLogNumber() const { return log_number_; }
// thread-safe
// TODO(ljin): make this API thread-safe once we allow updating options_
const Options* options() const { return &options_; }
// thread-safe
const EnvOptions* soptions() const;
const ImmutableCFOptions* ioptions() const { return &ioptions_; }
InternalStats* internal_stats() { return internal_stats_.get(); }
@ -251,7 +253,7 @@ class ColumnFamilyData {
Version* dummy_versions, Cache* table_cache,
const ColumnFamilyOptions& options,
const DBOptions* db_options,
const EnvOptions& storage_options,
const EnvOptions& env_options,
ColumnFamilySet* column_family_set);
// Recalculate some small conditions, which are changed only during
@ -272,7 +274,8 @@ class ColumnFamilyData {
const InternalKeyComparator internal_comparator_;
Options const options_;
const Options options_;
const ImmutableCFOptions ioptions_;
std::unique_ptr<TableCache> table_cache_;
@ -367,7 +370,7 @@ class ColumnFamilySet {
};
ColumnFamilySet(const std::string& dbname, const DBOptions* db_options,
const EnvOptions& storage_options, Cache* table_cache);
const EnvOptions& env_options, Cache* table_cache);
~ColumnFamilySet();
ColumnFamilyData* GetDefault() const;
@ -420,7 +423,7 @@ class ColumnFamilySet {
const std::string db_name_;
const DBOptions* const db_options_;
const EnvOptions storage_options_;
const EnvOptions env_options_;
Cache* table_cache_;
std::atomic_flag spin_lock_;
};

@ -9,7 +9,10 @@
#include "db/compaction.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <vector>

@ -9,7 +9,10 @@
#include "db/compaction_picker.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <limits>
#include "db/filename.h"

@ -245,14 +245,38 @@ TEST(CuckooTableDBTest, CompactionTrigger) {
ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + idx)));
}
dbfull()->TEST_WaitForFlushMemTable();
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
ASSERT_EQ("2", FilesPerLevel());
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
ASSERT_EQ("0,2", FilesPerLevel());
for (int idx = 0; idx < 22; ++idx) {
ASSERT_EQ(std::string(10000, 'a' + idx), Get(Key(idx)));
}
}
// Builds one large L0 file and verifies that compacting it yields multiple
// files in L1, with every value still readable afterwards.
TEST(CuckooTableDBTest, CompactionIntoMultipleFiles) {
  Options opts = CurrentOptions();
  opts.write_buffer_size = 270 << 10;
  // Two SST files should be created, each containing 14 keys.
  // Number of buckets will be 16. Total size ~156 KB.
  opts.target_file_size_base = 160 << 10;
  Reopen(&opts);

  // Each of the 28 values is 10016 B, i.e. roughly 10KB.
  const int kNumKeys = 28;
  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), std::string(10000, 'a' + i)));
  }

  // After the flush everything sits in a single L0 file.
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_EQ("1", FilesPerLevel());

  // Compacting L0 must split the data into two L1 files.
  dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  ASSERT_EQ("0,2", FilesPerLevel());

  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_EQ(std::string(10000, 'a' + i), Get(Key(i)));
  }
}
TEST(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) {
// Insert same key twice so that they go to different SST files. Then wait for
// compaction and check if the latest value is stored and old value removed.

@ -7,7 +7,9 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#ifndef GFLAGS
#include <cstdio>
@ -1108,6 +1110,8 @@ class Benchmark {
}
// Releases everything the benchmark owns: the column family handles first,
// then the database object, then the prefix extractor.
~Benchmark() {
  for (ColumnFamilyHandle* handle : db_.cfh) {
    delete handle;
  }
  delete db_.db;
  delete prefix_extractor_;
}
@ -1332,6 +1336,8 @@ class Benchmark {
method = nullptr;
} else {
if (db_.db != nullptr) {
std::for_each(db_.cfh.begin(), db_.cfh.end(),
[](ColumnFamilyHandle* cfh) { delete cfh; });
delete db_.db;
db_.db = nullptr;
db_.cfh.clear();

@ -9,7 +9,10 @@
#ifndef ROCKSDB_LITE
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <algorithm>
#include <string>
@ -29,9 +32,9 @@ Status DBImpl::DisableFileDeletions() {
MutexLock l(&mutex_);
++disable_delete_obsolete_files_;
if (disable_delete_obsolete_files_ == 1) {
Log(options_.info_log, "File Deletions Disabled");
Log(db_options_.info_log, "File Deletions Disabled");
} else {
Log(options_.info_log,
Log(db_options_.info_log,
"File Deletions Disabled, but already disabled. Counter: %d",
disable_delete_obsolete_files_);
}
@ -50,11 +53,11 @@ Status DBImpl::EnableFileDeletions(bool force) {
--disable_delete_obsolete_files_;
}
if (disable_delete_obsolete_files_ == 0) {
Log(options_.info_log, "File Deletions Enabled");
Log(db_options_.info_log, "File Deletions Enabled");
should_purge_files = true;
FindObsoleteFiles(deletion_state, true);
} else {
Log(options_.info_log,
Log(db_options_.info_log,
"File Deletions Enable, but not really enabled. Counter: %d",
disable_delete_obsolete_files_);
}
@ -62,7 +65,7 @@ Status DBImpl::EnableFileDeletions(bool force) {
if (should_purge_files) {
PurgeObsoleteFiles(deletion_state);
}
LogFlush(options_.info_log);
LogFlush(db_options_.info_log);
return Status::OK();
}
@ -95,7 +98,7 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
if (!status.ok()) {
mutex_.Unlock();
Log(options_.info_log, "Cannot Flush data %s\n",
Log(db_options_.info_log, "Cannot Flush data %s\n",
status.ToString().c_str());
return status;
}
@ -133,7 +136,7 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
Status s;
// list wal files in main db dir.
VectorLogPtr logs;
s = GetSortedWalsOfType(options_.wal_dir, logs, kAliveLogFile);
s = GetSortedWalsOfType(db_options_.wal_dir, logs, kAliveLogFile);
if (!s.ok()) {
return s;
}
@ -146,7 +149,7 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
files.clear();
// list wal files in archive dir.
std::string archivedir = ArchivalDirectory(options_.wal_dir);
std::string archivedir = ArchivalDirectory(db_options_.wal_dir);
if (env_->FileExists(archivedir)) {
s = GetSortedWalsOfType(archivedir, files, kArchivedLogFile);
if (!s.ok()) {
@ -157,7 +160,7 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
uint64_t latest_archived_log_number = 0;
if (!files.empty()) {
latest_archived_log_number = files.back()->LogNumber();
Log(options_.info_log, "Latest Archived log: %" PRIu64,
Log(db_options_.info_log, "Latest Archived log: %" PRIu64,
latest_archived_log_number);
}
@ -170,7 +173,7 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
// same log in both db dir and archived dir. Simply
// ignore the one in db dir. Note that, if we read
// archived dir first, we would have missed the log file.
Log(options_.info_log, "%s already moved to archive",
Log(db_options_.info_log, "%s already moved to archive",
log->PathName().c_str());
}
}

File diff suppressed because it is too large Load Diff

@ -30,6 +30,7 @@
#include "util/autovector.h"
#include "util/stop_watch.h"
#include "util/thread_local.h"
#include "util/scoped_arena_iterator.h"
#include "db/internal_stats.h"
namespace rocksdb {
@ -173,8 +174,8 @@ class DBImpl : public DB {
// Return an internal iterator over the current state of the database.
// The keys of this iterator are internal keys (see format.h).
// The returned iterator should be deleted when no longer needed.
Iterator* TEST_NewInternalIterator(ColumnFamilyHandle* column_family =
nullptr);
Iterator* TEST_NewInternalIterator(
Arena* arena, ColumnFamilyHandle* column_family = nullptr);
// Return the maximum overlapping data (in bytes) at next level for any
// file at a level >= 1.
@ -202,6 +203,17 @@ class DBImpl : public DB {
SequenceNumber* sequence);
Status TEST_ReadFirstLine(const std::string& fname, SequenceNumber* sequence);
void TEST_LockMutex();
void TEST_UnlockMutex();
// REQUIRES: mutex locked
void* TEST_BeginWrite();
// REQUIRES: mutex locked
// pass the pointer that you got from TEST_BeginWrite()
void TEST_EndWrite(void* w);
#endif // NDEBUG
// Structure to store information for candidate files to delete.
@ -275,7 +287,7 @@ class DBImpl : public DB {
// Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'candidate_files'.
// If force == false and the last call was less than
// options_.delete_obsolete_files_period_micros microseconds ago,
// db_options_.delete_obsolete_files_period_micros microseconds ago,
// it will not fill up the deletion_state
void FindObsoleteFiles(DeletionState& deletion_state,
bool force,
@ -293,12 +305,11 @@ class DBImpl : public DB {
Env* const env_;
const std::string dbname_;
unique_ptr<VersionSet> versions_;
const DBOptions options_;
const DBOptions db_options_;
Statistics* stats_;
Iterator* NewInternalIterator(const ReadOptions&, ColumnFamilyData* cfd,
SuperVersion* super_version,
Arena* arena = nullptr);
SuperVersion* super_version, Arena* arena);
private:
friend class DB;
@ -309,7 +320,7 @@ class DBImpl : public DB {
#endif
friend struct SuperVersion;
struct CompactionState;
struct Writer;
struct WriteContext;
Status NewDB();
@ -349,6 +360,20 @@ class DBImpl : public DB {
uint64_t SlowdownAmount(int n, double bottom, double top);
// Information kept for every waiting writer
//
// The constructor only binds `cv` to the supplied mutex; the pointer, bool
// and integer fields are NOT initialized by it and must be assigned by
// whoever creates the Writer before they are read.
struct Writer {
Status status;
WriteBatch* batch;
bool sync;
bool disableWAL;
// NOTE(review): presumably set when this writer's batch has been folded into
// another writer's batch group — confirm against the write path.
bool in_batch_group;
// NOTE(review): presumably set once the write has been completed (possibly
// by another writer) — confirm against the write path.
bool done;
// Timeout hint; the `_us` suffix suggests microseconds — TODO confirm.
uint64_t timeout_hint_us;
// Condition variable associated with the mutex passed to the constructor.
port::CondVar cv;
explicit Writer(port::Mutex* mu) : cv(mu) {}
};
// Before applying write operation (such as DBImpl::Write, DBImpl::Flush)
// thread should grab the mutex_ and be the first on writers queue.
// BeginWrite is used for it.
@ -607,7 +632,7 @@ class DBImpl : public DB {
int delayed_writes_;
// The options to access storage files
const EnvOptions storage_options_;
const EnvOptions env_options_;
// A value of true temporarily disables scheduling of background work
bool bg_work_gate_closed_;

@ -20,7 +20,8 @@ uint64_t DBImpl::TEST_GetLevel0TotalSize() {
return default_cf_handle_->cfd()->current()->NumLevelBytes(0);
}
Iterator* DBImpl::TEST_NewInternalIterator(ColumnFamilyHandle* column_family) {
Iterator* DBImpl::TEST_NewInternalIterator(Arena* arena,
ColumnFamilyHandle* column_family) {
ColumnFamilyData* cfd;
if (column_family == nullptr) {
cfd = default_cf_handle_->cfd();
@ -33,7 +34,7 @@ Iterator* DBImpl::TEST_NewInternalIterator(ColumnFamilyHandle* column_family) {
SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
mutex_.Unlock();
ReadOptions roptions;
return NewInternalIterator(roptions, cfd, super_version);
return NewInternalIterator(roptions, cfd, super_version, arena);
}
int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes(
@ -129,5 +130,32 @@ Status DBImpl::TEST_ReadFirstLine(const std::string& fname,
SequenceNumber* sequence) {
return ReadFirstLine(fname, sequence);
}
// Test hook: locks mutex_ on behalf of the caller.
void DBImpl::TEST_LockMutex() { mutex_.Lock(); }
// Test hook: unlocks mutex_ on behalf of the caller.
void DBImpl::TEST_UnlockMutex() { mutex_.Unlock(); }
void* DBImpl::TEST_BeginWrite() {
auto w = new Writer(&mutex_);
w->batch = nullptr;
w->sync = false;
w->disableWAL = false;
w->in_batch_group = false;
w->done = false;
w->timeout_hint_us = kNoTimeOut;
Status s = BeginWrite(w, 0);
assert(s.ok() && !w->done); // No timeout and nobody should do our job
return reinterpret_cast<void*>(w);
}
void DBImpl::TEST_EndWrite(void* w) {
auto writer = reinterpret_cast<Writer*>(w);
EndWrite(writer, writer, Status::OK());
}
} // namespace rocksdb
#endif // ROCKSDB_LITE

@ -44,7 +44,7 @@ namespace rocksdb {
DBImplReadOnly::DBImplReadOnly(const DBOptions& options,
const std::string& dbname)
: DBImpl(options, dbname) {
Log(options_.info_log, "Opening the db in read only mode");
Log(db_options_.info_log, "Opening the db in read only mode");
}
DBImplReadOnly::~DBImplReadOnly() {

@ -59,7 +59,8 @@ class DBIter: public Iterator {
};
DBIter(Env* env, const Options& options, const Comparator* cmp,
Iterator* iter, SequenceNumber s, bool arena_mode)
Iterator* iter, SequenceNumber s, bool arena_mode,
const Slice* iterate_upper_bound = nullptr)
: arena_mode_(arena_mode),
env_(env),
logger_(options.info_log.get()),
@ -70,9 +71,10 @@ class DBIter: public Iterator {
direction_(kForward),
valid_(false),
current_entry_is_merged_(false),
statistics_(options.statistics.get()) {
statistics_(options.statistics.get()),
iterate_upper_bound_(iterate_upper_bound) {
RecordTick(statistics_, NO_ITERATORS);
has_prefix_extractor_ = (options.prefix_extractor.get() != nullptr);
prefix_extractor_ = options.prefix_extractor.get();
max_skip_ = options.max_sequential_skip_in_iterations;
}
virtual ~DBIter() {
@ -132,7 +134,7 @@ class DBIter: public Iterator {
}
}
bool has_prefix_extractor_;
const SliceTransform* prefix_extractor_;
bool arena_mode_;
Env* const env_;
Logger* logger_;
@ -149,6 +151,7 @@ class DBIter: public Iterator {
bool current_entry_is_merged_;
Statistics* statistics_;
uint64_t max_skip_;
const Slice* iterate_upper_bound_;
// No copying allowed
DBIter(const DBIter&);
@ -207,36 +210,44 @@ void DBIter::FindNextUserEntryInternal(bool skipping) {
uint64_t num_skipped = 0;
do {
ParsedInternalKey ikey;
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
if (skipping &&
user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) <= 0) {
num_skipped++; // skip this entry
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
} else {
skipping = false;
switch (ikey.type) {
case kTypeDeletion:
// Arrange to skip all upcoming entries for this key since
// they are hidden by this deletion.
saved_key_.SetKey(ikey.user_key);
skipping = true;
num_skipped = 0;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
break;
case kTypeValue:
valid_ = true;
saved_key_.SetKey(ikey.user_key);
return;
case kTypeMerge:
// By now, we are sure the current ikey is going to yield a value
saved_key_.SetKey(ikey.user_key);
current_entry_is_merged_ = true;
valid_ = true;
MergeValuesNewToOld(); // Go to a different state machine
return;
default:
assert(false);
break;
if (ParseKey(&ikey)) {
if (iterate_upper_bound_ != nullptr &&
ikey.user_key.compare(*iterate_upper_bound_) >= 0) {
break;
}
if (ikey.sequence <= sequence_) {
if (skipping &&
user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) <= 0) {
num_skipped++; // skip this entry
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
} else {
skipping = false;
switch (ikey.type) {
case kTypeDeletion:
// Arrange to skip all upcoming entries for this key since
// they are hidden by this deletion.
saved_key_.SetKey(ikey.user_key);
skipping = true;
num_skipped = 0;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
break;
case kTypeValue:
valid_ = true;
saved_key_.SetKey(ikey.user_key);
return;
case kTypeMerge:
// By now, we are sure the current ikey is going to yield a value
saved_key_.SetKey(ikey.user_key);
current_entry_is_merged_ = true;
valid_ = true;
MergeValuesNewToOld(); // Go to a different state machine
return;
default:
assert(false);
break;
}
}
}
}
@ -398,6 +409,7 @@ bool DBIter::FindValueForCurrentKey() {
case kTypeDeletion:
operands.clear();
last_not_merge_type = kTypeDeletion;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
break;
case kTypeMerge:
assert(user_merge_operator_ != nullptr);
@ -407,6 +419,7 @@ bool DBIter::FindValueForCurrentKey() {
assert(false);
}
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
assert(user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) == 0);
iter_->Prev();
++num_skipped;
@ -553,6 +566,20 @@ void DBIter::FindParseableKey(ParsedInternalKey* ikey, Direction direction) {
void DBIter::Seek(const Slice& target) {
StopWatch sw(env_, statistics_, DB_SEEK);
// total ordering is not guaranteed if prefix_extractor is set
// hence prefix based seeks will not give correct results
if (iterate_upper_bound_ != nullptr && prefix_extractor_ != nullptr) {
if (!prefix_extractor_->InDomain(*iterate_upper_bound_) ||
!prefix_extractor_->InDomain(target) ||
prefix_extractor_->Transform(*iterate_upper_bound_).compare(
prefix_extractor_->Transform(target)) != 0) {
status_ = Status::InvalidArgument("read_options.iterate_*_bound "
" and seek target need to have the same prefix.");
valid_ = false;
return;
}
}
saved_key_.Clear();
// now saved_key_ is used to store internal key.
saved_key_.SetInternalKey(target, sequence_);
@ -574,7 +601,7 @@ void DBIter::Seek(const Slice& target) {
void DBIter::SeekToFirst() {
// Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used.
if (has_prefix_extractor_) {
if (prefix_extractor_ != nullptr) {
max_skip_ = std::numeric_limits<uint64_t>::max();
}
direction_ = kForward;
@ -595,7 +622,7 @@ void DBIter::SeekToFirst() {
void DBIter::SeekToLast() {
// Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used.
if (has_prefix_extractor_) {
if (prefix_extractor_ != nullptr) {
max_skip_ = std::numeric_limits<uint64_t>::max();
}
direction_ = kReverse;
@ -612,9 +639,10 @@ void DBIter::SeekToLast() {
Iterator* NewDBIterator(Env* env, const Options& options,
const Comparator* user_key_comparator,
Iterator* internal_iter,
const SequenceNumber& sequence) {
const SequenceNumber& sequence,
const Slice* iterate_upper_bound) {
return new DBIter(env, options, user_key_comparator, internal_iter, sequence,
false);
false, iterate_upper_bound);
}
// Tears down the wrapped DBIter.
ArenaWrappedDBIter::~ArenaWrappedDBIter() {
  // NewArenaWrappedDbIterator() builds the DBIter with placement new inside
  // memory obtained from this wrapper's arena, so it is destroyed with an
  // explicit destructor call instead of `delete`.
  db_iter_->~DBIter();
}
@ -643,13 +671,16 @@ void ArenaWrappedDBIter::RegisterCleanup(CleanupFunction function, void* arg1,
ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const Options& options, const Comparator* user_key_comparator,
const SequenceNumber& sequence) {
const SequenceNumber& sequence,
const Slice* iterate_upper_bound) {
ArenaWrappedDBIter* iter = new ArenaWrappedDBIter();
Arena* arena = iter->GetArena();
auto mem = arena->AllocateAligned(sizeof(DBIter));
DBIter* db_iter = new (mem)
DBIter(env, options, user_key_comparator, nullptr, sequence, true);
DBIter* db_iter = new (mem) DBIter(env, options, user_key_comparator,
nullptr, sequence, true, iterate_upper_bound);
iter->SetDBIter(db_iter);
return iter;
}

@ -27,7 +27,8 @@ extern Iterator* NewDBIterator(
const Options& options,
const Comparator *user_key_comparator,
Iterator* internal_iter,
const SequenceNumber& sequence);
const SequenceNumber& sequence,
const Slice* iterate_upper_bound = nullptr);
// A wrapper iterator which wraps DB Iterator and the arena, with which the DB
// iterator is supposed to be allocated. This class is used as an entry point of
@ -68,6 +69,6 @@ class ArenaWrappedDBIter : public Iterator {
// Generate the arena wrapped iterator class.
extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const Options& options, const Comparator* user_key_comparator,
const SequenceNumber& sequence);
const SequenceNumber& sequence, const Slice* iterate_upper_bound = nullptr);
} // namespace rocksdb

@ -11,6 +11,7 @@
#include <iostream>
#include <set>
#include <unistd.h>
#include <thread>
#include <unordered_set>
#include <utility>
@ -41,6 +42,7 @@
#include "util/rate_limiter.h"
#include "util/statistics.h"
#include "util/testharness.h"
#include "util/scoped_arena_iterator.h"
#include "util/sync_point.h"
#include "util/testutil.h"
@ -755,11 +757,12 @@ class DBTest {
}
std::string AllEntriesFor(const Slice& user_key, int cf = 0) {
Iterator* iter;
ScopedArenaIterator iter;
Arena arena;
if (cf == 0) {
iter = dbfull()->TEST_NewInternalIterator();
iter.set(dbfull()->TEST_NewInternalIterator(&arena));
} else {
iter = dbfull()->TEST_NewInternalIterator(handles_[cf]);
iter.set(dbfull()->TEST_NewInternalIterator(&arena, handles_[cf]));
}
InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
iter->Seek(target.Encode());
@ -804,7 +807,6 @@ class DBTest {
}
result += "]";
}
delete iter;
return result;
}
@ -1042,11 +1044,12 @@ class DBTest {
// Utility method to test InplaceUpdate
void validateNumberOfEntries(int numValues, int cf = 0) {
Iterator* iter;
ScopedArenaIterator iter;
Arena arena;
if (cf != 0) {
iter = dbfull()->TEST_NewInternalIterator(handles_[cf]);
iter.set(dbfull()->TEST_NewInternalIterator(&arena, handles_[cf]));
} else {
iter = dbfull()->TEST_NewInternalIterator();
iter.set(dbfull()->TEST_NewInternalIterator(&arena));
}
iter->SeekToFirst();
ASSERT_EQ(iter->status().ok(), true);
@ -1060,7 +1063,6 @@ class DBTest {
ASSERT_EQ(ikey.sequence, (unsigned)seq--);
iter->Next();
}
delete iter;
ASSERT_EQ(0, seq);
}
@ -4210,22 +4212,25 @@ TEST(DBTest, CompactionFilter) {
// TODO: figure out sequence number squash too
int count = 0;
int total = 0;
Iterator* iter = dbfull()->TEST_NewInternalIterator(handles_[1]);
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ikey.sequence = -1;
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
total++;
if (ikey.sequence != 0) {
count++;
Arena arena;
{
ScopedArenaIterator iter(
dbfull()->TEST_NewInternalIterator(&arena, handles_[1]));
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ikey.sequence = -1;
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
total++;
if (ikey.sequence != 0) {
count++;
}
iter->Next();
}
iter->Next();
}
ASSERT_EQ(total, 100000);
ASSERT_EQ(count, 1);
delete iter;
// overwrite all the 100K keys once again.
for (int i = 0; i < 100000; i++) {
@ -4280,7 +4285,7 @@ TEST(DBTest, CompactionFilter) {
ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
// Scan the entire database to ensure that nothing is left
iter = db_->NewIterator(ReadOptions(), handles_[1]);
Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
iter->SeekToFirst();
count = 0;
while (iter->Valid()) {
@ -4296,18 +4301,20 @@ TEST(DBTest, CompactionFilter) {
// TODO: remove the following or design a different
// test
count = 0;
iter = dbfull()->TEST_NewInternalIterator(handles_[1]);
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
ASSERT_NE(ikey.sequence, (unsigned)0);
count++;
iter->Next();
{
ScopedArenaIterator iter(
dbfull()->TEST_NewInternalIterator(&arena, handles_[1]));
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
ASSERT_NE(ikey.sequence, (unsigned)0);
count++;
iter->Next();
}
ASSERT_EQ(count, 0);
}
ASSERT_EQ(count, 0);
delete iter;
}
// Tests the edge case where compaction does not produce any output -- all
@ -4429,22 +4436,24 @@ TEST(DBTest, CompactionFilterContextManual) {
// Verify total number of keys is correct after manual compaction.
int count = 0;
int total = 0;
Iterator* iter = dbfull()->TEST_NewInternalIterator();
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ikey.sequence = -1;
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
total++;
if (ikey.sequence != 0) {
count++;
{
Arena arena;
ScopedArenaIterator iter(dbfull()->TEST_NewInternalIterator(&arena));
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ikey.sequence = -1;
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
total++;
if (ikey.sequence != 0) {
count++;
}
iter->Next();
}
iter->Next();
ASSERT_EQ(total, 700);
ASSERT_EQ(count, 1);
}
ASSERT_EQ(total, 700);
ASSERT_EQ(count, 1);
delete iter;
}
class KeepFilterV2 : public CompactionFilterV2 {
@ -4601,25 +4610,27 @@ TEST(DBTest, CompactionFilterV2) {
// All the files are in the lowest level.
int count = 0;
int total = 0;
Iterator* iter = dbfull()->TEST_NewInternalIterator();
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ikey.sequence = -1;
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
total++;
if (ikey.sequence != 0) {
count++;
{
Arena arena;
ScopedArenaIterator iter(dbfull()->TEST_NewInternalIterator(&arena));
iter->SeekToFirst();
ASSERT_OK(iter->status());
while (iter->Valid()) {
ParsedInternalKey ikey(Slice(), 0, kTypeValue);
ikey.sequence = -1;
ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
total++;
if (ikey.sequence != 0) {
count++;
}
iter->Next();
}
iter->Next();
}
ASSERT_EQ(total, 100000);
// 1 snapshot only. Since we are using universal compaction,
// the sequence no is cleared for better compression
ASSERT_EQ(count, 1);
delete iter;
// create a new database with the compaction
// filter in such a way that it deletes all keys
@ -4643,7 +4654,7 @@ TEST(DBTest, CompactionFilterV2) {
ASSERT_EQ(NumTableFilesAtLevel(1), 0);
// Scan the entire database to ensure that nothing is left
iter = db_->NewIterator(ReadOptions());
Iterator* iter = db_->NewIterator(ReadOptions());
iter->SeekToFirst();
count = 0;
while (iter->Valid()) {
@ -7743,6 +7754,167 @@ TEST(DBTest, TableOptionsSanitizeTest) {
ASSERT_TRUE(TryReopen(&options).IsNotSupported());
}
// Exercises ReadOptions::iterate_upper_bound on a forward-moving DB iterator:
//   1. without a bound, iteration simply continues through all keys;
//   2. with a bound, the iterator becomes invalid at the first key that
//      compares >= the bound (the bound itself is excluded);
//   3. with a prefix extractor configured, Seek() fails with InvalidArgument
//      when the seek target and the bound do not share the same prefix;
//   4. once the bound is reached, deletion entries at or beyond it are not
//      scanned, observed through perf_context.internal_delete_skipped_count.
TEST(DBTest, DBIteratorBoundTest) {
  Options options;
  options.env = env_;
  options.create_if_missing = true;

  options.prefix_extractor = nullptr;
  DestroyAndReopen(&options);
  ASSERT_OK(Put("a", "0"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("foo1", "bar1"));
  ASSERT_OK(Put("g1", "0"));

  // testing basic case with no iterate_upper_bound and no prefix_extractor
  {
    ReadOptions ro;
    ro.iterate_upper_bound = nullptr;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
  }

  // testing iterate_upper_bound and forward iterator
  // to make sure it stops at bound
  {
    ReadOptions ro;
    // iterate_upper_bound points beyond the last expected entry
    Slice prefix("foo2");
    ro.iterate_upper_bound = &prefix;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);

    iter->Next();
    // should stop here: the next key, "g1", is past the bound "foo2"
    ASSERT_TRUE(!iter->Valid());
  }

  // prefix is the first letter of the key
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  DestroyAndReopen(&options);
  ASSERT_OK(Put("a", "0"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("foo1", "bar1"));
  ASSERT_OK(Put("g1", "0"));

  // testing with iterate_upper_bound and prefix_extractor
  // Seek target and iterate_upper_bound are not in the same prefix
  // This should be an error
  {
    ReadOptions ro;
    Slice prefix("g1");
    ro.iterate_upper_bound = &prefix;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(!iter->Valid());
    ASSERT_TRUE(iter->status().IsInvalidArgument());
  }

  // testing that iterate_upper_bound prevents iterating over deleted items
  // if the bound has already been reached
  {
    options.prefix_extractor = nullptr;
    DestroyAndReopen(&options);
    ASSERT_OK(Put("a", "0"));
    ASSERT_OK(Put("b", "0"));
    ASSERT_OK(Put("b1", "0"));
    ASSERT_OK(Put("c", "0"));
    ASSERT_OK(Put("d", "0"));
    ASSERT_OK(Put("e", "0"));
    ASSERT_OK(Delete("c"));
    ASSERT_OK(Delete("d"));

    // base case with no bound
    ReadOptions ro;
    ro.iterate_upper_bound = nullptr;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("b");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b1")), 0);

    // Only count the deletions skipped by the next step.
    perf_context.Reset();
    iter->Next();

    ASSERT_TRUE(iter->Valid());
    // "c" and "d" were deleted, so the iterator must have stepped over both
    // deletion entries and landed on the next live key, "e".
    ASSERT_EQ(iter->key().compare(Slice("e")), 0);
    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 2);

    // now testing with iterate_bound
    Slice prefix("c");
    ro.iterate_upper_bound = &prefix;

    iter.reset(db_->NewIterator(ro));

    perf_context.Reset();

    iter->Seek("b");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b1")), 0);

    iter->Next();
    // the iteration should stop as soon as the bound key is reached
    // even though the key is deleted
    // hence internal_delete_skipped_count should be 0
    ASSERT_TRUE(!iter->Valid());
    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
  }
}
// Starts a write through the TEST_BeginWrite()/TEST_EndWrite() hooks while
// holding the DB mutex, launches a Put() and a Flush() on background threads
// while that write is still open, then ends the write and joins both threads.
// NOTE(review): presumably this guards against the background writers
// hanging behind the open write -- confirm against the write-path
// implementation.
TEST(DBTest, WriteSingleThreadEntry) {
  // Pause after launching each worker. NOTE(review): looks like this is meant
  // to let the worker reach the write path before continuing -- confirm.
  const int kSettleMicros = 10000;

  std::vector<std::thread> workers;

  dbfull()->TEST_LockMutex();
  auto pending_write = dbfull()->TEST_BeginWrite();

  workers.emplace_back([&] { Put("a", "b"); });
  env_->SleepForMicroseconds(kSettleMicros);

  workers.emplace_back([&] { Flush(); });
  env_->SleepForMicroseconds(kSettleMicros);

  // Drop the mutex and take it again before finishing the write.
  dbfull()->TEST_UnlockMutex();
  dbfull()->TEST_LockMutex();
  dbfull()->TEST_EndWrite(pending_write);
  dbfull()->TEST_UnlockMutex();

  for (auto& worker : workers) {
    worker.join();
  }
}
} // namespace rocksdb
int main(int argc, char** argv) {

@ -244,7 +244,7 @@ class IterKey {
Slice GetKey() const { return Slice(key_, key_size_); }
const size_t Size() { return key_size_; }
size_t Size() { return key_size_; }
void Clear() { key_size_ = 0; }

@ -6,7 +6,10 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include "db/filename.h"
#include <inttypes.h>

@ -132,9 +132,11 @@ ForwardIterator::~ForwardIterator() {
}
void ForwardIterator::Cleanup() {
delete mutable_iter_;
if (mutable_iter_ != nullptr) {
mutable_iter_->~Iterator();
}
for (auto* m : imm_iters_) {
delete m;
m->~Iterator();
}
imm_iters_.clear();
for (auto* f : l0_iters_) {
@ -401,8 +403,8 @@ void ForwardIterator::RebuildIterators() {
Cleanup();
// New
sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_));
mutable_iter_ = sv_->mem->NewIterator(read_options_);
sv_->imm->AddIterators(read_options_, &imm_iters_);
mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_);
sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_);
const auto& l0_files = sv_->current->files_[0];
l0_iters_.reserve(l0_files.size());
for (const auto* l0 : l0_files) {

@ -14,6 +14,7 @@
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "db/dbformat.h"
#include "util/arena.h"
namespace rocksdb {
@ -100,6 +101,7 @@ class ForwardIterator : public Iterator {
IterKey prev_key_;
bool is_prev_set_;
Arena arena_;
};
} // namespace rocksdb

@ -7,7 +7,11 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/internal_stats.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <vector>
#include "db/column_family.h"

@ -249,13 +249,9 @@ class MemTableIterator: public Iterator {
};
Iterator* MemTable::NewIterator(const ReadOptions& options, Arena* arena) {
if (arena == nullptr) {
return new MemTableIterator(*this, options, nullptr);
} else {
auto mem = arena->AllocateAligned(sizeof(MemTableIterator));
return new (mem)
MemTableIterator(*this, options, arena);
}
assert(arena != nullptr);
auto mem = arena->AllocateAligned(sizeof(MemTableIterator));
return new (mem) MemTableIterator(*this, options, arena);
}
port::RWMutex* MemTable::GetLock(const Slice& key) {

@ -81,8 +81,7 @@ class MemTable {
// arena: If not null, the arena needs to be used to allocate the Iterator.
// Calling ~Iterator of the iterator will destroy all the states but
// those allocated in arena.
Iterator* NewIterator(const ReadOptions& options,
Arena* arena = nullptr);
Iterator* NewIterator(const ReadOptions& options, Arena* arena);
// Add an entry into memtable that maps key to value at the
// specified sequence number and with the specified type.

@ -73,9 +73,10 @@ bool MemTableListVersion::Get(const LookupKey& key, std::string* value,
}
void MemTableListVersion::AddIterators(const ReadOptions& options,
std::vector<Iterator*>* iterator_list) {
std::vector<Iterator*>* iterator_list,
Arena* arena) {
for (auto& m : memlist_) {
iterator_list->push_back(m->NewIterator(options));
iterator_list->push_back(m->NewIterator(options, arena));
}
}

@ -49,7 +49,7 @@ class MemTableListVersion {
MergeContext& merge_context, const Options& options);
void AddIterators(const ReadOptions& options,
std::vector<Iterator*>* iterator_list);
std::vector<Iterator*>* iterator_list, Arena* arena);
void AddIterators(const ReadOptions& options,
MergeIteratorBuilder* merge_iter_builder);

@ -192,16 +192,17 @@ extern const uint64_t kPlainTableMagicNumber;
class TestPlainTableReader : public PlainTableReader {
public:
TestPlainTableReader(const EnvOptions& storage_options,
TestPlainTableReader(const EnvOptions& env_options,
const InternalKeyComparator& icomparator,
EncodingType encoding_type, uint64_t file_size,
int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness,
const TableProperties* table_properties,
unique_ptr<RandomAccessFile>&& file,
const Options& options, bool* expect_bloom_not_match,
const ImmutableCFOptions& ioptions,
bool* expect_bloom_not_match,
bool store_index_in_file)
: PlainTableReader(options, std::move(file), storage_options, icomparator,
: PlainTableReader(ioptions, std::move(file), env_options, icomparator,
encoding_type, file_size, table_properties),
expect_bloom_not_match_(expect_bloom_not_match) {
Status s = MmapDataFile();
@ -218,7 +219,7 @@ class TestPlainTableReader : public PlainTableReader {
PlainTablePropertyNames::kBloomVersion);
ASSERT_TRUE(bloom_version_ptr != props->user_collected_properties.end());
ASSERT_EQ(bloom_version_ptr->second, std::string("1"));
if (options.bloom_locality > 0) {
if (ioptions.bloom_locality > 0) {
auto num_blocks_ptr = props->user_collected_properties.find(
PlainTablePropertyNames::kNumBloomBlocks);
ASSERT_TRUE(num_blocks_ptr != props->user_collected_properties.end());
@ -253,25 +254,26 @@ class TestPlainTableFactory : public PlainTableFactory {
store_index_in_file_(options.store_index_in_file),
expect_bloom_not_match_(expect_bloom_not_match) {}
Status NewTableReader(const Options& options, const EnvOptions& soptions,
Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override {
TableProperties* props = nullptr;
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
options.env, options.info_log.get(), &props);
ioptions.env, ioptions.info_log, &props);
ASSERT_TRUE(s.ok());
if (store_index_in_file_) {
BlockHandle bloom_block_handle;
s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
options.env, BloomBlockBuilder::kBloomBlock,
ioptions.env, BloomBlockBuilder::kBloomBlock,
&bloom_block_handle);
ASSERT_TRUE(s.ok());
BlockHandle index_block_handle;
s = FindMetaBlock(
file.get(), file_size, kPlainTableMagicNumber, options.env,
file.get(), file_size, kPlainTableMagicNumber, ioptions.env,
PlainTableIndexBuilder::kPlainTableIndexBlock, &index_block_handle);
ASSERT_TRUE(s.ok());
}
@ -284,9 +286,9 @@ class TestPlainTableFactory : public PlainTableFactory {
DecodeFixed32(encoding_type_prop->second.c_str()));
std::unique_ptr<PlainTableReader> new_reader(new TestPlainTableReader(
soptions, internal_comparator, encoding_type, file_size,
env_options, internal_comparator, encoding_type, file_size,
bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props,
std::move(file), options, expect_bloom_not_match_,
std::move(file), ioptions, expect_bloom_not_match_,
store_index_in_file_));
*table = std::move(new_reader);

@ -31,7 +31,10 @@
#ifndef ROCKSDB_LITE
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include "db/builder.h"
#include "db/db_impl.h"
@ -46,6 +49,9 @@
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/immutable_options.h"
#include "util/scoped_arena_iterator.h"
namespace rocksdb {
@ -58,6 +64,7 @@ class Repairer {
env_(options.env),
icmp_(options.comparator),
options_(SanitizeOptions(dbname, &icmp_, options)),
ioptions_(options_),
raw_table_cache_(
// TableCache can be small since we expect each table to be opened
// once.
@ -65,7 +72,7 @@ class Repairer {
options_.table_cache_remove_scan_count_limit)),
next_file_number_(1) {
table_cache_ =
new TableCache(&options_, storage_options_, raw_table_cache_.get());
new TableCache(ioptions_, env_options_, raw_table_cache_.get());
edit_ = new VersionEdit();
}
@ -107,8 +114,9 @@ class Repairer {
std::string const dbname_;
Env* const env_;
InternalKeyComparator const icmp_;
Options const options_;
const InternalKeyComparator icmp_;
const Options options_;
const ImmutableCFOptions ioptions_;
std::shared_ptr<Cache> raw_table_cache_;
TableCache* table_cache_;
VersionEdit* edit_;
@ -118,7 +126,7 @@ class Repairer {
std::vector<uint64_t> logs_;
std::vector<TableInfo> tables_;
uint64_t next_file_number_;
const EnvOptions storage_options_;
const EnvOptions env_options_;
Status FindFiles() {
std::vector<std::string> filenames;
@ -190,7 +198,7 @@ class Repairer {
// Open the log file
std::string logname = LogFileName(dbname_, log);
unique_ptr<SequentialFile> lfile;
Status status = env_->NewSequentialFile(logname, &lfile, storage_options_);
Status status = env_->NewSequentialFile(logname, &lfile, env_options_);
if (!status.ok()) {
return status;
}
@ -236,12 +244,15 @@ class Repairer {
// since ExtractMetaData() will also generate edits.
FileMetaData meta;
meta.fd = FileDescriptor(next_file_number_++, 0, 0);
ReadOptions ro;
ro.total_order_seek = true;
Iterator* iter = mem->NewIterator(ro);
status = BuildTable(dbname_, env_, options_, storage_options_, table_cache_,
iter, &meta, icmp_, 0, 0, kNoCompression);
delete iter;
{
ReadOptions ro;
ro.total_order_seek = true;
Arena arena;
ScopedArenaIterator iter(mem->NewIterator(ro, &arena));
status = BuildTable(dbname_, env_, ioptions_, env_options_, table_cache_,
iter.get(), &meta, icmp_, 0, 0, kNoCompression,
CompressionOptions());
}
delete mem->Unref();
delete cf_mems_default;
mem = nullptr;
@ -286,7 +297,7 @@ class Repairer {
file_size);
if (status.ok()) {
Iterator* iter = table_cache_->NewIterator(
ReadOptions(), storage_options_, icmp_, t->meta.fd);
ReadOptions(), env_options_, icmp_, t->meta.fd);
bool empty = true;
ParsedInternalKey parsed;
t->min_sequence = 0;
@ -326,7 +337,7 @@ class Repairer {
std::string tmp = TempFileName(dbname_, 1);
unique_ptr<WritableFile> file;
Status status = env_->NewWritableFile(
tmp, &file, env_->OptimizeForManifestWrite(storage_options_));
tmp, &file, env_->OptimizeForManifestWrite(env_options_));
if (!status.ok()) {
return status;
}

@ -79,7 +79,8 @@ public:
// for the duration of the returned table's lifetime.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& options, const EnvOptions& soptions,
static Status Open(const ImmutableCFOptions& options,
const EnvOptions& env_options,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader);
@ -160,14 +161,14 @@ private:
struct SimpleTableReader::Rep {
~Rep() {
}
Rep(const EnvOptions& storage_options, uint64_t index_start_offset,
int num_entries) :
soptions(storage_options), index_start_offset(index_start_offset),
num_entries(num_entries) {
Rep(const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
uint64_t index_start_offset, int num_entries) :
ioptions(ioptions), env_options(env_options),
index_start_offset(index_start_offset), num_entries(num_entries) {
}
Options options;
const EnvOptions& soptions;
const ImmutableCFOptions& ioptions;
const EnvOptions& env_options;
Status status;
unique_ptr<RandomAccessFile> file;
uint64_t index_start_offset;
@ -187,8 +188,8 @@ SimpleTableReader::~SimpleTableReader() {
delete rep_;
}
Status SimpleTableReader::Open(const Options& options,
const EnvOptions& soptions,
Status SimpleTableReader::Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
unique_ptr<RandomAccessFile> && file,
uint64_t size,
unique_ptr<TableReader>* table_reader) {
@ -201,12 +202,10 @@ Status SimpleTableReader::Open(const Options& options,
int num_entries = (size - Rep::offset_length - index_start_offset)
/ (Rep::GetInternalKeyLength() + Rep::offset_length);
SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(soptions,
index_start_offset,
num_entries);
SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(
ioptions, env_options, index_start_offset, num_entries);
rep->file = std::move(file);
rep->options = options;
table_reader->reset(new SimpleTableReader(rep));
}
return s;
@ -248,7 +247,7 @@ Status SimpleTableReader::GetOffset(const Slice& target, uint64_t* offset) {
return s;
}
InternalKeyComparator ikc(rep_->options.comparator);
InternalKeyComparator ikc(rep_->ioptions.comparator);
int compare_result = ikc.Compare(tmp_slice, target);
if (compare_result < 0) {
@ -382,7 +381,7 @@ void SimpleTableIterator::Prev() {
}
Slice SimpleTableIterator::key() const {
Log(table_->rep_->options.info_log, "key!!!!");
Log(table_->rep_->ioptions.info_log, "key!!!!");
return key_;
}
@ -401,7 +400,7 @@ public:
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
SimpleTableBuilder(const Options& options, WritableFile* file,
SimpleTableBuilder(const ImmutableCFOptions& ioptions, WritableFile* file,
CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called.
@ -444,7 +443,7 @@ private:
};
struct SimpleTableBuilder::Rep {
Options options;
const ImmutableCFOptions& ioptions;
WritableFile* file;
uint64_t offset = 0;
Status status;
@ -463,17 +462,17 @@ struct SimpleTableBuilder::Rep {
std::string index;
Rep(const Options& opt, WritableFile* f) :
options(opt), file(f) {
Rep(const ImmutableCFOptions& iopt, WritableFile* f) :
ioptions(iopt), file(f) {
}
~Rep() {
}
};
SimpleTableBuilder::SimpleTableBuilder(const Options& options,
SimpleTableBuilder::SimpleTableBuilder(const ImmutableCFOptions& ioptions,
WritableFile* file,
CompressionType compression_type) :
rep_(new SimpleTableBuilder::Rep(options, file)) {
rep_(new SimpleTableBuilder::Rep(ioptions, file)) {
}
SimpleTableBuilder::~SimpleTableBuilder() {
@ -546,15 +545,18 @@ public:
const char* Name() const override {
return "SimpleTable";
}
Status NewTableReader(const Options& options, const EnvOptions& soptions,
Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const;
TableBuilder* NewTableBuilder(const Options& options,
const InternalKeyComparator& internal_key,
WritableFile* file,
CompressionType compression_type) const;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const;
virtual Status SanitizeDBOptions(const DBOptions* db_opts) const override {
return Status::OK();
@ -566,19 +568,22 @@ public:
};
Status SimpleTableFactory::NewTableReader(
const Options& options, const EnvOptions& soptions,
const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return SimpleTableReader::Open(options, soptions, std::move(file), file_size,
table_reader);
return SimpleTableReader::Open(ioptions, env_options, std::move(file),
file_size, table_reader);
}
TableBuilder* SimpleTableFactory::NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_key,
WritableFile* file, CompressionType compression_type) const {
return new SimpleTableBuilder(options, file, compression_type);
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
return new SimpleTableBuilder(ioptions, file, compression_type);
}
class SimpleTableDBTest {

@ -71,7 +71,7 @@ class SnapshotList {
}
// get the sequence number of the most recent snapshot
const SequenceNumber GetNewest() {
SequenceNumber GetNewest() {
if (empty()) {
return 0;
}

@ -36,12 +36,10 @@ static Slice GetSliceForFileNumber(const uint64_t* file_number) {
sizeof(*file_number));
}
TableCache::TableCache(const Options* options,
const EnvOptions& storage_options, Cache* const cache)
: env_(options->env),
db_paths_(options->db_paths),
options_(options),
storage_options_(storage_options),
TableCache::TableCache(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, Cache* const cache)
: ioptions_(ioptions),
env_options_(env_options),
cache_(cache) {}
TableCache::~TableCache() {
@ -55,7 +53,7 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
cache_->Release(handle);
}
Status TableCache::FindTable(const EnvOptions& toptions,
Status TableCache::FindTable(const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, Cache::Handle** handle,
const bool no_io) {
@ -68,24 +66,24 @@ Status TableCache::FindTable(const EnvOptions& toptions,
return Status::Incomplete("Table not found in table_cache, no_io is set");
}
std::string fname =
TableFileName(db_paths_, fd.GetNumber(), fd.GetPathId());
TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId());
unique_ptr<RandomAccessFile> file;
unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions);
RecordTick(options_->statistics.get(), NO_FILE_OPENS);
s = ioptions_.env->NewRandomAccessFile(fname, &file, env_options);
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
if (s.ok()) {
if (options_->advise_random_on_open) {
if (ioptions_.advise_random_on_open) {
file->Hint(RandomAccessFile::RANDOM);
}
StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS);
s = options_->table_factory->NewTableReader(
*options_, toptions, internal_comparator, std::move(file),
StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS);
s = ioptions_.table_factory->NewTableReader(
ioptions_, env_options, internal_comparator, std::move(file),
fd.GetFileSize(), &table_reader);
}
if (!s.ok()) {
assert(table_reader == nullptr);
RecordTick(options_->statistics.get(), NO_FILE_ERRORS);
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
// We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically.
} else {
@ -97,7 +95,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
}
Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions,
const EnvOptions& env_options,
const InternalKeyComparator& icomparator,
const FileDescriptor& fd,
TableReader** table_reader_ptr,
@ -109,7 +107,7 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Cache::Handle* handle = nullptr;
Status s;
if (table_reader == nullptr) {
s = FindTable(toptions, icomparator, fd, &handle,
s = FindTable(env_options, icomparator, fd, &handle,
options.read_tier == kBlockCacheTier);
if (!s.ok()) {
return NewErrorIterator(s, arena);
@ -142,7 +140,7 @@ Status TableCache::Get(const ReadOptions& options,
Status s;
Cache::Handle* handle = nullptr;
if (!t) {
s = FindTable(storage_options_, internal_comparator, fd, &handle,
s = FindTable(env_options_, internal_comparator, fd, &handle,
options.read_tier == kBlockCacheTier);
if (s.ok()) {
t = GetTableReaderFromHandle(handle);
@ -160,8 +158,9 @@ Status TableCache::Get(const ReadOptions& options,
}
return s;
}
Status TableCache::GetTableProperties(
const EnvOptions& toptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
std::shared_ptr<const TableProperties>* properties, bool no_io) {
Status s;
@ -174,7 +173,7 @@ Status TableCache::GetTableProperties(
}
Cache::Handle* table_handle = nullptr;
s = FindTable(toptions, internal_comparator, fd, &table_handle, no_io);
s = FindTable(env_options, internal_comparator, fd, &table_handle, no_io);
if (!s.ok()) {
return s;
}
@ -186,7 +185,7 @@ Status TableCache::GetTableProperties(
}
size_t TableCache::GetMemoryUsageByTableReader(
const EnvOptions& toptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd) {
Status s;
@ -197,7 +196,7 @@ size_t TableCache::GetMemoryUsageByTableReader(
}
Cache::Handle* table_handle = nullptr;
s = FindTable(toptions, internal_comparator, fd, &table_handle, true);
s = FindTable(env_options, internal_comparator, fd, &table_handle, true);
if (!s.ok()) {
return 0;
}

@ -19,6 +19,7 @@
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "rocksdb/options.h"
#include "table/table_reader.h"
namespace rocksdb {
@ -29,8 +30,8 @@ struct FileDescriptor;
class TableCache {
public:
TableCache(const Options* options, const EnvOptions& storage_options,
Cache* cache);
TableCache(const ImmutableCFOptions& ioptions,
const EnvOptions& storage_options, Cache* cache);
~TableCache();
// Return an iterator for the specified file number (the corresponding
@ -91,10 +92,8 @@ class TableCache {
void ReleaseHandle(Cache::Handle* handle);
private:
Env* const env_;
const std::vector<DbPath> db_paths_;
const Options* options_;
const EnvOptions& storage_options_;
const ImmutableCFOptions& ioptions_;
const EnvOptions& env_options_;
Cache* const cache_;
};

@ -11,6 +11,7 @@
#include "db/dbformat.h"
#include "db/table_properties_collector.h"
#include "rocksdb/table.h"
#include "rocksdb/immutable_options.h"
#include "table/block_based_table_factory.h"
#include "table/meta_blocks.h"
#include "table/plain_table_factory.h"
@ -85,12 +86,14 @@ class DumbLogger : public Logger {
// Utilities test functions
namespace {
void MakeBuilder(const Options& options,
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
std::unique_ptr<FakeWritableFile>* writable,
std::unique_ptr<TableBuilder>* builder) {
writable->reset(new FakeWritableFile);
builder->reset(options.table_factory->NewTableBuilder(
options, internal_comparator, writable->get(), options.compression));
builder->reset(ioptions.table_factory->NewTableBuilder(
ioptions, internal_comparator, writable->get(),
options.compression, options.compression_opts));
}
} // namespace
@ -153,7 +156,8 @@ void TestCustomizedTablePropertiesCollector(
// -- Step 1: build table
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable;
MakeBuilder(options, internal_comparator, &writable, &builder);
const ImmutableCFOptions ioptions(options);
MakeBuilder(options, ioptions, internal_comparator, &writable, &builder);
for (const auto& kv : kvs) {
if (encode_as_internal) {
@ -264,9 +268,10 @@ void TestInternalKeyPropertiesCollector(
options.table_properties_collector_factories = {
std::make_shared<InternalKeyPropertiesCollectorFactory>()};
}
const ImmutableCFOptions ioptions(options);
for (int iter = 0; iter < 2; ++iter) {
MakeBuilder(options, pikc, &writable, &builder);
MakeBuilder(options, ioptions, pikc, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}

@ -163,13 +163,13 @@ class VersionEdit {
// Add the specified file at the specified number.
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file
void AddFile(int level, uint64_t file, uint64_t file_size,
uint64_t file_path_id, const InternalKey& smallest,
void AddFile(int level, uint64_t file, uint64_t file_path_id,
uint64_t file_size, const InternalKey& smallest,
const InternalKey& largest, const SequenceNumber& smallest_seqno,
const SequenceNumber& largest_seqno) {
assert(smallest_seqno <= largest_seqno);
FileMetaData f;
f.fd = FileDescriptor(file, file_size, file_path_id);
f.fd = FileDescriptor(file, file_path_id, file_size);
f.smallest = smallest;
f.largest = largest;
f.smallest_seqno = smallest_seqno;

@ -9,7 +9,10 @@
#include "db/version_set.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <algorithm>
#include <map>
@ -596,31 +599,6 @@ uint64_t Version::GetEstimatedActiveKeys() {
return num_non_deletions_ - num_deletions_;
}
// Appends to *iters the iterators that together cover every file in this
// version: one table iterator per level-0 file, followed by one two-level
// iterator for each non-empty level >= 1.
//
// read_options and soptions are forwarded unchanged to the table cache and
// to the per-level iterator state.
void Version::AddIterators(const ReadOptions& read_options,
const EnvOptions& soptions,
std::vector<Iterator*>* iters) {
// Merge all level zero files together since they may overlap
for (size_t i = 0; i < file_levels_[0].num_files; i++) {
const auto& file = file_levels_[0].files[i];
iters->push_back(cfd_->table_cache()->NewIterator(
read_options, soptions, cfd_->internal_comparator(), file.fd));
}
// For levels > 0, we can use a concatenating iterator that sequentially
// walks through the non-overlapping files in the level, opening them
// lazily.
for (int level = 1; level < num_levels_; level++) {
// Empty levels contribute no iterator at all.
if (file_levels_[level].num_files != 0) {
// The last LevelFileIteratorState argument is true only when the column
// family has a prefix extractor configured.
iters->push_back(NewTwoLevelIterator(new LevelFileIteratorState(
cfd_->table_cache(), read_options, soptions,
cfd_->internal_comparator(), false /* for_compaction */,
cfd_->options()->prefix_extractor != nullptr),
new LevelFileNumIterator(cfd_->internal_comparator(),
&file_levels_[level])));
}
}
}
void Version::AddIterators(const ReadOptions& read_options,
const EnvOptions& soptions,
MergeIteratorBuilder* merge_iter_builder) {

@ -86,8 +86,6 @@ class Version {
// Append to *iters a sequence of iterators that will
// yield the contents of this Version when merged together.
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
void AddIterators(const ReadOptions&, const EnvOptions& soptions,
std::vector<Iterator*>* iters);
void AddIterators(const ReadOptions&, const EnvOptions& soptions,
MergeIteratorBuilder* merger_iter_builder);

@ -18,6 +18,7 @@
#include "rocksdb/utilities/write_batch_with_index.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/scoped_arena_iterator.h"
namespace rocksdb {
@ -32,7 +33,8 @@ static std::string PrintContents(WriteBatch* b) {
ColumnFamilyMemTablesDefault cf_mems_default(mem, &options);
Status s = WriteBatchInternal::InsertInto(b, &cf_mems_default);
int count = 0;
Iterator* iter = mem->NewIterator(ReadOptions());
Arena arena;
ScopedArenaIterator iter(mem->NewIterator(ReadOptions(), &arena));
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ParsedInternalKey ikey;
memset((void *)&ikey, 0, sizeof(ikey));
@ -67,7 +69,6 @@ static std::string PrintContents(WriteBatch* b) {
state.append("@");
state.append(NumberToString(ikey.sequence));
}
delete iter;
if (!s.ok()) {
state.append(s.ToString());
} else if (count != WriteBatchInternal::Count(b)) {

@ -698,6 +698,10 @@ extern void rocksdb_readoptions_set_fill_cache(
extern void rocksdb_readoptions_set_snapshot(
rocksdb_readoptions_t*,
const rocksdb_snapshot_t*);
extern void rocksdb_readoptions_set_iterate_upper_bound(
rocksdb_readoptions_t*,
const char* key,
size_t keylen);
extern void rocksdb_readoptions_set_read_tier(
rocksdb_readoptions_t*, int);
extern void rocksdb_readoptions_set_tailing(

@ -0,0 +1,62 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#include <vector>
#include "rocksdb/options.h"
namespace rocksdb {
// ImmutableCFOptions is a data struct used internally by RocksDB. It contains
// a subset of Options that should not be changed during the entire lifetime
// of the DB. You shouldn't need to access this data structure unless you are
// implementing a new TableFactory.
// Plain aggregate of option values copied out of an Options object. Table
// factories, readers, and builders receive it in place of the full Options.
struct ImmutableCFOptions {
// Builds the struct from `options`. The definition is not in this header.
explicit ImmutableCFOptions(const Options& options);
// The pointer members below are raw, non-owning pointers.
// NOTE(review): presumably the Options they were taken from (or its
// shared_ptr members) must outlive this struct -- confirm against the
// constructor definition.
const SliceTransform* prefix_extractor;
const Comparator* comparator;
MergeOperator* merge_operator;
Logger* info_log;
Statistics* statistics;
InfoLogLevel info_log_level;
Env* env;
// Allow the OS to mmap file for reading sst tables. Default: false
bool allow_mmap_reads;
// Allow the OS to mmap file for writing. Default: false
bool allow_mmap_writes;
// Held by value, unlike the pointer members above.
std::vector<DbPath> db_paths;
TableFactory* table_factory;
// Factories used to create one TablePropertiesCollector each when a table
// is built.
Options::TablePropertiesCollectorFactories
table_properties_collector_factories;
// When true, a newly opened table file is hinted for random access.
bool advise_random_on_open;
// This option is required by PlainTableReader. It may need to move
// to PlainTableOptions, just like bloom_bits_per_key.
uint32_t bloom_locality;
bool purge_redundant_kvs_while_flush;
uint32_t min_partial_merge_operands;
bool disable_data_sync;
bool use_fsync;
};
} // namespace rocksdb

@ -409,10 +409,24 @@ struct ColumnFamilyOptions {
std::shared_ptr<MemTableRepFactory> memtable_factory;
// This is a factory that provides TableFactory objects.
// Default: a factory that provides a default implementation of
// Table and TableBuilder.
// Default: a block-based table factory that provides a default
// implementation of TableBuilder and TableReader with default
// BlockBasedTableOptions.
std::shared_ptr<TableFactory> table_factory;
// Block-based table related options are moved to BlockBasedTableOptions.
// Related options that were originally here but now moved include:
// no_block_cache
// block_cache
// block_cache_compressed
// block_size
// block_size_deviation
// block_restart_interval
// filter_policy
// whole_key_filtering
// If you'd like to customize some of these options, you will need to
// use NewBlockBasedTableFactory() to construct a new table factory.
// This option allows users to collect their own statistics of interest about
// the tables.
// Default: empty vector -- no user-defined statistics collection will be
@ -889,6 +903,18 @@ struct ReadOptions {
// ! DEPRECATED
// const Slice* prefix;
// "iterate_upper_bound" defines the extent up to which the forward iterator
// can return entries. Once the bound is reached, Valid() will be false.
// "iterate_upper_bound" is exclusive, i.e. the bound value is
// not a valid entry. If prefix_extractor is not null, the Seek target
// and iterate_upper_bound need to have the same prefix.
// This is because ordering is not guaranteed outside of the prefix domain.
// There is no lower bound on the iterator. If needed, that can be easily
// implemented.
//
// Default: nullptr
const Slice* iterate_upper_bound;
// Specify if this read request should process data that ALREADY
// resides on a particular cache. If the required data is not
// found at the specified cache, then Status::Incomplete is returned.
@ -912,6 +938,7 @@ struct ReadOptions {
: verify_checksums(true),
fill_cache(true),
snapshot(nullptr),
iterate_upper_bound(nullptr),
read_tier(kReadAllTier),
tailing(false),
total_order_seek(false) {}
@ -919,6 +946,7 @@ struct ReadOptions {
: verify_checksums(cksum),
fill_cache(cache),
snapshot(nullptr),
iterate_upper_bound(nullptr),
read_tier(kReadAllTier),
tailing(false),
total_order_seek(false) {}

@ -23,6 +23,7 @@
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/immutable_options.h"
#include "rocksdb/status.h"
namespace rocksdb {
@ -293,14 +294,15 @@ class TableFactory {
// and cache the table object returned.
// (1) SstFileReader (for SST Dump) opens the table and dump the table
// contents using the iterator of the table.
// options and soptions are options. options is the general options.
// ImmutableCFOptions is a subset of Options that cannot be altered.
// EnvOptions is a subset of Options that will be used by Env.
// Multiple configured options can be accessed from there, including but
// not limited to block cache and key comparators.
// file is a file handler to handle the file for the table
// file_size is the physical file size of the file
// table_reader is the output table reader
virtual Status NewTableReader(
const Options& options, const EnvOptions& soptions,
const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const = 0;
@ -318,14 +320,17 @@ class TableFactory {
// (4) When running Repairer, it creates a table builder to convert logs to
// SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
//
// options is the general options. Multiple configured can be acceseed from
// there, including and not limited to compression options.
// file is a handle of a writable file. It is the caller's responsibility to
// keep the file open and close the file after closing the table builder.
// compression_type is the compression type to use in this table.
// ImmutableCFOptions is a subset of Options that cannot be altered.
// Multiple configured options can be accessed from there, including but not
// limited to compression options. file is a handle of a writable file.
// It is the caller's responsibility to keep the file open and close the file
// after closing the table builder. compression_type is the compression type
// to use in this table.
virtual TableBuilder* NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const = 0;
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const = 0;
// Sanitizes the specified DB Options.
//

@ -10,7 +10,10 @@
#pragma once
#ifndef ROCKSDB_LITE
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <string>
#include <map>

@ -18,6 +18,7 @@
#include "rocksdb/env.h"
#include "rocksdb/memtablerep.h"
#include "util/logging.h"
#include "util/scoped_arena_iterator.h"
#include "util/testharness.h"
/*
@ -209,7 +210,9 @@ jbyteArray Java_org_rocksdb_WriteBatchTest_getContents(
rocksdb::Status s =
rocksdb::WriteBatchInternal::InsertInto(b, &cf_mems_default);
int count = 0;
rocksdb::Iterator* iter = mem->NewIterator(rocksdb::ReadOptions());
Arena arena;
ScopedArenaIterator iter(mem->NewIterator(
rocksdb::ReadOptions(), false /*don't enforce total order*/, &arena));
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
rocksdb::ParsedInternalKey ikey;
memset(reinterpret_cast<void*>(&ikey), 0, sizeof(ikey));
@ -244,7 +247,6 @@ jbyteArray Java_org_rocksdb_WriteBatchTest_getContents(
state.append("@");
state.append(rocksdb::NumberToString(ikey.sequence));
}
delete iter;
if (!s.ok()) {
state.append(s.ToString());
} else if (count != rocksdb::WriteBatchInternal::Count(b)) {

@ -39,7 +39,7 @@ extern const uint64_t kLegacyBlockBasedTableMagicNumber;
extern const uint64_t kCuckooTableMagicNumber;
Status AdaptiveTableFactory::NewTableReader(
const Options& options, const EnvOptions& soptions,
const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& icomp, unique_ptr<RandomAccessFile>&& file,
uint64_t file_size, unique_ptr<TableReader>* table) const {
Footer footer;
@ -50,24 +50,26 @@ Status AdaptiveTableFactory::NewTableReader(
if (footer.table_magic_number() == kPlainTableMagicNumber ||
footer.table_magic_number() == kLegacyPlainTableMagicNumber) {
return plain_table_factory_->NewTableReader(
options, soptions, icomp, std::move(file), file_size, table);
ioptions, env_options, icomp, std::move(file), file_size, table);
} else if (footer.table_magic_number() == kBlockBasedTableMagicNumber ||
footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) {
return block_based_table_factory_->NewTableReader(
options, soptions, icomp, std::move(file), file_size, table);
ioptions, env_options, icomp, std::move(file), file_size, table);
} else if (footer.table_magic_number() == kCuckooTableMagicNumber) {
return cuckoo_table_factory_->NewTableReader(
options, soptions, icomp, std::move(file), file_size, table);
ioptions, env_options, icomp, std::move(file), file_size, table);
} else {
return Status::NotSupported("Unidentified table format");
}
}
TableBuilder* AdaptiveTableFactory::NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const {
return table_factory_to_write_->NewTableBuilder(options, internal_comparator,
file, compression_type);
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
return table_factory_to_write_->NewTableBuilder(
ioptions, internal_comparator, file, compression_type, compression_opts);
}
std::string AdaptiveTableFactory::GetPrintableTableOptions() const {

@ -12,7 +12,6 @@
namespace rocksdb {
struct Options;
struct EnvOptions;
using std::unique_ptr;
@ -31,16 +30,21 @@ class AdaptiveTableFactory : public TableFactory {
std::shared_ptr<TableFactory> block_based_table_factory,
std::shared_ptr<TableFactory> plain_table_factory,
std::shared_ptr<TableFactory> cuckoo_table_factory);
const char* Name() const override { return "AdaptiveTableFactory"; }
Status NewTableReader(const Options& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(const Options& options,
const InternalKeyComparator& icomparator,
WritableFile* file,
CompressionType compression_type) const
override;
Status NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {

@ -25,7 +25,6 @@
#include "rocksdb/env.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/flush_block_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "table/block.h"
@ -385,7 +384,7 @@ class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector
};
struct BlockBasedTableBuilder::Rep {
const Options options;
const ImmutableCFOptions ioptions;
const BlockBasedTableOptions table_options;
const InternalKeyComparator& internal_comparator;
WritableFile* file;
@ -397,7 +396,8 @@ struct BlockBasedTableBuilder::Rep {
std::unique_ptr<IndexBuilder> index_builder;
std::string last_key;
CompressionType compression_type;
const CompressionType compression_type;
const CompressionOptions compression_opts;
TableProperties props;
bool closed = false; // Either Finish() or Abandon() has been called.
@ -413,27 +413,31 @@ struct BlockBasedTableBuilder::Rep {
std::vector<std::unique_ptr<TablePropertiesCollector>>
table_properties_collectors;
Rep(const Options& opt, const BlockBasedTableOptions& table_opt,
Rep(const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_opt,
const InternalKeyComparator& icomparator,
WritableFile* f, CompressionType compression_type)
: options(opt),
WritableFile* f, const CompressionType compression_type,
const CompressionOptions& compression_opts)
: ioptions(ioptions),
table_options(table_opt),
internal_comparator(icomparator),
file(f),
data_block(table_options.block_restart_interval),
internal_prefix_transform(options.prefix_extractor.get()),
internal_prefix_transform(ioptions.prefix_extractor),
index_builder(CreateIndexBuilder(
table_options.index_type, &internal_comparator,
&this->internal_prefix_transform)),
compression_type(compression_type),
compression_opts(compression_opts),
filter_block(table_options.filter_policy == nullptr ?
nullptr :
new FilterBlockBuilder(opt, table_options, &internal_comparator)),
new FilterBlockBuilder(ioptions.prefix_extractor,
table_options, &internal_comparator)),
flush_block_policy(
table_options.flush_block_policy_factory->NewFlushBlockPolicy(
table_options, data_block)) {
for (auto& collector_factories :
options.table_properties_collector_factories) {
ioptions.table_properties_collector_factories) {
table_properties_collectors.emplace_back(
collector_factories->CreateTablePropertiesCollector());
}
@ -443,11 +447,13 @@ struct BlockBasedTableBuilder::Rep {
};
BlockBasedTableBuilder::BlockBasedTableBuilder(
const Options& options, const BlockBasedTableOptions& table_options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator, WritableFile* file,
CompressionType compression_type)
: rep_(new Rep(options, table_options, internal_comparator,
file, compression_type)) {
const CompressionType compression_type,
const CompressionOptions& compression_opts)
: rep_(new Rep(ioptions, table_options, internal_comparator,
file, compression_type, compression_opts)) {
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}
@ -502,7 +508,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
r->index_builder->OnKeyAdded(key);
NotifyCollectTableCollectorsOnAdd(key, value, r->table_properties_collectors,
r->options.info_log.get());
r->ioptions.info_log);
}
void BlockBasedTableBuilder::Flush() {
@ -540,10 +546,10 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
Slice block_contents;
if (raw_block_contents.size() < kCompressionSizeLimit) {
block_contents =
CompressBlock(raw_block_contents, r->options.compression_opts, &type,
CompressBlock(raw_block_contents, r->compression_opts, &type,
&r->compressed_output);
} else {
RecordTick(r->options.statistics.get(), NUMBER_BLOCK_NOT_COMPRESSED);
RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED);
type = kNoCompression;
block_contents = raw_block_contents;
}
@ -555,8 +561,7 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
CompressionType type,
BlockHandle* handle) {
Rep* r = rep_;
StopWatch sw(r->options.env, r->options.statistics.get(),
WRITE_RAW_BLOCK_MICROS);
StopWatch sw(r->ioptions.env, r->ioptions.statistics, WRITE_RAW_BLOCK_MICROS);
handle->set_offset(r->offset);
handle->set_size(block_contents.size());
r->status = r->file->Append(block_contents);
@ -717,7 +722,7 @@ Status BlockBasedTableBuilder::Finish() {
// Add use collected properties
NotifyCollectTableCollectorsOnFinish(r->table_properties_collectors,
r->options.info_log.get(),
r->ioptions.info_log,
&property_block_builder);
BlockHandle properties_block_handle;
@ -776,14 +781,12 @@ Status BlockBasedTableBuilder::Finish() {
}
}
Log(
r->options.info_log,
Log(r->ioptions.info_log,
"Table was constructed:\n"
" [basic properties]: %s\n"
" [user collected properties]: %s",
r->props.ToString().c_str(),
user_collected.c_str()
);
user_collected.c_str());
}
return r->status;

@ -28,10 +28,12 @@ class BlockBasedTableBuilder : public TableBuilder {
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish().
BlockBasedTableBuilder(const Options& options,
BlockBasedTableBuilder(const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type);
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts);
// REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder();

@ -41,21 +41,24 @@ BlockBasedTableFactory::BlockBasedTableFactory(
}
Status BlockBasedTableFactory::NewTableReader(
const Options& options, const EnvOptions& soptions,
const ImmutableCFOptions& ioptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return BlockBasedTable::Open(options, soptions, table_options_,
return BlockBasedTable::Open(ioptions, soptions, table_options_,
internal_comparator, std::move(file), file_size,
table_reader);
}
TableBuilder* BlockBasedTableFactory::NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const {
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
auto table_builder = new BlockBasedTableBuilder(
options, table_options_, internal_comparator, file, compression_type);
ioptions, table_options_, internal_comparator, file,
compression_type, compression_opts);
return table_builder;
}

@ -14,13 +14,11 @@
#include <string>
#include "rocksdb/flush_block_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "db/dbformat.h"
namespace rocksdb {
struct Options;
struct EnvOptions;
using std::unique_ptr;
@ -35,14 +33,17 @@ class BlockBasedTableFactory : public TableFactory {
const char* Name() const override { return "BlockBasedTable"; }
Status NewTableReader(const Options& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
Status NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const override;
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {

@ -336,15 +336,16 @@ class HashIndexReader : public IndexReader {
struct BlockBasedTable::Rep {
Rep(const EnvOptions& storage_options,
Rep(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const BlockBasedTableOptions& table_opt,
const InternalKeyComparator& internal_comparator)
: soptions(storage_options), table_options(table_opt),
: ioptions(ioptions), env_options(env_options), table_options(table_opt),
filter_policy(table_opt.filter_policy.get()),
internal_comparator(internal_comparator) {}
Options options;
const EnvOptions& soptions;
const ImmutableCFOptions& ioptions;
const EnvOptions& env_options;
const BlockBasedTableOptions& table_options;
const FilterPolicy* const filter_policy;
const InternalKeyComparator& internal_comparator;
@ -446,7 +447,8 @@ void BlockBasedTable::GenerateCachePrefix(Cache* cc,
}
}
Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file,
@ -461,8 +463,7 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
// We've successfully read the footer and the index block: we're
// ready to serve requests.
Rep* rep = new BlockBasedTable::Rep(
soptions, table_options, internal_comparator);
rep->options = options;
ioptions, env_options, table_options, internal_comparator);
rep->file = std::move(file);
rep->footer = footer;
rep->index_type = table_options.index_type;
@ -484,7 +485,7 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
TableProperties* table_properties = nullptr;
if (s.ok()) {
s = ReadProperties(meta_iter->value(), rep->file.get(), rep->footer,
rep->options.env, rep->options.info_log.get(),
rep->ioptions.env, rep->ioptions.info_log,
&table_properties);
}
@ -492,12 +493,12 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
auto err_msg =
"[Warning] Encountered error while reading data from properties "
"block " + s.ToString();
Log(rep->options.info_log, "%s", err_msg.c_str());
Log(rep->ioptions.info_log, "%s", err_msg.c_str());
} else {
rep->table_properties.reset(table_properties);
}
} else {
Log(WARN_LEVEL, rep->options.info_log,
Log(WARN_LEVEL, rep->ioptions.info_log,
"Cannot find Properties block from file.");
}
@ -546,7 +547,8 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
}
void BlockBasedTable::SetupForCompaction() {
switch (rep_->options.access_hint_on_compaction_start) {
/*
switch (.access_hint_on_compaction_start) {
case Options::NONE:
break;
case Options::NORMAL:
@ -562,6 +564,7 @@ void BlockBasedTable::SetupForCompaction() {
assert(false);
}
compaction_optimized_ = true;
*/
}
std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
@ -596,13 +599,13 @@ Status BlockBasedTable::ReadMetaBlock(
ReadOptions(),
rep->footer.metaindex_handle(),
&meta,
rep->options.env);
rep->ioptions.env);
if (!s.ok()) {
auto err_msg =
"[Warning] Encountered error while reading data from properties"
"block " + s.ToString();
Log(rep->options.info_log, "%s", err_msg.c_str());
Log(rep->ioptions.info_log, "%s", err_msg.c_str());
}
if (!s.ok()) {
delete meta;
@ -746,7 +749,7 @@ FilterBlockReader* BlockBasedTable::ReadFilter(const BlockHandle& filter_handle,
ReadOptions opt;
BlockContents block;
if (!ReadBlockContents(rep->file.get(), rep->footer, opt, filter_handle,
&block, rep->options.env, false).ok()) {
&block, rep->ioptions.env, false).ok()) {
return nullptr;
}
@ -755,7 +758,8 @@ FilterBlockReader* BlockBasedTable::ReadFilter(const BlockHandle& filter_handle,
}
return new FilterBlockReader(
rep->options, rep->table_options, block.data, block.heap_allocated);
rep->ioptions.prefix_extractor, rep->table_options,
block.data, block.heap_allocated);
}
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
@ -780,7 +784,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
cache_key
);
Statistics* statistics = rep_->options.statistics.get();
Statistics* statistics = rep_->ioptions.statistics;
auto cache_handle =
GetEntryFromCache(block_cache, key, BLOCK_CACHE_FILTER_MISS,
BLOCK_CACHE_FILTER_HIT, statistics);
@ -830,7 +834,7 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options,
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
rep_->footer.index_handle(), cache_key);
Statistics* statistics = rep_->options.statistics.get();
Statistics* statistics = rep_->ioptions.statistics;
auto cache_handle =
GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS,
BLOCK_CACHE_INDEX_HIT, statistics);
@ -906,7 +910,7 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
// If either block cache is enabled, we'll try to read from it.
if (block_cache != nullptr || block_cache_compressed != nullptr) {
Statistics* statistics = rep->options.statistics.get();
Statistics* statistics = rep->ioptions.statistics;
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
Slice key, /* key to the block cache */
@ -930,9 +934,9 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
if (block.value == nullptr && !no_io && ro.fill_cache) {
Block* raw_block = nullptr;
{
StopWatch sw(rep->options.env, statistics, READ_BLOCK_GET_MICROS);
StopWatch sw(rep->ioptions.env, statistics, READ_BLOCK_GET_MICROS);
s = ReadBlockFromFile(rep->file.get(), rep->footer, ro, handle,
&raw_block, rep->options.env,
&raw_block, rep->ioptions.env,
block_cache_compressed == nullptr);
}
@ -955,7 +959,7 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
}
}
s = ReadBlockFromFile(rep->file.get(), rep->footer, ro, handle,
&block.value, rep->options.env);
&block.value, rep->ioptions.env);
}
Iterator* iter;
@ -982,7 +986,8 @@ class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
public:
BlockEntryIteratorState(BlockBasedTable* table,
const ReadOptions& read_options)
: TwoLevelIteratorState(table->rep_->options.prefix_extractor != nullptr),
: TwoLevelIteratorState(
table->rep_->ioptions.prefix_extractor != nullptr),
table_(table),
read_options_(read_options) {}
@ -1020,8 +1025,8 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
return true;
}
assert(rep_->options.prefix_extractor != nullptr);
auto prefix = rep_->options.prefix_extractor->Transform(
assert(rep_->ioptions.prefix_extractor != nullptr);
auto prefix = rep_->ioptions.prefix_extractor->Transform(
ExtractUserKey(internal_key));
InternalKey internal_key_prefix(prefix, 0, kTypeValue);
auto internal_prefix = internal_key_prefix.Encode();
@ -1072,7 +1077,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
filter_entry.Release(rep_->table_options.block_cache.get());
}
Statistics* statistics = rep_->options.statistics.get();
Statistics* statistics = rep_->ioptions.statistics;
RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
if (!may_match) {
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
@ -1111,7 +1116,7 @@ Status BlockBasedTable::Get(
// Not found
// TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine.
RecordTick(rep_->options.statistics.get(), BLOOM_FILTER_USEFUL);
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
break;
} else {
BlockIter biter;
@ -1205,13 +1210,13 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader,
}
auto file = rep_->file.get();
auto env = rep_->options.env;
auto env = rep_->ioptions.env;
auto comparator = &rep_->internal_comparator;
const Footer& footer = rep_->footer;
if (index_type_on_file == BlockBasedTableOptions::kHashSearch &&
rep_->options.prefix_extractor == nullptr) {
Log(rep_->options.info_log,
rep_->ioptions.prefix_extractor == nullptr) {
Log(rep_->ioptions.info_log,
"BlockBasedTableOptions::kHashSearch requires "
"options.prefix_extractor to be set."
" Fall back to binary seach index.");
@ -1232,7 +1237,7 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader,
if (!s.ok()) {
// we simply fall back to binary search in case there is any
// problem with prefix hash index loading.
Log(rep_->options.info_log,
Log(rep_->ioptions.info_log,
"Unable to read the metaindex block."
" Fall back to binary seach index.");
return BinarySearchIndexReader::Create(
@ -1244,7 +1249,7 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader,
// We need to wrap data with internal_prefix_transform to make sure it can
// handle prefix correctly.
rep_->internal_prefix_transform.reset(
new InternalKeySliceTransform(rep_->options.prefix_extractor.get()));
new InternalKeySliceTransform(rep_->ioptions.prefix_extractor));
return HashIndexReader::Create(
rep_->internal_prefix_transform.get(), footer, file, env, comparator,
footer.index_handle(), meta_index_iter, index_reader,

@ -14,6 +14,7 @@
#include <utility>
#include <string>
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
@ -36,7 +37,6 @@ class TableReader;
class WritableFile;
struct BlockBasedTableOptions;
struct EnvOptions;
struct Options;
struct ReadOptions;
using std::unique_ptr;
@ -58,7 +58,8 @@ class BlockBasedTable : public TableReader {
// to nullptr and returns a non-ok status.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& db_options, const EnvOptions& env_options,
static Status Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_key_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,

@ -210,8 +210,8 @@ Status BlockPrefixIndex::Create(const SliceTransform* internal_prefix_extractor,
return s;
}
const uint32_t BlockPrefixIndex::GetBlocks(const Slice& key,
uint32_t** blocks) {
uint32_t BlockPrefixIndex::GetBlocks(const Slice& key,
uint32_t** blocks) {
Slice prefix = internal_prefix_extractor_->Transform(key);
uint32_t bucket = PrefixToBucket(prefix, num_buckets_);

@ -23,7 +23,7 @@ class BlockPrefixIndex {
// the key, based on the prefix.
// Returns the total number of relevant blocks, 0 means the key does
// not exist.
const uint32_t GetBlocks(const Slice& key, uint32_t** blocks);
uint32_t GetBlocks(const Slice& key, uint32_t** blocks);
size_t ApproximateMemoryUsage() const {
return sizeof(BlockPrefixIndex) +

@ -56,7 +56,6 @@ CuckooTableBuilder::CuckooTableBuilder(
ucomp_(user_comparator),
get_slice_hash_(get_slice_hash),
closed_(false) {
properties_.num_entries = 0;
// Data is in a huge block.
properties_.num_data_blocks = 1;
properties_.index_size = 0;
@ -64,7 +63,7 @@ CuckooTableBuilder::CuckooTableBuilder(
}
void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
if (properties_.num_entries >= kMaxVectorIdx - 1) {
if (kvs_.size() >= kMaxVectorIdx - 1) {
status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1");
return;
}
@ -311,7 +310,7 @@ uint64_t CuckooTableBuilder::NumEntries() const {
uint64_t CuckooTableBuilder::FileSize() const {
if (closed_) {
return file_->GetFileSize();
} else if (properties_.num_entries == 0) {
} else if (kvs_.size() == 0) {
return 0;
}

@ -135,6 +135,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
4, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
ASSERT_EQ(0UL, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
CheckFileContents({}, {}, {}, "", 0, 2, false);
@ -155,6 +156,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionFullKey";
@ -167,10 +169,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
@ -192,6 +196,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionFullKey";
@ -204,10 +209,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
@ -229,6 +236,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
uint32_t cuckoo_block_size = 2;
@ -242,10 +250,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
@ -272,6 +282,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKey";
@ -284,10 +295,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
@ -311,6 +324,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
@ -323,10 +337,12 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
@ -344,6 +360,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
{user_keys[3], {3, 4, 5, 6}}
};
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionUserKey";
@ -356,10 +373,12 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
@ -377,6 +396,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
{user_keys[3], {0, 1, 2, 3}},
};
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionUserKey";
@ -389,10 +409,12 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
@ -412,6 +434,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
{user_keys[4], {0, 2}},
};
std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathUserKey";
@ -424,10 +447,12 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
uint32_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,

@ -11,11 +11,12 @@
#include "table/cuckoo_table_reader.h"
namespace rocksdb {
Status CuckooTableFactory::NewTableReader(const Options& options,
const EnvOptions& soptions, const InternalKeyComparator& icomp,
Status CuckooTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const InternalKeyComparator& icomp,
std::unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table) const {
std::unique_ptr<CuckooTableReader> new_reader(new CuckooTableReader(options,
std::unique_ptr<CuckooTableReader> new_reader(new CuckooTableReader(ioptions,
std::move(file), file_size, icomp.user_comparator(), nullptr));
Status s = new_reader->status();
if (s.ok()) {
@ -25,10 +26,13 @@ Status CuckooTableFactory::NewTableReader(const Options& options,
}
TableBuilder* CuckooTableFactory::NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const {
return new CuckooTableBuilder(file, hash_table_ratio_, 64, max_search_depth_,
internal_comparator.user_comparator(), cuckoo_block_size_, nullptr);
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType,
const CompressionOptions&) const {
return new CuckooTableBuilder(file, hash_table_ratio_, 64,
max_search_depth_, internal_comparator.user_comparator(),
cuckoo_block_size_, nullptr);
}
std::string CuckooTableFactory::GetPrintableTableOptions() const {

@ -9,6 +9,7 @@
#include <string>
#include "rocksdb/table.h"
#include "util/murmurhash.h"
#include "rocksdb/options.h"
namespace rocksdb {
@ -45,14 +46,14 @@ class CuckooTableFactory : public TableFactory {
const char* Name() const override { return "CuckooTable"; }
Status NewTableReader(
const Options& options, const EnvOptions& soptions,
const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(const Options& options,
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator, WritableFile* file,
CompressionType compression_type) const override;
const CompressionType, const CompressionOptions&) const override;
// Sanitizes the specified DB Options.
Status SanitizeDBOptions(const DBOptions* db_opts) const override {

@ -29,7 +29,7 @@ namespace {
extern const uint64_t kCuckooTableMagicNumber;
CuckooTableReader::CuckooTableReader(
const Options& options,
const ImmutableCFOptions& ioptions,
std::unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
const Comparator* comparator,
@ -37,12 +37,12 @@ CuckooTableReader::CuckooTableReader(
: file_(std::move(file)),
ucomp_(comparator),
get_slice_hash_(get_slice_hash) {
if (!options.allow_mmap_reads) {
if (!ioptions.allow_mmap_reads) {
status_ = Status::InvalidArgument("File is not mmaped");
}
TableProperties* props = nullptr;
status_ = ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber,
options.env, options.info_log.get(), &props);
ioptions.env, ioptions.info_log, &props);
if (!status_.ok()) {
return;
}

@ -16,6 +16,7 @@
#include "db/dbformat.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "table/table_reader.h"
namespace rocksdb {
@ -26,7 +27,7 @@ class TableReader;
class CuckooTableReader: public TableReader {
public:
CuckooTableReader(
const Options& options,
const ImmutableCFOptions& ioptions,
std::unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
const Comparator* user_comparator,
@ -40,7 +41,7 @@ class CuckooTableReader: public TableReader {
Status status() const { return status_; }
Status Get(
const ReadOptions& readOptions, const Slice& key, void* handle_context,
const ReadOptions& read_options, const Slice& key, void* handle_context,
bool (*result_handler)(void* arg, const ParsedInternalKey& k,
const Slice& v),
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr)

@ -11,7 +11,10 @@ int main() {
}
#else
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <gflags/gflags.h>
#include <vector>
@ -121,8 +124,9 @@ class CuckooReaderTest {
// Check reader now.
std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
const ImmutableCFOptions ioptions(options);
CuckooTableReader reader(
options,
ioptions,
std::move(read_file),
file_size,
ucomp,
@ -147,8 +151,9 @@ class CuckooReaderTest {
void CheckIterator(const Comparator* ucomp = BytewiseComparator()) {
std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
const ImmutableCFOptions ioptions(options);
CuckooTableReader reader(
options,
ioptions,
std::move(read_file),
file_size,
ucomp,
@ -325,8 +330,9 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
CreateCuckooFileAndCheckReader();
std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
const ImmutableCFOptions ioptions(options);
CuckooTableReader reader(
options,
ioptions,
std::move(read_file),
file_size,
BytewiseComparator(),
@ -433,8 +439,9 @@ void WriteFile(const std::vector<std::string>& keys,
std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
const ImmutableCFOptions ioptions(options);
CuckooTableReader reader(
options, std::move(read_file), file_size,
ioptions, std::move(read_file), file_size,
test::Uint64Comparator(), nullptr);
ASSERT_OK(reader.status());
ReadOptions r_options;
@ -460,8 +467,9 @@ void ReadKeys(uint64_t num, uint32_t batch_size) {
std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
const ImmutableCFOptions ioptions(options);
CuckooTableReader reader(
options, std::move(read_file), file_size, test::Uint64Comparator(),
ioptions, std::move(read_file), file_size, test::Uint64Comparator(),
nullptr);
ASSERT_OK(reader.status());
const UserCollectedProperties user_props =

@ -21,11 +21,11 @@ namespace rocksdb {
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
FilterBlockBuilder::FilterBlockBuilder(const Options& opt,
FilterBlockBuilder::FilterBlockBuilder(const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt,
const Comparator* internal_comparator)
: policy_(table_opt.filter_policy.get()),
prefix_extractor_(opt.prefix_extractor.get()),
prefix_extractor_(prefix_extractor),
whole_key_filtering_(table_opt.whole_key_filtering),
comparator_(internal_comparator) {}
@ -126,10 +126,11 @@ void FilterBlockBuilder::GenerateFilter() {
}
FilterBlockReader::FilterBlockReader(
const Options& opt, const BlockBasedTableOptions& table_opt,
const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt,
const Slice& contents, bool delete_contents_after_use)
: policy_(table_opt.filter_policy.get()),
prefix_extractor_(opt.prefix_extractor.get()),
prefix_extractor_(prefix_extractor),
whole_key_filtering_(table_opt.whole_key_filtering),
data_(nullptr),
offset_(nullptr),

@ -18,7 +18,6 @@
#include <stdint.h>
#include <string>
#include <vector>
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
@ -36,7 +35,7 @@ class FilterPolicy;
// (StartBlock AddKey*)* Finish
class FilterBlockBuilder {
public:
explicit FilterBlockBuilder(const Options& opt,
explicit FilterBlockBuilder(const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt,
const Comparator* internal_comparator);
@ -71,7 +70,7 @@ class FilterBlockReader {
public:
// REQUIRES: "contents" and *policy must stay live while *this is live.
FilterBlockReader(
const Options& opt,
const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt,
const Slice& contents,
bool delete_contents_after_use = false);

@ -45,26 +45,26 @@ class TestHashFilter : public FilterPolicy {
class FilterBlockTest {
public:
Options options_;
const Comparator* comparator_;
BlockBasedTableOptions table_options_;
FilterBlockTest() {
options_ = Options();
FilterBlockTest()
: comparator_(BytewiseComparator()) {
table_options_.filter_policy.reset(new TestHashFilter());
}
};
TEST(FilterBlockTest, EmptyBuilder) {
FilterBlockBuilder builder(options_, table_options_, options_.comparator);
FilterBlockBuilder builder(nullptr, table_options_, comparator_);
Slice block = builder.Finish();
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));
FilterBlockReader reader(options_, table_options_, block);
FilterBlockReader reader(nullptr, table_options_, block);
ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
}
TEST(FilterBlockTest, SingleChunk) {
FilterBlockBuilder builder(options_, table_options_, options_.comparator);
FilterBlockBuilder builder(nullptr, table_options_, comparator_);
builder.StartBlock(100);
builder.AddKey("foo");
builder.AddKey("bar");
@ -74,7 +74,7 @@ TEST(FilterBlockTest, SingleChunk) {
builder.StartBlock(300);
builder.AddKey("hello");
Slice block = builder.Finish();
FilterBlockReader reader(options_, table_options_, block);
FilterBlockReader reader(nullptr, table_options_, block);
ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
ASSERT_TRUE(reader.KeyMayMatch(100, "bar"));
ASSERT_TRUE(reader.KeyMayMatch(100, "box"));
@ -85,7 +85,7 @@ TEST(FilterBlockTest, SingleChunk) {
}
TEST(FilterBlockTest, MultiChunk) {
FilterBlockBuilder builder(options_, table_options_, options_.comparator);
FilterBlockBuilder builder(nullptr, table_options_, comparator_);
// First filter
builder.StartBlock(0);
@ -105,7 +105,7 @@ TEST(FilterBlockTest, MultiChunk) {
builder.AddKey("hello");
Slice block = builder.Finish();
FilterBlockReader reader(options_, table_options_, block);
FilterBlockReader reader(nullptr, table_options_, block);
// Check first filter
ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));

@ -58,24 +58,24 @@ extern const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ull;
extern const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull;
PlainTableBuilder::PlainTableBuilder(
const Options& options, WritableFile* file, uint32_t user_key_len,
EncodingType encoding_type, size_t index_sparseness,
const ImmutableCFOptions& ioptions, WritableFile* file,
uint32_t user_key_len, EncodingType encoding_type, size_t index_sparseness,
uint32_t bloom_bits_per_key, uint32_t num_probes, size_t huge_page_tlb_size,
double hash_table_ratio, bool store_index_in_file)
: options_(options),
: ioptions_(ioptions),
bloom_block_(num_probes),
file_(file),
bloom_bits_per_key_(bloom_bits_per_key),
huge_page_tlb_size_(huge_page_tlb_size),
encoder_(encoding_type, user_key_len, options.prefix_extractor.get(),
encoder_(encoding_type, user_key_len, ioptions.prefix_extractor,
index_sparseness),
store_index_in_file_(store_index_in_file),
prefix_extractor_(options.prefix_extractor.get()) {
prefix_extractor_(ioptions.prefix_extractor) {
// Build index block and save it in the file if hash_table_ratio > 0
if (store_index_in_file_) {
assert(hash_table_ratio > 0 || IsTotalOrderMode());
index_builder_.reset(
new PlainTableIndexBuilder(&arena_, options, index_sparseness,
new PlainTableIndexBuilder(&arena_, ioptions, index_sparseness,
hash_table_ratio, huge_page_tlb_size_));
assert(bloom_bits_per_key_ > 0);
properties_.user_collected_properties
@ -93,10 +93,10 @@ PlainTableBuilder::PlainTableBuilder(
// plain encoding.
properties_.format_version = (encoding_type == kPlain) ? 0 : 1;
if (options_.prefix_extractor) {
if (ioptions_.prefix_extractor) {
properties_.user_collected_properties
[PlainTablePropertyNames::kPrefixExtractorName] =
options_.prefix_extractor->Name();
ioptions_.prefix_extractor->Name();
}
std::string val;
@ -105,7 +105,7 @@ PlainTableBuilder::PlainTableBuilder(
[PlainTablePropertyNames::kEncodingType] = val;
for (auto& collector_factories :
options.table_properties_collector_factories) {
ioptions.table_properties_collector_factories) {
table_properties_collectors_.emplace_back(
collector_factories->CreateTablePropertiesCollector());
}
@ -124,11 +124,11 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
// Store key hash
if (store_index_in_file_) {
if (options_.prefix_extractor.get() == nullptr) {
if (ioptions_.prefix_extractor == nullptr) {
keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
} else {
Slice prefix =
options_.prefix_extractor->Transform(internal_key.user_key);
ioptions_.prefix_extractor->Transform(internal_key.user_key);
keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
}
}
@ -160,7 +160,7 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
// notify property collectors
NotifyCollectTableCollectorsOnAdd(key, value, table_properties_collectors_,
options_.info_log.get());
ioptions_.info_log);
}
Status PlainTableBuilder::status() const { return status_; }
@ -183,7 +183,8 @@ Status PlainTableBuilder::Finish() {
if (store_index_in_file_ && (properties_.num_entries > 0)) {
bloom_block_.SetTotalBits(
&arena_, properties_.num_entries * bloom_bits_per_key_,
options_.bloom_locality, huge_page_tlb_size_, options_.info_log.get());
ioptions_.bloom_locality, huge_page_tlb_size_,
ioptions_.info_log);
PutVarint32(&properties_.user_collected_properties
[PlainTablePropertyNames::kNumBloomBlocks],
@ -224,7 +225,7 @@ Status PlainTableBuilder::Finish() {
// -- Add user collected properties
NotifyCollectTableCollectorsOnFinish(table_properties_collectors_,
options_.info_log.get(),
ioptions_.info_log,
&property_block_builder);
// -- Write property block

@ -30,7 +30,7 @@ class PlainTableBuilder: public TableBuilder {
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
PlainTableBuilder(const Options& options, WritableFile* file,
PlainTableBuilder(const ImmutableCFOptions& ioptions, WritableFile* file,
uint32_t user_key_size, EncodingType encoding_type,
size_t index_sparseness, uint32_t bloom_bits_per_key,
uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
@ -71,7 +71,7 @@ class PlainTableBuilder: public TableBuilder {
private:
Arena arena_;
Options options_;
const ImmutableCFOptions& ioptions_;
std::vector<std::unique_ptr<TablePropertiesCollector>>
table_properties_collectors_;

@ -14,22 +14,24 @@
namespace rocksdb {
Status PlainTableFactory::NewTableReader(const Options& options,
const EnvOptions& soptions,
Status PlainTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& icomp,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
unique_ptr<TableReader>* table) const {
return PlainTableReader::Open(options, soptions, icomp, std::move(file),
return PlainTableReader::Open(ioptions, env_options, icomp, std::move(file),
file_size, table, bloom_bits_per_key_,
hash_table_ratio_, index_sparseness_,
huge_page_tlb_size_, full_scan_mode_);
}
TableBuilder* PlainTableFactory::NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const {
return new PlainTableBuilder(options, file, user_key_len_, encoding_type_,
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType,
const CompressionOptions&) const {
return new PlainTableBuilder(ioptions, file, user_key_len_, encoding_type_,
index_sparseness_, bloom_bits_per_key_, 6,
huge_page_tlb_size_, hash_table_ratio_,
store_index_in_file_);

@ -14,7 +14,6 @@
namespace rocksdb {
struct Options;
struct EnvOptions;
using std::unique_ptr;
@ -154,15 +153,17 @@ class PlainTableFactory : public TableFactory {
full_scan_mode_(options.full_scan_mode),
store_index_in_file_(options.store_index_in_file) {}
const char* Name() const override { return "PlainTable"; }
Status NewTableReader(const Options& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(const Options& options,
const InternalKeyComparator& icomparator,
WritableFile* file,
CompressionType compression_type) const
override;
Status NewTableReader(
const ImmutableCFOptions& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType,
const CompressionOptions&) const override;
std::string GetPrintableTableOptions() const override;

@ -93,7 +93,7 @@ Slice PlainTableIndexBuilder::Finish() {
BucketizeIndexes(&hash_to_offsets, &entries_per_bucket);
keys_per_prefix_hist_.Add(num_keys_per_prefix_);
Log(options_.info_log, "Number of Keys per prefix Histogram: %s",
Log(ioptions_.info_log, "Number of Keys per prefix Histogram: %s",
keys_per_prefix_hist_.ToString().c_str());
// From the temp data structure, populate indexes.
@ -147,11 +147,11 @@ void PlainTableIndexBuilder::BucketizeIndexes(
Slice PlainTableIndexBuilder::FillIndexes(
const std::vector<IndexRecord*>& hash_to_offsets,
const std::vector<uint32_t>& entries_per_bucket) {
Log(options_.info_log, "Reserving %zu bytes for plain table's sub_index",
Log(ioptions_.info_log, "Reserving %zu bytes for plain table's sub_index",
sub_index_size_);
auto total_allocate_size = GetTotalSize();
char* allocated = arena_->AllocateAligned(
total_allocate_size, huge_page_tlb_size_, options_.info_log.get());
total_allocate_size, huge_page_tlb_size_, ioptions_.info_log);
auto temp_ptr = EncodeVarint32(allocated, index_size_);
uint32_t* index =
@ -191,7 +191,7 @@ Slice PlainTableIndexBuilder::FillIndexes(
}
assert(sub_index_offset == sub_index_size_);
Log(options_.info_log, "hash table size: %d, suffix_map length %zu",
Log(ioptions_.info_log, "hash table size: %d, suffix_map length %zu",
index_size_, sub_index_size_);
return Slice(allocated, GetTotalSize());
}

@ -108,11 +108,11 @@ class PlainTableIndex {
// #wiki-in-memory-index-format
class PlainTableIndexBuilder {
public:
PlainTableIndexBuilder(Arena* arena, const Options& options,
PlainTableIndexBuilder(Arena* arena, const ImmutableCFOptions& ioptions,
uint32_t index_sparseness, double hash_table_ratio,
double huge_page_tlb_size)
: arena_(arena),
options_(options),
ioptions_(ioptions),
record_list_(kRecordsPerGroup),
is_first_record_(true),
due_index_(false),
@ -120,7 +120,7 @@ class PlainTableIndexBuilder {
num_keys_per_prefix_(0),
prev_key_prefix_hash_(0),
index_sparseness_(index_sparseness),
prefix_extractor_(options.prefix_extractor.get()),
prefix_extractor_(ioptions.prefix_extractor),
hash_table_ratio_(hash_table_ratio),
huge_page_tlb_size_(huge_page_tlb_size) {}
@ -196,7 +196,7 @@ class PlainTableIndexBuilder {
const std::vector<uint32_t>& entries_per_bucket);
Arena* arena_;
Options options_;
const ImmutableCFOptions ioptions_;
HistogramImpl keys_per_prefix_hist_;
IndexRecordList record_list_;
bool is_first_record_;

@ -87,7 +87,7 @@ class PlainTableIterator : public Iterator {
};
extern const uint64_t kPlainTableMagicNumber;
PlainTableReader::PlainTableReader(const Options& options,
PlainTableReader::PlainTableReader(const ImmutableCFOptions& ioptions,
unique_ptr<RandomAccessFile>&& file,
const EnvOptions& storage_options,
const InternalKeyComparator& icomparator,
@ -99,10 +99,10 @@ PlainTableReader::PlainTableReader(const Options& options,
full_scan_mode_(false),
data_end_offset_(table_properties->data_size),
user_key_len_(table_properties->fixed_key_len),
prefix_extractor_(options.prefix_extractor.get()),
prefix_extractor_(ioptions.prefix_extractor),
enable_bloom_(false),
bloom_(6, nullptr),
options_(options),
ioptions_(ioptions),
file_(std::move(file)),
file_size_(file_size),
table_properties_(nullptr) {}
@ -110,8 +110,8 @@ PlainTableReader::PlainTableReader(const Options& options,
PlainTableReader::~PlainTableReader() {
}
Status PlainTableReader::Open(const Options& options,
const EnvOptions& soptions,
Status PlainTableReader::Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
@ -119,14 +119,14 @@ Status PlainTableReader::Open(const Options& options,
const int bloom_bits_per_key,
double hash_table_ratio, size_t index_sparseness,
size_t huge_page_tlb_size, bool full_scan_mode) {
assert(options.allow_mmap_reads);
assert(ioptions.allow_mmap_reads);
if (file_size > PlainTableIndex::kMaxFileSize) {
return Status::NotSupported("File is too large for PlainTableReader!");
}
TableProperties* props = nullptr;
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
options.env, options.info_log.get(), &props);
ioptions.env, ioptions.info_log, &props);
if (!s.ok()) {
return s;
}
@ -137,12 +137,12 @@ Status PlainTableReader::Open(const Options& options,
user_props.find(PlainTablePropertyNames::kPrefixExtractorName);
if (!full_scan_mode && prefix_extractor_in_file != user_props.end()) {
if (!options.prefix_extractor) {
if (!ioptions.prefix_extractor) {
return Status::InvalidArgument(
"Prefix extractor is missing when opening a PlainTable built "
"using a prefix extractor");
} else if (prefix_extractor_in_file->second.compare(
options.prefix_extractor->Name()) != 0) {
ioptions.prefix_extractor->Name()) != 0) {
return Status::InvalidArgument(
"Prefix extractor given doesn't match the one used to build "
"PlainTable");
@ -158,8 +158,8 @@ Status PlainTableReader::Open(const Options& options,
}
std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
options, std::move(file), soptions, internal_comparator, encoding_type,
file_size, props));
ioptions, std::move(file), env_options, internal_comparator,
encoding_type, file_size, props));
s = new_reader->MmapDataFile();
if (!s.ok()) {
@ -207,7 +207,7 @@ Status PlainTableReader::PopulateIndexRecordList(
bool is_first_record = true;
Slice key_prefix_slice;
PlainTableKeyDecoder decoder(encoding_type_, user_key_len_,
options_.prefix_extractor.get());
ioptions_.prefix_extractor);
while (pos < data_end_offset_) {
uint32_t key_offset = pos;
ParsedInternalKey key;
@ -252,8 +252,8 @@ void PlainTableReader::AllocateAndFillBloom(int bloom_bits_per_key,
uint32_t bloom_total_bits = num_prefixes * bloom_bits_per_key;
if (bloom_total_bits > 0) {
enable_bloom_ = true;
bloom_.SetTotalBits(&arena_, bloom_total_bits, options_.bloom_locality,
huge_page_tlb_size, options_.info_log.get());
bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
huge_page_tlb_size, ioptions_.info_log);
FillBloom(prefix_hashes);
}
}
@ -281,14 +281,14 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
BlockContents bloom_block_contents;
auto s = ReadMetaBlock(file_.get(), file_size_, kPlainTableMagicNumber,
options_.env, BloomBlockBuilder::kBloomBlock,
ioptions_.env, BloomBlockBuilder::kBloomBlock,
&bloom_block_contents);
bool index_in_file = s.ok();
BlockContents index_block_contents;
s = ReadMetaBlock(file_.get(), file_size_, kPlainTableMagicNumber,
options_.env, PlainTableIndexBuilder::kPlainTableIndexBlock,
&index_block_contents);
ioptions_.env, PlainTableIndexBuilder::kPlainTableIndexBlock,
&index_block_contents);
index_in_file &= s.ok();
@ -310,8 +310,9 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
index_block = nullptr;
}
if ((options_.prefix_extractor.get() == nullptr) && (hash_table_ratio != 0)) {
// options.prefix_extractor is requried for a hash-based look-up.
if ((ioptions_.prefix_extractor == nullptr) &&
(hash_table_ratio != 0)) {
// ioptions.prefix_extractor is required for a hash-based look-up.
return Status::NotSupported(
"PlainTable requires a prefix extractor enable prefix hash mode.");
}
@ -328,8 +329,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
table_properties_->num_entries * bloom_bits_per_key;
if (num_bloom_bits > 0) {
enable_bloom_ = true;
bloom_.SetTotalBits(&arena_, num_bloom_bits, options_.bloom_locality,
huge_page_tlb_size, options_.info_log.get());
bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality,
huge_page_tlb_size, ioptions_.info_log);
}
}
} else {
@ -351,7 +352,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
bloom_block->size() * 8, num_blocks);
}
PlainTableIndexBuilder index_builder(&arena_, options_, index_sparseness,
PlainTableIndexBuilder index_builder(&arena_, ioptions_, index_sparseness,
hash_table_ratio, huge_page_tlb_size);
std::vector<uint32_t> prefix_hashes;
@ -422,7 +423,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
uint32_t file_offset = GetFixed32Element(base_ptr, mid);
size_t tmp;
Status s = PlainTableKeyDecoder(encoding_type_, user_key_len_,
options_.prefix_extractor.get())
ioptions_.prefix_extractor)
.NextKey(file_data_.data() + file_offset,
file_data_.data() + data_end_offset_, &mid_key,
nullptr, &tmp);
@ -451,7 +452,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
size_t tmp;
uint32_t low_key_offset = GetFixed32Element(base_ptr, low);
Status s = PlainTableKeyDecoder(encoding_type_, user_key_len_,
options_.prefix_extractor.get())
ioptions_.prefix_extractor)
.NextKey(file_data_.data() + low_key_offset,
file_data_.data() + data_end_offset_, &low_key,
nullptr, &tmp);
@ -565,7 +566,7 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
}
Slice found_value;
PlainTableKeyDecoder decoder(encoding_type_, user_key_len_,
options_.prefix_extractor.get());
ioptions_.prefix_extractor);
while (offset < data_end_offset_) {
Status s = Next(&decoder, &offset, &found_key, nullptr, &found_value);
if (!s.ok()) {

@ -52,7 +52,8 @@ extern const uint32_t kPlainTableVariableLength;
// The implementation of IndexedTableReader requires output file is mmaped
class PlainTableReader: public TableReader {
public:
static Status Open(const Options& options, const EnvOptions& soptions,
static Status Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table,
@ -82,8 +83,9 @@ class PlainTableReader: public TableReader {
return arena_.MemoryAllocatedBytes();
}
PlainTableReader(const Options& options, unique_ptr<RandomAccessFile>&& file,
const EnvOptions& storage_options,
PlainTableReader(const ImmutableCFOptions& ioptions,
unique_ptr<RandomAccessFile>&& file,
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
EncodingType encoding_type, uint64_t file_size,
const TableProperties* table_properties);
@ -132,7 +134,7 @@ class PlainTableReader: public TableReader {
DynamicBloom bloom_;
Arena arena_;
const Options& options_;
const ImmutableCFOptions& ioptions_;
unique_ptr<RandomAccessFile> file_;
uint32_t file_size_;
std::shared_ptr<const TableProperties> table_properties_;

@ -88,10 +88,12 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
TableBuilder* tb = nullptr;
DB* db = nullptr;
Status s;
const ImmutableCFOptions ioptions(opts);
if (!through_db) {
env->NewWritableFile(file_name, &file, env_options);
tb = opts.table_factory->NewTableBuilder(opts, ikc, file.get(),
CompressionType::kNoCompression);
tb = opts.table_factory->NewTableBuilder(ioptions, ikc, file.get(),
CompressionType::kNoCompression,
CompressionOptions());
} else {
s = DB::Open(opts, dbname, &db);
ASSERT_OK(s);
@ -122,7 +124,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
uint64_t file_size;
env->GetFileSize(file_name, &file_size);
s = opts.table_factory->NewTableReader(
opts, env_options, ikc, std::move(raf), file_size, &table_reader);
ioptions, env_options, ikc, std::move(raf), file_size, &table_reader);
}
Random rnd(301);

@ -42,6 +42,7 @@
#include "util/statistics.h"
#include "util/testharness.h"
#include "util/testutil.h"
#include "util/scoped_arena_iterator.h"
namespace rocksdb {
@ -194,6 +195,7 @@ class Constructor {
// been added so far. Returns the keys in sorted order in "*keys"
// and stores the key/value pairs in "*kvmap"
void Finish(const Options& options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
std::vector<std::string>* keys, KVMap* kvmap) {
@ -206,12 +208,14 @@ class Constructor {
keys->push_back(it->first);
}
data_.clear();
Status s = FinishImpl(options, table_options, internal_comparator, *kvmap);
Status s = FinishImpl(options, ioptions, table_options,
internal_comparator, *kvmap);
ASSERT_TRUE(s.ok()) << s.ToString();
}
// Construct the data structure from the data in "data"
virtual Status FinishImpl(const Options& options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
const KVMap& data) = 0;
@ -220,8 +224,12 @@ class Constructor {
virtual const KVMap& data() { return data_; }
virtual bool IsArenaMode() const { return false; }
virtual DB* db() const { return nullptr; } // Overridden in DBConstructor
virtual bool AnywayDeleteIterator() const { return false; }
protected:
const InternalKeyComparator* last_internal_key_;
@ -239,6 +247,7 @@ class BlockConstructor: public Constructor {
delete block_;
}
virtual Status FinishImpl(const Options& options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
const KVMap& data) {
@ -275,8 +284,15 @@ class BlockConstructor: public Constructor {
// A helper class that converts internal format keys into user keys
class KeyConvertingIterator: public Iterator {
public:
explicit KeyConvertingIterator(Iterator* iter) : iter_(iter) { }
virtual ~KeyConvertingIterator() { delete iter_; }
KeyConvertingIterator(Iterator* iter, bool arena_mode = false)
: iter_(iter), arena_mode_(arena_mode) {}
// Disposes of the wrapped iterator according to how it was handed to us.
virtual ~KeyConvertingIterator() {
if (arena_mode_) {
// Run the destructor only and do not free the storage.
// NOTE(review): presumably the memory belongs to the Arena that was passed
// to NewIterator() and is reclaimed with it -- confirm against the caller.
iter_->~Iterator();
} else {
// Not in arena mode: this wrapper owns iter_ and frees it with delete.
delete iter_;
}
}
virtual bool Valid() const { return iter_->Valid(); }
virtual void Seek(const Slice& target) {
ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
@ -307,6 +323,7 @@ class KeyConvertingIterator: public Iterator {
private:
mutable Status status_;
Iterator* iter_;
bool arena_mode_;
// No copying allowed
KeyConvertingIterator(const KeyConvertingIterator&);
@ -322,14 +339,16 @@ class TableConstructor: public Constructor {
~TableConstructor() { Reset(); }
virtual Status FinishImpl(const Options& options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
const KVMap& data) {
Reset();
sink_.reset(new StringSink());
unique_ptr<TableBuilder> builder;
builder.reset(options.table_factory->NewTableBuilder(
options, internal_comparator, sink_.get(), options.compression));
builder.reset(ioptions.table_factory->NewTableBuilder(
ioptions, internal_comparator, sink_.get(), options.compression,
CompressionOptions()));
for (KVMap::const_iterator it = data.begin();
it != data.end();
@ -352,9 +371,9 @@ class TableConstructor: public Constructor {
// Open the table
uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_,
options.allow_mmap_reads));
return options.table_factory->NewTableReader(
options, soptions, internal_comparator, std::move(source_),
ioptions.allow_mmap_reads));
return ioptions.table_factory->NewTableReader(
ioptions, soptions, internal_comparator, std::move(source_),
sink_->contents().size(), &table_reader_);
}
@ -372,12 +391,12 @@ class TableConstructor: public Constructor {
return table_reader_->ApproximateOffsetOf(key);
}
virtual Status Reopen(const Options& options) {
virtual Status Reopen(const ImmutableCFOptions& ioptions) {
source_.reset(
new StringSource(sink_->contents(), uniq_id_,
options.allow_mmap_reads));
return options.table_factory->NewTableReader(
options, soptions, *last_internal_key_, std::move(source_),
ioptions.allow_mmap_reads));
return ioptions.table_factory->NewTableReader(
ioptions, soptions, *last_internal_key_, std::move(source_),
sink_->contents().size(), &table_reader_);
}
@ -385,6 +404,10 @@ class TableConstructor: public Constructor {
return table_reader_.get();
}
// Tells Harness whether the iterator returned by this constructor must be
// released with `delete` even when IsArenaMode() is true (Harness only uses a
// destructor-only call when IsArenaMode() && !AnywayDeleteIterator()).
// For a table constructor the answer simply mirrors convert_to_internal_key_.
// NOTE(review): presumably because NewIterator() wraps the table iterator in a
// heap-allocated KeyConvertingIterator in that mode -- confirm.
virtual bool AnywayDeleteIterator() const override {
return convert_to_internal_key_;
}
private:
void Reset() {
uniq_id_ = 0;
@ -392,12 +415,12 @@ class TableConstructor: public Constructor {
sink_.reset();
source_.reset();
}
bool convert_to_internal_key_;
uint64_t uniq_id_;
unique_ptr<StringSink> sink_;
unique_ptr<StringSource> source_;
unique_ptr<TableReader> table_reader_;
bool convert_to_internal_key_;
TableConstructor();
@ -421,6 +444,7 @@ class MemTableConstructor: public Constructor {
delete memtable_->Unref();
}
virtual Status FinishImpl(const Options& options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
const KVMap& data) {
@ -439,10 +463,16 @@ class MemTableConstructor: public Constructor {
return Status::OK();
}
virtual Iterator* NewIterator() const {
return new KeyConvertingIterator(memtable_->NewIterator(ReadOptions()));
return new KeyConvertingIterator(
memtable_->NewIterator(ReadOptions(), &arena_), true);
}
virtual bool AnywayDeleteIterator() const override { return true; }
virtual bool IsArenaMode() const override { return true; }
private:
mutable Arena arena_;
InternalKeyComparator internal_comparator_;
MemTable* memtable_;
std::shared_ptr<SkipListFactory> table_factory_;
@ -460,6 +490,7 @@ class DBConstructor: public Constructor {
delete db_;
}
virtual Status FinishImpl(const Options& options,
const ImmutableCFOptions& ioptions,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator,
const KVMap& data) {
@ -670,7 +701,7 @@ class FixedOrLessPrefixTransform : public SliceTransform {
class Harness {
public:
Harness() : constructor_(nullptr) { }
Harness() : ioptions_(options_), constructor_(nullptr) {}
void Init(const TestArgs& args) {
delete constructor_;
@ -756,6 +787,7 @@ class Harness {
constructor_ = new DBConstructor(options_.comparator);
break;
}
ioptions_ = ImmutableCFOptions(options_);
}
~Harness() {
@ -769,8 +801,8 @@ class Harness {
void Test(Random* rnd) {
std::vector<std::string> keys;
KVMap data;
constructor_->Finish(options_, table_options_, *internal_comparator_,
&keys, &data);
constructor_->Finish(options_, ioptions_, table_options_,
*internal_comparator_, &keys, &data);
TestForwardScan(keys, data);
if (support_prev_) {
@ -791,7 +823,11 @@ class Harness {
iter->Next();
}
ASSERT_TRUE(!iter->Valid());
delete iter;
if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
iter->~Iterator();
} else {
delete iter;
}
}
void TestBackwardScan(const std::vector<std::string>& keys,
@ -806,7 +842,11 @@ class Harness {
iter->Prev();
}
ASSERT_TRUE(!iter->Valid());
delete iter;
if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
iter->~Iterator();
} else {
delete iter;
}
}
void TestRandomAccess(Random* rnd,
@ -876,7 +916,11 @@ class Harness {
}
}
}
delete iter;
if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
iter->~Iterator();
} else {
delete iter;
}
}
std::string ToString(const KVMap& data, const KVMap::const_iterator& it) {
@ -939,6 +983,7 @@ class Harness {
private:
Options options_ = Options();
ImmutableCFOptions ioptions_;
BlockBasedTableOptions table_options_ = BlockBasedTableOptions();
Constructor* constructor_;
bool support_prev_;
@ -1038,7 +1083,8 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) {
table_options.block_restart_interval = 1;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
auto& props = *c.GetTableReader()->GetTableProperties();
@ -1071,7 +1117,8 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
Options options;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
auto& props = *c.GetTableReader()->GetTableProperties();
ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
@ -1122,7 +1169,8 @@ TEST(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
c.Add("cccc2", std::string('a', 56));
std::vector<std::string> keys;
KVMap kvmap;
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
auto props = c.GetTableReader()->GetTableProperties();
ASSERT_EQ(7u, props->num_data_blocks);
@ -1206,7 +1254,8 @@ TEST(TableTest, HashIndexTest) {
std::unique_ptr<InternalKeyComparator> comparator(
new InternalKeyComparator(BytewiseComparator()));
c.Finish(options, table_options, *comparator, &keys, &kvmap);
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options, *comparator, &keys, &kvmap);
auto reader = c.GetTableReader();
auto props = reader->GetTableProperties();
@ -1314,7 +1363,8 @@ TEST(BlockBasedTableTest, IndexSizeStat) {
table_options.block_restart_interval = 1;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &ks, &kvmap);
auto index_size = c.GetTableReader()->GetTableProperties()->index_size;
ASSERT_GT(index_size, last_index_size);
@ -1340,7 +1390,8 @@ TEST(BlockBasedTableTest, NumBlockStat) {
std::vector<std::string> ks;
KVMap kvmap;
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &ks, &kvmap);
ASSERT_EQ(kvmap.size(),
c.GetTableReader()->GetTableProperties()->num_data_blocks);
@ -1416,7 +1467,8 @@ TEST(BlockBasedTableTest, BlockCacheDisabledTest) {
TableConstructor c(BytewiseComparator(), true);
c.Add("key", "value");
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
// preloading filter/index blocks is enabled.
@ -1458,7 +1510,8 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
TableConstructor c(BytewiseComparator());
c.Add("key", "value");
c.Finish(options, table_options,
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options,
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
// preloading filter/index blocks is prohibited.
auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
@ -1512,7 +1565,8 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
table_options.block_cache.reset();
options.table_factory.reset(new BlockBasedTableFactory(table_options));
options.statistics = CreateDBStatistics(); // reset the stats
c.Reopen(options);
const ImmutableCFOptions ioptions1(options);
c.Reopen(ioptions1);
table_options.no_block_cache = false;
{
@ -1529,7 +1583,8 @@ TEST(BlockBasedTableTest, FilterBlockInBlockCache) {
// too small to fit even one entry.
table_options.block_cache = NewLRUCache(1);
options.table_factory.reset(new BlockBasedTableFactory(table_options));
c.Reopen(options);
const ImmutableCFOptions ioptions2(options);
c.Reopen(ioptions2);
{
BlockCachePropertiesSnapshot props(options.statistics.get());
props.AssertEqual(1, // index block miss
@ -1583,7 +1638,8 @@ TEST(BlockBasedTableTest, BlockCacheLeak) {
c.Add("k07", std::string(100000, 'x'));
std::vector<std::string> keys;
KVMap kvmap;
c.Finish(opt, table_options, *ikc, &keys, &kvmap);
const ImmutableCFOptions ioptions(opt);
c.Finish(opt, ioptions, table_options, *ikc, &keys, &kvmap);
unique_ptr<Iterator> iter(c.NewIterator());
iter->SeekToFirst();
@ -1594,7 +1650,8 @@ TEST(BlockBasedTableTest, BlockCacheLeak) {
}
ASSERT_OK(iter->status());
ASSERT_OK(c.Reopen(opt));
const ImmutableCFOptions ioptions1(opt);
ASSERT_OK(c.Reopen(ioptions1));
auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
for (const std::string& key : keys) {
ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
@ -1603,7 +1660,8 @@ TEST(BlockBasedTableTest, BlockCacheLeak) {
// rerun with different block cache
table_options.block_cache = NewLRUCache(16 * 1024 * 1024);
opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_OK(c.Reopen(opt));
const ImmutableCFOptions ioptions2(opt);
ASSERT_OK(c.Reopen(ioptions2));
table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
for (const std::string& key : keys) {
ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
@ -1619,9 +1677,11 @@ TEST(PlainTableTest, BasicPlainTableProperties) {
PlainTableFactory factory(plain_table_options);
StringSink sink;
Options options;
const ImmutableCFOptions ioptions(options);
InternalKeyComparator ikc(options.comparator);
std::unique_ptr<TableBuilder> builder(
factory.NewTableBuilder(options, ikc, &sink, kNoCompression));
factory.NewTableBuilder(ioptions, ikc, &sink, kNoCompression,
CompressionOptions()));
for (char c = 'a'; c <= 'z'; ++c) {
std::string key(8, c);
@ -1664,7 +1724,9 @@ TEST(GeneralTableTest, ApproximateOffsetOfPlain) {
options.compression = kNoCompression;
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
c.Finish(options, table_options, internal_comparator, &keys, &kvmap);
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options, internal_comparator,
&keys, &kvmap);
ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
@ -1694,7 +1756,8 @@ static void DoCompressionTest(CompressionType comp) {
options.compression = comp;
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
c.Finish(options, table_options, ikc, &keys, &kvmap);
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options, ikc, &keys, &kvmap);
ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0));
@ -1807,7 +1870,8 @@ TEST(MemTableTest, Simple) {
ColumnFamilyMemTablesDefault cf_mems_default(memtable, &options);
ASSERT_TRUE(WriteBatchInternal::InsertInto(&batch, &cf_mems_default).ok());
Iterator* iter = memtable->NewIterator(ReadOptions());
Arena arena;
ScopedArenaIterator iter(memtable->NewIterator(ReadOptions(), &arena));
iter->SeekToFirst();
while (iter->Valid()) {
fprintf(stderr, "key: '%s' -> '%s'\n",
@ -1816,7 +1880,6 @@ TEST(MemTableTest, Simple) {
iter->Next();
}
delete iter;
delete memtable->Unref();
}

@ -68,6 +68,7 @@ class SstFileReader {
// options_ and internal_comparator_ will also be used in
// ReadSequential internally (specifically, seek-related operations)
Options options_;
const ImmutableCFOptions ioptions_;
InternalKeyComparator internal_comparator_;
unique_ptr<TableProperties> table_properties_;
};
@ -76,7 +77,8 @@ SstFileReader::SstFileReader(const std::string& file_path,
bool verify_checksum,
bool output_hex)
:file_name_(file_path), read_num_(0), verify_checksum_(verify_checksum),
output_hex_(output_hex), internal_comparator_(BytewiseComparator()) {
output_hex_(output_hex), ioptions_(options_),
internal_comparator_(BytewiseComparator()) {
fprintf(stdout, "Process %s\n", file_path.c_str());
init_result_ = NewTableReader(file_name_);
@ -123,7 +125,7 @@ Status SstFileReader::NewTableReader(const std::string& file_path) {
if (s.ok()) {
s = options_.table_factory->NewTableReader(
options_, soptions_, internal_comparator_, std::move(file_), file_size,
ioptions_, soptions_, internal_comparator_, std::move(file_), file_size,
&table_reader_);
}
return s;

@ -0,0 +1,257 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#ifndef GFLAGS
#include <cstdio>
// Fallback entry point compiled when gflags is unavailable: report the missing
// dependency on stderr and exit with a failure status.
int main() {
  fputs("Please install gflags to run rocksdb tools\n", stderr);
  return 1;
}
#else
#include <inttypes.h>
#include <stdio.h>
#include <sys/types.h>

#include <memory>
#include <vector>

#include <gflags/gflags.h>

#include "rocksdb/db.h"
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "port/port.h"
#include "util/mutexlock.h"
#include "util/random.h"
using GFLAGS::ParseCommandLineFlags;
// Unit multiplier used to spell the flag defaults below (KB * KB == 1 Mi).
static const uint32_t KB = 1024;
// Command-line flags controlling the benchmark. Each DEFINE_* creates a
// FLAGS_<name> variable that the code below reads.
DEFINE_int32(threads, 10, "Number of concurrent threads to run.");
// Default: 2 * 1024^3 bytes. Passed to NewLRUCache() as the capacity.
DEFINE_int64(cache_size, 2 * KB * KB * KB,
"Number of bytes to use as a cache of uncompressed data.");
// Passed to NewLRUCache() as its second argument.
DEFINE_int32(num_shard_bits, 4, "shard_bits.");
// Keys are drawn as `random % max_key`, so this bounds the key space.
DEFINE_int64(max_key, 1 * KB* KB, "Max number of key to place in cache");
DEFINE_uint64(ops_per_thread, 1200000, "Number of operations per thread.");
// Operation mix. Each operation draws a value in [0, 100) and compares it
// against these percentages; the defaults add up to 100.
// NOTE(review): nothing validates that user-supplied values sum to 100.
DEFINE_int32(insert_percent, 40,
"Ratio of insert to total workload (expressed as a percentage)");
DEFINE_int32(lookup_percent, 50,
"Ratio of lookup to total workload (expressed as a percentage)");
DEFINE_int32(erase_percent, 10,
"Ratio of erase to total workload (expressed as a percentage)");
namespace rocksdb {
class CacheBench;
namespace {
// Cache deleter callback for the values inserted by CacheBench::OperateCache().
// Those values are allocated with `new char[10]`, so they must be released
// with the array form of delete; scalar `delete` on an array allocation is
// undefined behaviour.
//   key:   key of the entry being dropped (unused).
//   value: pointer previously handed to Cache::Insert().
void deleter(const Slice& key, void* value) {
  delete[] reinterpret_cast<char*>(value);
}
// State shared by all concurrent executions of the same benchmark.
// Coordination data shared by all worker threads of a single benchmark run:
// a mutex/condvar pair, the start flag, and the "initialized" / "done"
// counters used as rendezvous points.
// The accessors do no locking themselves; the callers in this file take
// GetMutex() before touching the counters or the start flag.
class SharedState {
 public:
  explicit SharedState(CacheBench* cache_bench)
      : cv_(&mu_),
        num_threads_(FLAGS_threads),
        num_initialized_(0),
        start_(false),
        num_done_(0),
        cache_bench_(cache_bench) {}

  ~SharedState() {}

  // Synchronization primitives guarding / signalling the state below.
  port::Mutex* GetMutex() { return &mu_; }
  port::CondVar* GetCondVar() { return &cv_; }

  // Benchmark that owns this run.
  CacheBench* GetCacheBench() const { return cache_bench_; }

  // A worker has reached the start barrier.
  void IncInitialized() { ++num_initialized_; }

  // A worker has finished its workload.
  void IncDone() { ++num_done_; }

  // True once every worker has reached the start barrier.
  bool AllInitialized() const { return !(num_initialized_ < num_threads_); }

  // True once every worker has finished.
  bool AllDone() const { return !(num_done_ < num_threads_); }

  // Opens the start barrier / reports whether it is open.
  void SetStart() { start_ = true; }
  bool Started() const { return start_; }

 private:
  port::Mutex mu_;
  port::CondVar cv_;  // bound to mu_

  const uint64_t num_threads_;
  uint64_t num_initialized_;
  bool start_;
  uint64_t num_done_;

  CacheBench* cache_bench_;
};
// Per-thread state for concurrent executions of the same benchmark.
// State private to one worker thread.
struct ThreadState {
  uint32_t tid;         // index of this worker, 0-based
  Random rnd;           // per-thread generator, seeded with 1000 + index
  SharedState* shared;  // run-wide coordination state (not owned)

  ThreadState(uint32_t index, SharedState* shared)
      : tid(index), rnd(1000 + index), shared(shared) {}
};
} // namespace
class CacheBench {
public:
CacheBench() :
cache_(NewLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits)),
num_threads_(FLAGS_threads) {}
~CacheBench() {}
bool Run() {
rocksdb::Env* env = rocksdb::Env::Default();
PrintEnv();
SharedState shared(this);
std::vector<ThreadState*> threads(num_threads_);
for (uint32_t i = 0; i < num_threads_; i++) {
threads[i] = new ThreadState(i, &shared);
env->StartThread(ThreadBody, threads[i]);
}
{
MutexLock l(shared.GetMutex());
while (!shared.AllInitialized()) {
shared.GetCondVar()->Wait();
}
// Record start time
uint64_t start_time = env->NowMicros();
// Start all threads
shared.SetStart();
shared.GetCondVar()->SignalAll();
// Wait threads to complete
while (!shared.AllDone()) {
shared.GetCondVar()->Wait();
}
// Record end time
uint64_t end_time = env->NowMicros();
fprintf(stdout, "Complete in %" PRIu64 "ms\n", end_time - start_time);
}
return true;
}
private:
std::shared_ptr<Cache> cache_;
uint32_t num_threads_;
static void ThreadBody(void* v) {
ThreadState* thread = reinterpret_cast<ThreadState*>(v);
SharedState* shared = thread->shared;
{
MutexLock l(shared->GetMutex());
shared->IncInitialized();
if (shared->AllInitialized()) {
shared->GetCondVar()->SignalAll();
}
while (!shared->Started()) {
shared->GetCondVar()->Wait();
}
}
thread->shared->GetCacheBench()->OperateCache(thread);
{
MutexLock l(shared->GetMutex());
shared->IncDone();
if (shared->AllDone()) {
shared->GetCondVar()->SignalAll();
}
}
}
void OperateCache(ThreadState* thread) {
for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) {
uint64_t rand_key = thread->rnd.Next() % FLAGS_max_key;
// Cast uint64* to be char*, data would be copied to cache
Slice key(reinterpret_cast<char*>(&rand_key), 8);
int32_t prob_op = thread->rnd.Uniform(100);
if (prob_op >= 0 && prob_op < FLAGS_insert_percent) {
// do insert
auto handle = cache_->Insert(key, new char[10], 1, &deleter);
cache_->Release(handle);
} else if (prob_op -= FLAGS_insert_percent &&
prob_op < FLAGS_lookup_percent) {
// do lookup
auto handle = cache_->Lookup(key);
if (handle) {
cache_->Release(handle);
}
} else if (prob_op -= FLAGS_lookup_percent &&
prob_op < FLAGS_erase_percent) {
// do erase
cache_->Erase(key);
}
}
}
// Writes the benchmark configuration (library version and the command-line
// flag values) to standard output, followed by a separator line.
void PrintEnv() const {
  FILE* const out = stdout;
  fprintf(out, "RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion);
  fprintf(out, "Number of threads : %d\n", FLAGS_threads);
  fprintf(out, "Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread);
  fprintf(out, "Cache size : %" PRIu64 "\n", FLAGS_cache_size);
  fprintf(out, "Num shard bits : %d\n", FLAGS_num_shard_bits);
  fprintf(out, "Max key : %" PRIu64 "\n", FLAGS_max_key);
  fprintf(out, "Insert percentage : %d%%\n", FLAGS_insert_percent);
  fprintf(out, "Lookup percentage : %d%%\n", FLAGS_lookup_percent);
  fprintf(out, "Erase percentage : %d%%\n", FLAGS_erase_percent);
  fprintf(out, "----------------------------\n");
}
};
} // namespace rocksdb
// Parses the command-line flags, validates the thread count and runs the
// cache benchmark. Exit status is 0 when Run() reports success, 1 otherwise.
int main(int argc, char** argv) {
  ParseCommandLineFlags(&argc, &argv, true);

  // A non-positive thread count cannot drive the benchmark; bail out early.
  const bool thread_count_ok = FLAGS_threads > 0;
  if (!thread_count_ok) {
    fprintf(stderr, "threads number <= 0\n");
    exit(1);
  }

  rocksdb::CacheBench bench;
  const bool succeeded = bench.Run();
  return succeeded ? 0 : 1;
}
#endif // GFLAGS

@ -6,7 +6,10 @@
// Must not be included from any .h files to avoid polluting the namespace
// with macros.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <stdio.h>
#include <string>

@ -11,7 +11,10 @@ int main() {
}
#else
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <algorithm>
#include <gflags/gflags.h>

@ -392,6 +392,9 @@ TEST(EnvPosixTest, DecreaseNumBgThreads) {
}
#ifdef OS_LINUX
// Travis doesn't support fallocate or getting unique ID from files for whatever
// reason.
#ifndef TRAVIS
// To make sure the Env::GetUniqueId() related tests work correctly, the files
// should be stored in regular storage like "hard disk" or "flash device".
// Otherwise we cannot get the correct id.
@ -507,7 +510,7 @@ TEST(EnvPosixTest, AllocateTest) {
// verify that preallocated blocks were deallocated on file close
ASSERT_GT(st_blocks, f_stat.st_blocks);
}
#endif
#endif // ROCKSDB_FALLOCATE_PRESENT
// Returns true if any of the strings in ss are the prefix of another string.
bool HasPrefix(const std::unordered_set<std::string>& ss) {
@ -638,7 +641,8 @@ TEST(EnvPosixTest, InvalidateCache) {
// Delete the file
ASSERT_OK(env_->DeleteFile(fname));
}
#endif
#endif // not TRAVIS
#endif // OS_LINUX
TEST(EnvPosixTest, PosixRandomRWFileTest) {
EnvOptions soptions;

@ -53,7 +53,7 @@ HistogramBucketMapper::HistogramBucketMapper()
}
}
const size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {
size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {
if (value >= maxBucketValue_) {
return bucketValues_.size() - 1;
} else if ( value >= minBucketValue_ ) {

@ -23,10 +23,10 @@ class HistogramBucketMapper {
HistogramBucketMapper();
// converts a value to the bucket index.
const size_t IndexForValue(const uint64_t value) const;
size_t IndexForValue(const uint64_t value) const;
// number of buckets required.
const size_t BucketCount() const {
size_t BucketCount() const {
return bucketValues_.size();
}
@ -65,6 +65,8 @@ class HistogramImpl {
virtual double StandardDeviation() const;
virtual void Data(HistogramData * const data) const;
virtual ~HistogramImpl() {}
private:
// To be able to use HistogramImpl as thread local variable, its constructor
// has to be static. That's why we're using manually values from BucketMapper

@ -14,6 +14,7 @@
#include "rocksdb/write_batch.h"
#include "rocksdb/cache.h"
#include "util/coding.h"
#include "util/scoped_arena_iterator.h"
#include "utilities/ttl/db_ttl_impl.h"
#include <ctime>
@ -739,7 +740,8 @@ void InternalDumpCommand::DoCommand() {
uint64_t c=0;
uint64_t s1=0,s2=0;
// Setup internal key iterator
auto iter = unique_ptr<Iterator>(idb->TEST_NewInternalIterator());
Arena arena;
ScopedArenaIterator iter(idb->TEST_NewInternalIterator(&arena));
Status st = iter->status();
if (!st.ok()) {
exec_state_ = LDBCommandExecuteResult::FAILED("Iterator error:"

@ -9,7 +9,10 @@
#include "util/logging.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <errno.h>
#include <stdarg.h>

@ -8,8 +8,12 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/options.h"
#include "rocksdb/immutable_options.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <limits>
@ -28,6 +32,26 @@
namespace rocksdb {
// Builds an ImmutableCFOptions by copying a fixed subset of fields out of
// `options`.
//
// Members initialized from an `options.<field>.get()` call (prefix_extractor,
// merge_operator, info_log, statistics, table_factory) store only the raw
// pointer returned by get(); this constructor takes no ownership of them.
// NOTE(review): presumably `options` (or whatever owns those objects) must
// outlive this ImmutableCFOptions -- confirm against callers.
//
// The remaining members are plain copies of the corresponding `options`
// fields. Note the naming difference on one of them: `disable_data_sync` is
// read from `options.disableDataSync`.
ImmutableCFOptions::ImmutableCFOptions(const Options& options)
: prefix_extractor(options.prefix_extractor.get()),
comparator(options.comparator),
merge_operator(options.merge_operator.get()),
info_log(options.info_log.get()),
statistics(options.statistics.get()),
env(options.env),
allow_mmap_reads(options.allow_mmap_reads),
allow_mmap_writes(options.allow_mmap_writes),
db_paths(options.db_paths),
table_factory(options.table_factory.get()),
table_properties_collector_factories(
options.table_properties_collector_factories),
advise_random_on_open(options.advise_random_on_open),
bloom_locality(options.bloom_locality),
purge_redundant_kvs_while_flush(options.purge_redundant_kvs_while_flush),
min_partial_merge_operands(options.min_partial_merge_operands),
disable_data_sync(options.disableDataSync),
use_fsync(options.use_fsync) {}
ColumnFamilyOptions::ColumnFamilyOptions()
: comparator(BytewiseComparator()),
merge_operator(nullptr),

@ -7,7 +7,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <gflags/gflags.h>

@ -7,7 +7,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <limits>
#include "util/testharness.h"

@ -0,0 +1,28 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "rocksdb/iterator.h"
namespace rocksdb {
// Scoped holder for an Iterator whose destructor must be run explicitly.
//
// On destruction the holder invokes `~Iterator()` on the held pointer but
// never frees the memory. NOTE(review): this presumably matches iterators
// placement-constructed in an Arena that reclaims the storage itself (callers
// pass an `Arena*` when creating the iterator) -- confirm.
//
// An empty holder (nullptr) is valid: the constructor defaults to nullptr, so
// the destructor must tolerate it.
class ScopedArenaIterator {
 public:
  explicit ScopedArenaIterator(Iterator* iter = nullptr) : iter_(iter) {}

  // Copying would make two holders run the destructor on the same iterator.
  ScopedArenaIterator(const ScopedArenaIterator&) = delete;
  ScopedArenaIterator& operator=(const ScopedArenaIterator&) = delete;

  // Forwards member access to the held iterator; must not be used while the
  // holder is empty.
  Iterator* operator->() { return iter_; }

  // Replaces the held iterator. A previously held, different iterator is
  // destroyed first so that it is not silently dropped.
  void set(Iterator* iter) {
    if (iter != iter_) {
      Destroy();
      iter_ = iter;
    }
  }

  // Returns the held iterator (possibly nullptr) without giving it up.
  Iterator* get() { return iter_; }

  ~ScopedArenaIterator() { Destroy(); }

 private:
  // Runs the held iterator's destructor, if any, and leaves the holder empty.
  void Destroy() {
    if (iter_ != nullptr) {
      iter_->~Iterator();
      iter_ = nullptr;
    }
  }

  Iterator* iter_;
};
} // namespace rocksdb

@ -5,7 +5,10 @@
//
#include "util/statistics.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include "rocksdb/statistics.h"
#include "port/likely.h"

@ -15,7 +15,9 @@
#include "util/crc32c.h"
#include "rocksdb/transaction_log.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <algorithm>

@ -916,7 +916,7 @@ TEST(BackupableDBTest, RateLimiting) {
auto backup_time = env_->NowMicros() - start_backup;
auto rate_limited_backup_time = (bytes_written * kMicrosPerSec) /
backupable_options_->backup_rate_limit;
ASSERT_GT(backup_time, 0.9 * rate_limited_backup_time);
ASSERT_GT(backup_time, 0.8 * rate_limited_backup_time);
CloseBackupableDB();
@ -927,7 +927,7 @@ TEST(BackupableDBTest, RateLimiting) {
CloseRestoreDB();
auto rate_limited_restore_time = (bytes_written * kMicrosPerSec) /
backupable_options_->restore_rate_limit;
ASSERT_GT(restore_time, 0.9 * rate_limited_restore_time);
ASSERT_GT(restore_time, 0.8 * rate_limited_restore_time);
AssertBackupConsistency(0, 0, 100000, 100010);
}

@ -736,6 +736,7 @@ class DocumentDBImpl : public DocumentDB {
CreateColumnFamily(ColumnFamilyOptions(rocksdb_options_),
InternalSecondaryIndexName(index.name), &cf_handle);
if (!s.ok()) {
delete index_obj;
return s;
}

@ -6,7 +6,10 @@
#include "rocksdb/utilities/json_document.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <cassert>
#include <string>

@ -7,7 +7,9 @@
#include "utilities/geodb/geodb_impl.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <vector>
#include <map>

@ -7,7 +7,10 @@
#include "rocksdb/utilities/spatial_db.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <string>
#include <vector>

Loading…
Cancel
Save