diff --git a/db/column_family.cc b/db/column_family.cc index 7a3dfca87..ac85692e4 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -15,9 +15,77 @@ #include "db/version_set.h" #include "db/compaction_picker.h" +#include "db/table_properties_collector.h" +#include "util/hash_skiplist_rep.h" namespace rocksdb { +namespace { +// Fix user-supplied options to be reasonable +template +static void ClipToRange(T* ptr, V minvalue, V maxvalue) { + if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; + if (static_cast(*ptr) < minvalue) *ptr = minvalue; +} +} // anonymous namespace + +ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp, + const InternalFilterPolicy* ipolicy, + const ColumnFamilyOptions& src) { + ColumnFamilyOptions result = src; + result.comparator = icmp; + result.filter_policy = (src.filter_policy != nullptr) ? ipolicy : nullptr; + ClipToRange(&result.write_buffer_size, + ((size_t)64) << 10, ((size_t)64) << 30); + // if user sets arena_block_size, we trust user to use this value. Otherwise, + // calculate a proper value from writer_buffer_size; + if (result.arena_block_size <= 0) { + result.arena_block_size = result.write_buffer_size / 10; + } + result.min_write_buffer_number_to_merge = + std::min(result.min_write_buffer_number_to_merge, + result.max_write_buffer_number - 1); + if (result.block_cache == nullptr && !result.no_block_cache) { + result.block_cache = NewLRUCache(8 << 20); + } + result.compression_per_level = src.compression_per_level; + if (result.block_size_deviation < 0 || result.block_size_deviation > 100) { + result.block_size_deviation = 0; + } + if (result.max_mem_compaction_level >= result.num_levels) { + result.max_mem_compaction_level = result.num_levels - 1; + } + if (result.soft_rate_limit > result.hard_rate_limit) { + result.soft_rate_limit = result.hard_rate_limit; + } + if (result.prefix_extractor) { + // If a prefix extractor has been supplied and a HashSkipListRepFactory is + // being used, make sure that the latter uses the former as its transform + // function. + auto factory = + dynamic_cast(result.memtable_factory.get()); + if (factory && factory->GetTransform() != result.prefix_extractor) { + result.memtable_factory = std::make_shared(); + } + } + + // -- Sanitize the table properties collector + // All user defined properties collectors will be wrapped by + // UserKeyTablePropertiesCollector since for them they only have the + // knowledge of the user keys; internal keys are invisible to them. + auto& collectors = result.table_properties_collectors; + for (size_t i = 0; i < result.table_properties_collectors.size(); ++i) { + assert(collectors[i]); + collectors[i] = + std::make_shared(collectors[i]); + } + // Add collector to collect internal key statistics + collectors.push_back(std::make_shared()); + + return result; +} + + SuperVersion::SuperVersion() {} SuperVersion::~SuperVersion() { @@ -61,13 +129,16 @@ void SuperVersion::Init(MemTable* new_mem, MemTableListVersion* new_imm, ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name, Version* dummy_versions, - const ColumnFamilyOptions& options) + const ColumnFamilyOptions& options, + Logger* logger) : id_(id), name_(name), dummy_versions_(dummy_versions), current_(nullptr), - options_(options), - icmp_(options_.comparator), + internal_comparator_(options.comparator), + internal_filter_policy_(options.filter_policy), + options_(SanitizeOptions(&internal_comparator_, &internal_filter_policy_, + options)), mem_(nullptr), imm_(options.min_write_buffer_number_to_merge), super_version_(nullptr), @@ -77,9 +148,11 @@ ColumnFamilyData::ColumnFamilyData(uint32_t id, const std::string& name, log_number_(0), need_slowdown_for_num_level0_files_(false) { if (options_.compaction_style == kCompactionStyleUniversal) { - compaction_picker_.reset(new UniversalCompactionPicker(&options_, &icmp_)); + compaction_picker_.reset(new UniversalCompactionPicker( + &options_, &internal_comparator_, logger)); } else { - compaction_picker_.reset(new LevelCompactionPicker(&options_, &icmp_)); + compaction_picker_.reset( + new LevelCompactionPicker(&options_, &internal_comparator_, logger)); } } @@ -119,7 +192,7 @@ void ColumnFamilyData::CreateNewMemtable() { if (mem_ != nullptr) { delete mem_->Unref(); } - mem_ = new MemTable(icmp_, options_); + mem_ = new MemTable(internal_comparator_, options_); mem_->Ref(); } @@ -148,9 +221,11 @@ SuperVersion* ColumnFamilyData::InstallSuperVersion( return nullptr; } -ColumnFamilySet::ColumnFamilySet() +ColumnFamilySet::ColumnFamilySet(Logger* logger) : max_column_family_(0), - dummy_cfd_(new ColumnFamilyData(0, "", nullptr, ColumnFamilyOptions())) { + dummy_cfd_(new ColumnFamilyData(0, "", nullptr, ColumnFamilyOptions(), + nullptr)), + logger_(logger) { // initialize linked list dummy_cfd_->prev_.store(dummy_cfd_); dummy_cfd_->next_.store(dummy_cfd_); @@ -206,7 +281,7 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily( assert(column_families_.find(name) == column_families_.end()); column_families_.insert({name, id}); ColumnFamilyData* new_cfd = - new ColumnFamilyData(id, name, dummy_versions, options); + new ColumnFamilyData(id, name, dummy_versions, options, logger_); column_family_data_.insert({id, new_cfd}); max_column_family_ = std::max(max_column_family_, id); // add to linked list diff --git a/db/column_family.h b/db/column_family.h index 7c1920308..390cef839 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -14,6 +14,7 @@ #include #include "rocksdb/options.h" +#include "rocksdb/env.h" #include "db/memtablelist.h" #include "db/write_batch_internal.h" @@ -55,11 +56,16 @@ struct SuperVersion { Version* new_current); }; +extern ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp, + const InternalFilterPolicy* ipolicy, + const ColumnFamilyOptions& src); + // column family metadata. not thread-safe. should be protected by db_mutex class ColumnFamilyData { public: ColumnFamilyData(uint32_t id, const std::string& name, - Version* dummy_versions, const ColumnFamilyOptions& options); + Version* dummy_versions, const ColumnFamilyOptions& options, + Logger* logger); ~ColumnFamilyData(); uint32_t GetID() const { return id_; } @@ -89,8 +95,12 @@ class ColumnFamilyData { CompactionPicker* compaction_picker() const { return compaction_picker_.get(); } - const Comparator* user_comparator() const { return icmp_.user_comparator(); } - const InternalKeyComparator& internal_comparator() const { return icmp_; } + const Comparator* user_comparator() const { + return internal_comparator_.user_comparator(); + } + const InternalKeyComparator& internal_comparator() const { + return internal_comparator_; + } SuperVersion* GetSuperVersion() const { return super_version_; } uint64_t GetSuperVersionNumber() const { @@ -117,9 +127,11 @@ class ColumnFamilyData { const std::string name_; Version* dummy_versions_; // Head of circular doubly-linked list of versions. Version* current_; // == dummy_versions->prev_ - ColumnFamilyOptions options_; - const InternalKeyComparator icmp_; + const InternalKeyComparator internal_comparator_; + const InternalFilterPolicy internal_filter_policy_; + + ColumnFamilyOptions options_; MemTable* mem_; MemTableList imm_; @@ -170,7 +182,7 @@ class ColumnFamilySet { ColumnFamilyData* current_; }; - ColumnFamilySet(); + explicit ColumnFamilySet(Logger* logger); ~ColumnFamilySet(); ColumnFamilyData* GetDefault() const; @@ -203,6 +215,7 @@ class ColumnFamilySet { std::vector droppped_column_families_; uint32_t max_column_family_; ColumnFamilyData* dummy_cfd_; + Logger* logger_; }; class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables { diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index 958d2797a..330a8b1c4 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -10,7 +10,6 @@ #include "db/compaction_picker.h" #include -#include "util/statistics.h" namespace rocksdb { @@ -42,8 +41,10 @@ uint64_t MultiplyCheckOverflow(uint64_t op1, int op2) { } // anonymous namespace CompactionPicker::CompactionPicker(const ColumnFamilyOptions* options, - const InternalKeyComparator* icmp) + const InternalKeyComparator* icmp, + Logger* logger) : compactions_in_progress_(options->num_levels), + logger_(logger), options_(options), num_levels_(options->num_levels), icmp_(icmp) { @@ -269,17 +270,13 @@ void CompactionPicker::SetupOtherInputs(Compaction* c) { &c->parent_index_); if (expanded1.size() == c->inputs_[1].size() && !FilesInCompaction(expanded1)) { - Log(options_->info_log, + Log(logger_, "Expanding@%lu %lu+%lu (%lu+%lu bytes) to %lu+%lu (%lu+%lu bytes)" "\n", - (unsigned long)level, - (unsigned long)(c->inputs_[0].size()), - (unsigned long)(c->inputs_[1].size()), - (unsigned long)inputs0_size, - (unsigned long)inputs1_size, - (unsigned long)(expanded0.size()), - (unsigned long)(expanded1.size()), - (unsigned long)expanded0_size, + (unsigned long)level, (unsigned long)(c->inputs_[0].size()), + (unsigned long)(c->inputs_[1].size()), (unsigned long)inputs0_size, + (unsigned long)inputs1_size, (unsigned long)(expanded0.size()), + (unsigned long)(expanded1.size()), (unsigned long)expanded0_size, (unsigned long)inputs1_size); smallest = new_start; largest = new_limit; @@ -344,7 +341,7 @@ Compaction* CompactionPicker::CompactRange(Version* version, int input_level, c->inputs_[0] = inputs; if (ExpandWhileOverlapping(c) == false) { delete c; - Log(options_->info_log, "Could not compact due to expansion failure.\n"); + Log(logger_, "Could not compact due to expansion failure.\n"); return nullptr; } @@ -511,7 +508,7 @@ Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version, } //if (i > Version::number_of_files_to_sort_) { - // Log(options_->info_log, "XXX Looking at index %d", i); + // Log(logger_, "XXX Looking at index %d", i); //} // Do not pick this file if its parents at level+1 are being compacted. @@ -547,13 +544,12 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version) { if ((version->files_[level].size() < (unsigned int)options_->level0_file_num_compaction_trigger)) { - Log(options_->info_log, "Universal: nothing to do\n"); + Log(logger_, "Universal: nothing to do\n"); return nullptr; } Version::FileSummaryStorage tmp; - Log(options_->info_log, "Universal: candidate files(%zu): %s\n", - version->files_[level].size(), - version->LevelFileSummary(&tmp, 0)); + Log(logger_, "Universal: candidate files(%zu): %s\n", + version->files_[level].size(), version->LevelFileSummary(&tmp, 0)); // Check for size amplification first. Compaction* c = PickCompactionUniversalSizeAmp(version, score); @@ -599,10 +595,6 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version) { c->bottommost_level_ = true; } - // update statistics - MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION, - c->inputs_[0].size()); - // mark all the files that are being compacted c->MarkFilesBeingCompacted(true); @@ -658,8 +650,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( candidate_count = 1; break; } - Log(options_->info_log, - "Universal: file %lu[%d] being compacted, skipping", + Log(logger_, "Universal: file %lu[%d] being compacted, skipping", (unsigned long)f->number, loop); f = nullptr; } @@ -668,7 +659,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( // first candidate to be compacted. uint64_t candidate_size = f != nullptr? f->file_size : 0; if (f != nullptr) { - Log(options_->info_log, "Universal: Possible candidate file %lu[%d].", + Log(logger_, "Universal: Possible candidate file %lu[%d].", (unsigned long)f->number, loop); } @@ -701,11 +692,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( i < loop + candidate_count && i < file_by_time.size(); i++) { int index = file_by_time[i]; FileMetaData* f = version->files_[level][index]; - Log(options_->info_log, - "Universal: Skipping file %lu[%d] with size %lu %d\n", - (unsigned long)f->number, - i, - (unsigned long)f->file_size, + Log(logger_, "Universal: Skipping file %lu[%d] with size %lu %d\n", + (unsigned long)f->number, i, (unsigned long)f->file_size, f->being_compacted); } } @@ -740,10 +728,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( int index = file_by_time[i]; FileMetaData* f = c->input_version_->files_[level][index]; c->inputs_[0].push_back(f); - Log(options_->info_log, "Universal: Picking file %lu[%d] with size %lu\n", - (unsigned long)f->number, - i, - (unsigned long)f->file_size); + Log(logger_, "Universal: Picking file %lu[%d] with size %lu\n", + (unsigned long)f->number, i, (unsigned long)f->file_size); } return c; } @@ -779,9 +765,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( start_index = loop; // Consider this as the first candidate. break; } - Log(options_->info_log, "Universal: skipping file %lu[%d] compacted %s", - (unsigned long)f->number, - loop, + Log(logger_, "Universal: skipping file %lu[%d] compacted %s", + (unsigned long)f->number, loop, " cannot be a candidate to reduce size amp.\n"); f = nullptr; } @@ -789,10 +774,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( return nullptr; // no candidate files } - Log(options_->info_log, "Universal: First candidate file %lu[%d] %s", - (unsigned long)f->number, - start_index, - " to reduce size amp.\n"); + Log(logger_, "Universal: First candidate file %lu[%d] %s", + (unsigned long)f->number, start_index, " to reduce size amp.\n"); // keep adding up all the remaining files for (unsigned int loop = start_index; loop < file_by_time.size() - 1; @@ -800,10 +783,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( int index = file_by_time[loop]; f = version->files_[level][index]; if (f->being_compacted) { - Log(options_->info_log, - "Universal: Possible candidate file %lu[%d] %s.", - (unsigned long)f->number, - loop, + Log(logger_, "Universal: Possible candidate file %lu[%d] %s.", + (unsigned long)f->number, loop, " is already being compacted. No size amp reduction possible.\n"); return nullptr; } @@ -820,18 +801,16 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( // size amplification = percentage of additional size if (candidate_size * 100 < ratio * earliest_file_size) { - Log(options_->info_log, + Log(logger_, "Universal: size amp not needed. newer-files-total-size %lu " "earliest-file-size %lu", - (unsigned long)candidate_size, - (unsigned long)earliest_file_size); + (unsigned long)candidate_size, (unsigned long)earliest_file_size); return nullptr; } else { - Log(options_->info_log, + Log(logger_, "Universal: size amp needed. newer-files-total-size %lu " "earliest-file-size %lu", - (unsigned long)candidate_size, - (unsigned long)earliest_file_size); + (unsigned long)candidate_size, (unsigned long)earliest_file_size); } assert(start_index >= 0 && start_index < file_by_time.size() - 1); @@ -845,11 +824,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( int index = file_by_time[loop]; f = c->input_version_->files_[level][index]; c->inputs_[0].push_back(f); - Log(options_->info_log, - "Universal: size amp picking file %lu[%d] with size %lu", - (unsigned long)f->number, - index, - (unsigned long)f->file_size); + Log(logger_, "Universal: size amp picking file %lu[%d] with size %lu", + (unsigned long)f->number, index, (unsigned long)f->file_size); } return c; } diff --git a/db/compaction_picker.h b/db/compaction_picker.h index 1b6897546..c11b64785 100644 --- a/db/compaction_picker.h +++ b/db/compaction_picker.h @@ -12,6 +12,7 @@ #include "db/compaction.h" #include "rocksdb/status.h" #include "rocksdb/options.h" +#include "rocksdb/env.h" #include #include @@ -25,7 +26,7 @@ class Version; class CompactionPicker { public: CompactionPicker(const ColumnFamilyOptions* options, - const InternalKeyComparator* icmp); + const InternalKeyComparator* icmp, Logger* logger); virtual ~CompactionPicker(); // Pick level and inputs for a new compaction. @@ -116,7 +117,9 @@ class CompactionPicker { // Per-level max bytes std::unique_ptr level_max_bytes_; + Logger* logger_; const ColumnFamilyOptions* const options_; + private: int num_levels_; @@ -126,8 +129,8 @@ class CompactionPicker { class UniversalCompactionPicker : public CompactionPicker { public: UniversalCompactionPicker(const ColumnFamilyOptions* options, - const InternalKeyComparator* icmp) - : CompactionPicker(options, icmp) {} + const InternalKeyComparator* icmp, Logger* logger) + : CompactionPicker(options, icmp, logger) {} virtual Compaction* PickCompaction(Version* version) override; private: @@ -143,8 +146,8 @@ class UniversalCompactionPicker : public CompactionPicker { class LevelCompactionPicker : public CompactionPicker { public: LevelCompactionPicker(const ColumnFamilyOptions* options, - const InternalKeyComparator* icmp) - : CompactionPicker(options, icmp) {} + const InternalKeyComparator* icmp, Logger* logger) + : CompactionPicker(options, icmp, logger) {} virtual Compaction* PickCompaction(Version* version) override; private: diff --git a/db/db_impl.cc b/db/db_impl.cc index f32ab72d7..073f1c4b8 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -120,32 +120,28 @@ struct DBImpl::CompactionState { } }; +namespace { // Fix user-supplied options to be reasonable template static void ClipToRange(T* ptr, V minvalue, V maxvalue) { if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; if (static_cast(*ptr) < minvalue) *ptr = minvalue; } +} // anonymous namespace + Options SanitizeOptions(const std::string& dbname, const InternalKeyComparator* icmp, const InternalFilterPolicy* ipolicy, const Options& src) { - Options result = src; - result.comparator = icmp; - result.filter_policy = (src.filter_policy != nullptr) ? ipolicy : nullptr; - ClipToRange(&result.max_open_files, 20, 1000000); - ClipToRange(&result.write_buffer_size, ((size_t)64)<<10, - ((size_t)64)<<30); - ClipToRange(&result.block_size, 1<<10, 4<<20); - - // if user sets arena_block_size, we trust user to use this value. Otherwise, - // calculate a proper value from writer_buffer_size; - if (result.arena_block_size <= 0) { - result.arena_block_size = result.write_buffer_size / 10; - } - - result.min_write_buffer_number_to_merge = std::min( - result.min_write_buffer_number_to_merge, result.max_write_buffer_number-1); + auto db_options = SanitizeOptions(dbname, DBOptions(src)); + auto cf_options = SanitizeOptions(icmp, ipolicy, ColumnFamilyOptions(src)); + return Options(db_options, cf_options); +} + +DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { + DBOptions result = src; + ClipToRange(&result.max_open_files, 20, 1000000); + if (result.info_log == nullptr) { Status s = CreateLoggerFromOptions(dbname, result.db_log_dir, src.env, result, &result.info_log); @@ -154,60 +150,12 @@ Options SanitizeOptions(const std::string& dbname, result.info_log = nullptr; } } - if (result.block_cache == nullptr && !result.no_block_cache) { - result.block_cache = NewLRUCache(8 << 20); - } - result.compression_per_level = src.compression_per_level; - if (result.block_size_deviation < 0 || result.block_size_deviation > 100) { - result.block_size_deviation = 0; - } - if (result.max_mem_compaction_level >= result.num_levels) { - result.max_mem_compaction_level = result.num_levels - 1; - } - if (result.soft_rate_limit > result.hard_rate_limit) { - result.soft_rate_limit = result.hard_rate_limit; - } - if (result.compaction_filter) { - Log(result.info_log, "Compaction filter specified, ignore factory"); - } - if (result.prefix_extractor) { - // If a prefix extractor has been supplied and a HashSkipListRepFactory is - // being used, make sure that the latter uses the former as its transform - // function. - auto factory = dynamic_cast( - result.memtable_factory.get()); - if (factory && - factory->GetTransform() != result.prefix_extractor) { - Log(result.info_log, "A prefix hash representation factory was supplied " - "whose prefix extractor does not match options.prefix_extractor. " - "Falling back to skip list representation factory"); - result.memtable_factory = std::make_shared(); - } else if (factory) { - Log(result.info_log, "Prefix hash memtable rep is in use."); - } - } if (result.wal_dir.empty()) { // Use dbname as default result.wal_dir = dbname; } - // -- Sanitize the table properties collector - // All user defined properties collectors will be wrapped by - // UserKeyTablePropertiesCollector since for them they only have the - // knowledge of the user keys; internal keys are invisible to them. - auto& collectors = result.table_properties_collectors; - for (size_t i = 0; i < result.table_properties_collectors.size(); ++i) { - assert(collectors[i]); - collectors[i] = - std::make_shared(collectors[i]); - } - - // Add collector to collect internal key statistics - collectors.push_back( - std::make_shared() - ); - return result; } @@ -1979,6 +1927,9 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, for (auto cfd : *versions_->GetColumnFamilySet()) { c.reset(cfd->PickCompaction()); if (c != nullptr) { + // update statistics + MeasureTime(options_.statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION, + c->inputs(0)->size()); break; } } diff --git a/db/db_impl.h b/db/db_impl.h index e886256ce..0e001e837 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -510,7 +510,7 @@ extern Options SanitizeOptions(const std::string& db, const InternalKeyComparator* icmp, const InternalFilterPolicy* ipolicy, const Options& src); - +extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src); // Determine compression type, based on user options, level of the output // file and whether compression is disabled. diff --git a/db/version_set.cc b/db/version_set.cc index 56e285015..7fbe44047 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1374,7 +1374,7 @@ class VersionSet::Builder { VersionSet::VersionSet(const std::string& dbname, const Options* options, const EnvOptions& storage_options, TableCache* table_cache) - : column_family_set_(new ColumnFamilySet()), + : column_family_set_(new ColumnFamilySet(options->info_log.get())), env_(options->env), dbname_(dbname), options_(options), diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 0ca7055bb..e7994d798 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -118,12 +118,6 @@ struct ColumnFamilyOptions { // Default: a factory that doesn't provide any object std::shared_ptr compaction_filter_factory; - // Any internal progress/error information generated by the db will - // be written to info_log if it is non-nullptr, or to a file stored - // in the same directory as the DB contents if info_log is nullptr. - // Default: nullptr - shared_ptr info_log; - // ------------------- // Parameters that affect performance @@ -322,11 +316,6 @@ struct ColumnFamilyOptions { // stop building a single file in a level->level+1 compaction. int max_grandparent_overlap_factor; - // If non-null, then we should collect metrics about database operations - // Statistics objects should not be shared between DB instances as - // it does not use any locks to prevent concurrent updates. - shared_ptr statistics; - // Disable compaction triggered by seek. // With bloomfilter and fast storage, a miss on one level // is very cheap if the file handle is cached in table cache @@ -356,18 +345,6 @@ struct ColumnFamilyOptions { // Default: false bool no_block_cache; - // Number of shards used for table cache. - int table_cache_numshardbits; - - // During data eviction of table's LRU cache, it would be inefficient - // to strictly follow LRU because this piece of memory will not really - // be released unless its refcount falls to zero. Instead, make two - // passes: the first pass will release items with refcount = 1, - // and if not enough space releases after scanning the number of - // elements specified by this parameter, we will remove items in LRU - // order. - int table_cache_remove_scan_count_limit; - // size of one block in arena memory allocation. // If <= 0, a proper value is automatically calculated (usually 1/10 of // writer_buffer_size). @@ -489,6 +466,12 @@ struct DBOptions { // Default: Env::Default() Env* env; + // Any internal progress/error information generated by the db will + // be written to info_log if it is non-nullptr, or to a file stored + // in the same directory as the DB contents if info_log is nullptr. + // Default: nullptr + shared_ptr info_log; + // Number of open files that can be used by the DB. You may need to // increase this if your database has a large working set (budget // one open file per 2MB of working set). @@ -496,6 +479,11 @@ struct DBOptions { // Default: 1000 int max_open_files; + // If non-null, then we should collect metrics about database operations + // Statistics objects should not be shared between DB instances as + // it does not use any locks to prevent concurrent updates. + shared_ptr statistics; + // If true, then the contents of data files are not synced // to stable storage. Their contents remain in the OS buffers till the // OS decides to flush them. This option is good for bulk-loading @@ -577,6 +565,18 @@ struct DBOptions { // The default value is MAX_INT so that roll-over does not take place. uint64_t max_manifest_file_size; + // Number of shards used for table cache. + int table_cache_numshardbits; + + // During data eviction of table's LRU cache, it would be inefficient + // to strictly follow LRU because this piece of memory will not really + // be released unless its refcount falls to zero. Instead, make two + // passes: the first pass will release items with refcount = 1, + // and if not enough space releases after scanning the number of + // elements specified by this parameter, we will remove items in LRU + // order. + int table_cache_remove_scan_count_limit; + // The following two fields affect how archived logs will be deleted. // 1. If both set to 0, logs will be deleted asap and will not get into // the archive. diff --git a/util/auto_roll_logger.cc b/util/auto_roll_logger.cc index 95f2fae0d..c3d341590 100644 --- a/util/auto_roll_logger.cc +++ b/util/auto_roll_logger.cc @@ -77,7 +77,7 @@ Status CreateLoggerFromOptions( const std::string& dbname, const std::string& db_log_dir, Env* env, - const Options& options, + const DBOptions& options, std::shared_ptr* logger) { std::string db_absolute_path; env->GetAbsolutePath(dbname, &db_absolute_path); diff --git a/util/auto_roll_logger.h b/util/auto_roll_logger.h index db70f1586..68705a2db 100644 --- a/util/auto_roll_logger.h +++ b/util/auto_roll_logger.h @@ -84,7 +84,7 @@ Status CreateLoggerFromOptions( const std::string& dbname, const std::string& db_log_dir, Env* env, - const Options& options, + const DBOptions& options, std::shared_ptr* logger); } // namespace rocksdb diff --git a/util/auto_roll_logger_test.cc b/util/auto_roll_logger_test.cc index 2fd2c51f3..d8dbd9182 100755 --- a/util/auto_roll_logger_test.cc +++ b/util/auto_roll_logger_test.cc @@ -191,7 +191,7 @@ TEST(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) { } TEST(AutoRollLoggerTest, CreateLoggerFromOptions) { - Options options; + DBOptions options; shared_ptr logger; // Normal logger diff --git a/util/options.cc b/util/options.cc index 1d0a4aea8..50d1e850e 100644 --- a/util/options.cc +++ b/util/options.cc @@ -32,7 +32,6 @@ ColumnFamilyOptions::ColumnFamilyOptions() compaction_filter_factory( std::shared_ptr( new DefaultCompactionFilterFactory())), - info_log(nullptr), write_buffer_size(4<<20), max_write_buffer_number(2), min_write_buffer_number_to_merge(1), @@ -57,14 +56,11 @@ ColumnFamilyOptions::ColumnFamilyOptions() expanded_compaction_factor(25), source_compaction_factor(1), max_grandparent_overlap_factor(10), - statistics(nullptr), disable_seek_compaction(false), soft_rate_limit(0.0), hard_rate_limit(0.0), rate_limit_delay_max_milliseconds(1000), no_block_cache(false), - table_cache_numshardbits(4), - table_cache_remove_scan_count_limit(16), arena_block_size(0), disable_auto_compactions(false), purge_redundant_kvs_while_flush(true), @@ -86,7 +82,6 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) merge_operator(options.merge_operator), compaction_filter(options.compaction_filter), compaction_filter_factory(options.compaction_filter_factory), - info_log(options.info_log), write_buffer_size(options.write_buffer_size), max_write_buffer_number(options.max_write_buffer_number), min_write_buffer_number_to_merge( @@ -116,16 +111,12 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) expanded_compaction_factor(options.expanded_compaction_factor), source_compaction_factor(options.source_compaction_factor), max_grandparent_overlap_factor(options.max_grandparent_overlap_factor), - statistics(options.statistics), disable_seek_compaction(options.disable_seek_compaction), soft_rate_limit(options.soft_rate_limit), hard_rate_limit(options.hard_rate_limit), rate_limit_delay_max_milliseconds( options.rate_limit_delay_max_milliseconds), no_block_cache(options.no_block_cache), - table_cache_numshardbits(options.table_cache_numshardbits), - table_cache_remove_scan_count_limit( - options.table_cache_remove_scan_count_limit), arena_block_size(options.arena_block_size), disable_auto_compactions(options.disable_auto_compactions), purge_redundant_kvs_while_flush(options.purge_redundant_kvs_while_flush), @@ -149,7 +140,9 @@ DBOptions::DBOptions() error_if_exists(false), paranoid_checks(false), env(Env::Default()), + info_log(nullptr), max_open_files(1000), + statistics(nullptr), disableDataSync(false), use_fsync(false), db_stats_log_interval(1800), @@ -162,6 +155,8 @@ DBOptions::DBOptions() log_file_time_to_roll(0), keep_log_file_num(1000), max_manifest_file_size(std::numeric_limits::max()), + table_cache_numshardbits(4), + table_cache_remove_scan_count_limit(16), WAL_ttl_seconds(0), WAL_size_limit_MB(0), manifest_preallocation_size(4 * 1024 * 1024), @@ -181,7 +176,9 @@ DBOptions::DBOptions(const Options& options) error_if_exists(options.error_if_exists), paranoid_checks(options.paranoid_checks), env(options.env), + info_log(options.info_log), max_open_files(options.max_open_files), + statistics(options.statistics), disableDataSync(options.disableDataSync), use_fsync(options.use_fsync), db_stats_log_interval(options.db_stats_log_interval), @@ -195,6 +192,9 @@ DBOptions::DBOptions(const Options& options) log_file_time_to_roll(options.log_file_time_to_roll), keep_log_file_num(options.keep_log_file_num), max_manifest_file_size(options.max_manifest_file_size), + table_cache_numshardbits(options.table_cache_numshardbits), + table_cache_remove_scan_count_limit( + options.table_cache_remove_scan_count_limit), WAL_ttl_seconds(options.WAL_ttl_seconds), WAL_size_limit_MB(options.WAL_size_limit_MB), manifest_preallocation_size(options.manifest_preallocation_size), diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index b6bdfdbe3..eb4a7d79b 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -344,7 +344,7 @@ class BackupableDBTest { options_.wal_dir = dbname_; // set up backup db options CreateLoggerFromOptions(dbname_, backupdir_, env_, - Options(), &logger_); + DBOptions(), &logger_); backupable_options_.reset(new BackupableDBOptions( backupdir_, test_backup_env_.get(), true, logger_.get(), true));