diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3a17a883f..7a94c7f69 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,6 +10,10 @@ the CLA and we can cross-check with your GitHub username. Complete your CLA here: +If you don't have a Facebook account, we can send you a PDF that you can +sign offline. Send us an e-mail or create a new github issue to +request the CLA in PDF format. + ## License By contributing to RocksDB, you agree that your contributions will be diff --git a/INSTALL.md b/INSTALL.md index a63b9b15c..472fd2331 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -71,7 +71,7 @@ libraries. You are on your own. `make clean; make` will compile librocksdb.a (RocskDB static library) and all the unit tests. You can run all unit tests with `make check`. -For shared library builds, exec `make librocksdb.so` instead. +For shared library builds, exec `make shared_lib` instead. If you followed the above steps and your compile or unit tests fail, please submit an issue: (https://github.com/facebook/rocksdb/issues) diff --git a/Makefile b/Makefile index 04d8466a5..e9985ac04 100644 --- a/Makefile +++ b/Makefile @@ -78,6 +78,7 @@ TESTS = \ redis_test \ reduce_levels_test \ plain_table_db_test \ + prefix_test \ simple_table_db_test \ skiplist_test \ stringappend_test \ @@ -288,6 +289,9 @@ crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg + plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) diff --git a/db/c.cc b/db/c.cc index 68f361336..935a297f1 100644 --- a/db/c.cc +++ b/db/c.cc @@ -17,11 +17,14 @@ #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/iterator.h" +#include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "rocksdb/write_batch.h" #include "rocksdb/memtablerep.h" #include "rocksdb/universal_compaction.h" +#include "rocksdb/statistics.h" +#include "rocksdb/slice_transform.h" using rocksdb::Cache; using rocksdb::Comparator; @@ -30,8 +33,10 @@ using rocksdb::DB; using rocksdb::Env; using rocksdb::FileLock; using rocksdb::FilterPolicy; +using rocksdb::FlushOptions; using rocksdb::Iterator; using rocksdb::Logger; +using rocksdb::MergeOperator; using rocksdb::NewBloomFilterPolicy; using rocksdb::NewLRUCache; using rocksdb::Options; @@ -40,6 +45,7 @@ using rocksdb::Range; using rocksdb::ReadOptions; using rocksdb::SequentialFile; using rocksdb::Slice; +using rocksdb::SliceTransform; using rocksdb::Snapshot; using rocksdb::Status; using rocksdb::WritableFile; @@ -50,19 +56,20 @@ using std::shared_ptr; extern "C" { -struct rocksdb_t { DB* rep; }; -struct rocksdb_iterator_t { Iterator* rep; }; -struct rocksdb_writebatch_t { WriteBatch rep; }; -struct rocksdb_snapshot_t { const Snapshot* rep; }; -struct rocksdb_readoptions_t { ReadOptions rep; }; -struct rocksdb_writeoptions_t { WriteOptions rep; }; -struct rocksdb_options_t { Options rep; }; -struct rocksdb_seqfile_t { SequentialFile* rep; }; -struct rocksdb_randomfile_t { RandomAccessFile* rep; }; -struct rocksdb_writablefile_t { WritableFile* rep; }; -struct rocksdb_filelock_t { FileLock* rep; }; -struct rocksdb_logger_t { shared_ptr rep; }; -struct rocksdb_cache_t { shared_ptr rep; }; +struct rocksdb_t { DB* rep; }; +struct rocksdb_iterator_t { Iterator* rep; }; +struct rocksdb_writebatch_t { WriteBatch rep; }; +struct rocksdb_snapshot_t { const Snapshot* rep; }; +struct rocksdb_flushoptions_t { FlushOptions rep; }; +struct rocksdb_readoptions_t { ReadOptions rep; }; +struct rocksdb_writeoptions_t { WriteOptions rep; }; +struct rocksdb_options_t { Options rep; }; +struct rocksdb_seqfile_t { SequentialFile* rep; }; +struct rocksdb_randomfile_t { RandomAccessFile* rep; }; +struct rocksdb_writablefile_t { WritableFile* rep; }; +struct rocksdb_filelock_t { FileLock* rep; }; +struct rocksdb_logger_t { shared_ptr rep; }; +struct rocksdb_cache_t { shared_ptr rep; }; struct rocksdb_comparator_t : public Comparator { void* state_; @@ -103,6 +110,9 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { void*, const char* key, size_t length, const char* filter, size_t filter_length); + void (*delete_filter_)( + void*, + const char* filter, size_t filter_length); virtual ~rocksdb_filterpolicy_t() { (*destructor_)(state_); @@ -122,7 +132,12 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { size_t len; char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); dst->append(filter, len); - free(filter); + + if (delete_filter_ != nullptr) { + (*delete_filter_)(state_, filter, len); + } else { + free(filter); + } } virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { @@ -131,16 +146,151 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { } }; +struct rocksdb_mergeoperator_t : public MergeOperator { + void* state_; + void (*destructor_)(void*); + const char* (*name_)(void*); + char* (*full_merge_)( + void*, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length); + char* (*partial_merge_)( + void*, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length); + void (*delete_value_)( + void*, + const char* value, size_t value_length); + + virtual ~rocksdb_mergeoperator_t() { + (*destructor_)(state_); + } + + virtual const char* Name() const { + return (*name_)(state_); + } + + virtual bool FullMerge( + const Slice& key, + const Slice* existing_value, + const std::deque& operand_list, + std::string* new_value, + Logger* logger) const { + + size_t n = operand_list.size(); + std::vector operand_pointers(n); + std::vector operand_sizes(n); + for (size_t i = 0; i < n; i++) { + Slice operand(operand_list[i]); + operand_pointers[i] = operand.data(); + operand_sizes[i] = operand.size(); + } + + const char* existing_value_data = nullptr; + size_t existing_value_len = 0; + if (existing_value != nullptr) { + existing_value_data = existing_value->data(); + existing_value_len = existing_value->size(); + } + + unsigned char success; + size_t new_value_len; + char* tmp_new_value = (*full_merge_)( + state_, + key.data(), key.size(), + existing_value_data, existing_value_len, + &operand_pointers[0], &operand_sizes[0], n, + &success, &new_value_len); + new_value->assign(tmp_new_value, new_value_len); + + if (delete_value_ != nullptr) { + (*delete_value_)(state_, tmp_new_value, new_value_len); + } else { + free(tmp_new_value); + } + + return success; + } + + virtual bool PartialMerge( + const Slice& key, + const Slice& left_operand, + const Slice& right_operand, + std::string* new_value, + Logger* logger) const { + + unsigned char success; + size_t new_value_len; + char* tmp_new_value = (*partial_merge_)( + state_, + key.data(), key.size(), + left_operand.data(), left_operand.size(), + right_operand.data(), right_operand.size(), + &success, &new_value_len); + new_value->assign(tmp_new_value, new_value_len); + + if (delete_value_ != nullptr) { + (*delete_value_)(state_, tmp_new_value, new_value_len); + } else { + free(tmp_new_value); + } + + return success; + } +}; + struct rocksdb_env_t { Env* rep; bool is_default; }; +struct rocksdb_slicetransform_t : public SliceTransform { + void* state_; + void (*destructor_)(void*); + const char* (*name_)(void*); + char* (*transform_)( + void*, + const char* key, size_t length, + size_t* dst_length); + unsigned char (*in_domain_)( + void*, + const char* key, size_t length); + unsigned char (*in_range_)( + void*, + const char* key, size_t length); + + virtual ~rocksdb_slicetransform_t() { + (*destructor_)(state_); + } + + virtual const char* Name() const { + return (*name_)(state_); + } + + virtual Slice Transform(const Slice& src) const { + size_t len; + char* dst = (*transform_)(state_, src.data(), src.size(), &len); + return Slice(dst, len); + } + + virtual bool InDomain(const Slice& src) const { + return (*in_domain_)(state_, src.data(), src.size()); + } + + virtual bool InRange(const Slice& src) const { + return (*in_range_)(state_, src.data(), src.size()); + } +}; + struct rocksdb_universal_compaction_options_t { rocksdb::CompactionOptionsUniversal *rep; }; - static bool SaveError(char** errptr, const Status& s) { assert(errptr != NULL); if (s.ok()) { @@ -197,6 +347,15 @@ void rocksdb_delete( SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); } +void rocksdb_merge( + rocksdb_t* db, + const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, + db->rep->Merge(options->rep, Slice(key, keylen), Slice(val, vallen))); +} void rocksdb_write( rocksdb_t* db, @@ -287,6 +446,26 @@ void rocksdb_compact_range( (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL)); } +void rocksdb_flush( + rocksdb_t* db, + const rocksdb_flushoptions_t* options, + char** errptr) { + SaveError(errptr, db->rep->Flush(options->rep)); +} + +void rocksdb_disable_file_deletions( + rocksdb_t* db, + char** errptr) { + SaveError(errptr, db->rep->DisableFileDeletions()); +} + +void rocksdb_enable_file_deletions( + rocksdb_t* db, + unsigned char force, + char** errptr) { + SaveError(errptr, db->rep->EnableFileDeletions(force)); +} + void rocksdb_destroy_db( const rocksdb_options_t* options, const char* name, @@ -365,6 +544,13 @@ void rocksdb_writebatch_put( b->rep.Put(Slice(key, klen), Slice(val, vlen)); } +void rocksdb_writebatch_merge( + rocksdb_writebatch_t* b, + const char* key, size_t klen, + const char* val, size_t vlen) { + b->rep.Merge(Slice(key, klen), Slice(val, vlen)); +} + void rocksdb_writebatch_delete( rocksdb_writebatch_t* b, const char* key, size_t klen) { @@ -409,6 +595,12 @@ void rocksdb_options_set_comparator( opt->rep.comparator = cmp; } +void rocksdb_options_set_merge_operator( + rocksdb_options_t* opt, + rocksdb_mergeoperator_t* merge_operator) { + opt->rep.merge_operator = std::shared_ptr(merge_operator); +} + void rocksdb_options_set_filter_policy( rocksdb_options_t* opt, rocksdb_filterpolicy_t* policy) { @@ -454,6 +646,12 @@ void rocksdb_options_set_cache(rocksdb_options_t* opt, rocksdb_cache_t* c) { } } +void rocksdb_options_set_cache_compressed(rocksdb_options_t* opt, rocksdb_cache_t* c) { + if (c) { + opt->rep.block_cache_compressed = c->rep; + } +} + void rocksdb_options_set_block_size(rocksdb_options_t* opt, size_t s) { opt->rep.block_size = s; } @@ -492,6 +690,10 @@ void rocksdb_options_set_max_grandparent_overlap_factor( opt->rep.max_grandparent_overlap_factor = n; } +void rocksdb_options_enable_statistics(rocksdb_options_t* opt) { + opt->rep.statistics = rocksdb::CreateDBStatistics(); +} + void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) { opt->rep.num_levels = n; } @@ -537,6 +739,16 @@ void rocksdb_options_set_compression_options( opt->rep.compression_opts.strategy = strategy; } +void rocksdb_options_set_prefix_extractor( + rocksdb_options_t* opt, rocksdb_slicetransform_t* prefix_extractor) { + opt->rep.prefix_extractor = prefix_extractor; +} + +void rocksdb_options_set_whole_key_filtering( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.whole_key_filtering = v; +} + void rocksdb_options_set_disable_data_sync( rocksdb_options_t* opt, int disable_data_sync) { opt->rep.disableDataSync = disable_data_sync; @@ -557,6 +769,11 @@ void rocksdb_options_set_db_log_dir( opt->rep.db_log_dir = db_log_dir; } +void rocksdb_options_set_wal_dir( + rocksdb_options_t* opt, const char* v) { + opt->rep.wal_dir = v; +} + void rocksdb_options_set_WAL_ttl_seconds(rocksdb_options_t* opt, uint64_t ttl) { opt->rep.WAL_ttl_seconds = ttl; } @@ -566,6 +783,76 @@ void rocksdb_options_set_WAL_size_limit_MB( opt->rep.WAL_size_limit_MB = limit; } +void rocksdb_options_set_manifest_preallocation_size( + rocksdb_options_t* opt, size_t v) { + opt->rep.manifest_preallocation_size = v; +} + +void rocksdb_options_set_purge_redundant_kvs_while_flush( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.purge_redundant_kvs_while_flush = v; +} + +void rocksdb_options_set_allow_os_buffer( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.allow_os_buffer = v; +} + +void rocksdb_options_set_allow_mmap_reads( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.allow_mmap_reads = v; +} + +void rocksdb_options_set_allow_mmap_writes( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.allow_mmap_writes = v; +} + +void rocksdb_options_set_is_fd_close_on_exec( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.is_fd_close_on_exec = v; +} + +void rocksdb_options_set_skip_log_error_on_recovery( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.skip_log_error_on_recovery = v; +} + +void rocksdb_options_set_stats_dump_period_sec( + rocksdb_options_t* opt, unsigned int v) { + opt->rep.stats_dump_period_sec = v; +} + +void rocksdb_options_set_block_size_deviation( + rocksdb_options_t* opt, int v) { + opt->rep.block_size_deviation = v; +} + +void rocksdb_options_set_advise_random_on_open( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.advise_random_on_open = v; +} + +void rocksdb_options_set_use_adaptive_mutex( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.use_adaptive_mutex = v; +} + +void rocksdb_options_set_bytes_per_sync( + rocksdb_options_t* opt, uint64_t v) { + opt->rep.bytes_per_sync = v; +} + +void rocksdb_options_set_filter_deletes( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.filter_deletes = v; +} + +void rocksdb_options_set_max_sequential_skip_in_iterations( + rocksdb_options_t* opt, uint64_t v) { + opt->rep.max_sequential_skip_in_iterations = v; +} + void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t* opt, int n) { opt->rep.max_write_buffer_number = n; } @@ -582,6 +869,56 @@ void rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt, int n) { opt->rep.max_background_flushes = n; } +void rocksdb_options_set_max_log_file_size(rocksdb_options_t* opt, size_t v) { + opt->rep.max_log_file_size = v; +} + +void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t* opt, size_t v) { + opt->rep.log_file_time_to_roll = v; +} + +void rocksdb_options_set_keep_log_file_num(rocksdb_options_t* opt, size_t v) { + opt->rep.keep_log_file_num = v; +} + +void rocksdb_options_set_soft_rate_limit(rocksdb_options_t* opt, double v) { + opt->rep.soft_rate_limit = v; +} + +void rocksdb_options_set_hard_rate_limit(rocksdb_options_t* opt, double v) { + opt->rep.hard_rate_limit = v; +} + +void rocksdb_options_set_rate_limit_delay_max_milliseconds( + rocksdb_options_t* opt, unsigned int v) { + opt->rep.rate_limit_delay_max_milliseconds = v; +} + +void rocksdb_options_set_max_manifest_file_size( + rocksdb_options_t* opt, size_t v) { + opt->rep.max_manifest_file_size = v; +} + +void rocksdb_options_set_no_block_cache( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.no_block_cache = v; +} + +void rocksdb_options_set_table_cache_numshardbits( + rocksdb_options_t* opt, int v) { + opt->rep.table_cache_numshardbits = v; +} + +void rocksdb_options_set_table_cache_remove_scan_count_limit( + rocksdb_options_t* opt, int v) { + opt->rep.table_cache_remove_scan_count_limit = v; +} + +void rocksdb_options_set_arena_block_size( + rocksdb_options_t* opt, size_t v) { + opt->rep.arena_block_size = v; +} + void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t* opt, int disable) { opt->rep.disable_auto_compactions = disable; } @@ -590,6 +927,11 @@ void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t* opt, int dis opt->rep.disable_seek_compaction = disable; } +void rocksdb_options_set_delete_obsolete_files_period_micros( + rocksdb_options_t* opt, uint64_t v) { + opt->rep.delete_obsolete_files_period_micros = v; +} + void rocksdb_options_set_source_compaction_factor( rocksdb_options_t* opt, int n) { opt->rep.expanded_compaction_factor = n; @@ -607,6 +949,21 @@ void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t *opt) { opt->rep.memtable_factory.reset(factory); } +void rocksdb_options_set_memtable_prefix_bloom_bits( + rocksdb_options_t* opt, uint32_t v) { + opt->rep.memtable_prefix_bloom_bits = v; +} + +void rocksdb_options_set_memtable_prefix_bloom_probes( + rocksdb_options_t* opt, uint32_t v) { + opt->rep.memtable_prefix_bloom_probes = v; +} + +void rocksdb_options_set_max_successive_merges( + rocksdb_options_t* opt, size_t v) { + opt->rep.max_successive_merges = v; +} + void rocksdb_options_set_compaction_style(rocksdb_options_t *opt, int style) { opt->rep.compaction_style = static_cast(style); } @@ -617,38 +974,22 @@ void rocksdb_options_set_universal_compaction_options(rocksdb_options_t *opt, ro /* TODO: -merge_operator +DB::OpenForReadOnly +DB::MultiGet +DB::KeyMayExist +DB::GetOptions +DB::GetLiveFiles +DB::GetSortedWalFiles +DB::GetLatestSequenceNumber +DB::GetUpdatesSince +DB::DeleteFile +DB::GetLiveFilesMetaData +DB::GetDbIdentity +DB::RunManualCompaction +custom cache compaction_filter -prefix_extractor -whole_key_filtering max_bytes_for_level_multiplier_additional -delete_obsolete_files_period_micros -max_log_file_size -log_file_time_to_roll -keep_log_file_num -soft_rate_limit -hard_rate_limit -rate_limit_delay_max_milliseconds -max_manifest_file_size -no_block_cache -table_cache_numshardbits -table_cache_remove_scan_count_limit -arena_block_size -manifest_preallocation_size -purge_redundant_kvs_while_flush -allow_os_buffer -allow_mmap_reads -allow_mmap_writes -is_fd_close_on_exec -skip_log_error_on_recovery -stats_dump_period_sec -block_size_deviation -advise_random_on_open access_hint_on_compaction_start -use_adaptive_mutex -bytes_per_sync -filter_deletes -max_sequential_skip_in_iterations table_factory table_properties_collectors inplace_update_support @@ -687,12 +1028,16 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( void*, const char* key, size_t length, const char* filter, size_t filter_length), + void (*delete_filter)( + void*, + const char* filter, size_t filter_length), const char* (*name)(void*)) { rocksdb_filterpolicy_t* result = new rocksdb_filterpolicy_t; result->state_ = state; result->destructor_ = destructor; result->create_ = create_filter; result->key_match_ = key_may_match; + result->delete_filter_ = delete_filter; result->name_ = name; return result; } @@ -720,10 +1065,45 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) { Wrapper* wrapper = new Wrapper; wrapper->rep_ = NewBloomFilterPolicy(bits_per_key); wrapper->state_ = NULL; + wrapper->delete_filter_ = NULL; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } +rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( + void* state, + void (*destructor)(void*), + char* (*full_merge)( + void*, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length), + char* (*partial_merge)( + void*, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length), + void (*delete_value)( + void*, + const char* value, size_t value_length), + const char* (*name)(void*)) { + rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t; + result->state_ = state; + result->destructor_ = destructor; + result->full_merge_ = full_merge; + result->partial_merge_ = partial_merge; + result->delete_value_ = delete_value; + result->name_ = name; + return result; +} + +void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t* merge_operator) { + delete merge_operator; +} + rocksdb_readoptions_t* rocksdb_readoptions_create() { return new rocksdb_readoptions_t; } @@ -743,12 +1123,33 @@ void rocksdb_readoptions_set_fill_cache( opt->rep.fill_cache = v; } +void rocksdb_readoptions_set_prefix_seek( + rocksdb_readoptions_t* opt, unsigned char v) { + opt->rep.prefix_seek = v; +} + void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t* opt, const rocksdb_snapshot_t* snap) { opt->rep.snapshot = (snap ? snap->rep : NULL); } +void rocksdb_readoptions_set_prefix( + rocksdb_readoptions_t* opt, const char* key, size_t keylen) { + Slice prefix = Slice(key, keylen); + opt->rep.prefix = &prefix; +} + +void rocksdb_readoptions_set_read_tier( + rocksdb_readoptions_t* opt, int v) { + opt->rep.read_tier = static_cast(v); +} + +void rocksdb_readoptions_set_tailing( + rocksdb_readoptions_t* opt, unsigned char v) { + opt->rep.tailing = v; +} + rocksdb_writeoptions_t* rocksdb_writeoptions_create() { return new rocksdb_writeoptions_t; } @@ -767,6 +1168,19 @@ void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable) } +rocksdb_flushoptions_t* rocksdb_flushoptions_create() { + return new rocksdb_flushoptions_t; +} + +void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t* opt) { + delete opt; +} + +void rocksdb_flushoptions_set_wait( + rocksdb_flushoptions_t* opt, unsigned char v) { + opt->rep.wait = v; +} + rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) { rocksdb_cache_t* c = new rocksdb_cache_t; c->rep = NewLRUCache(capacity); @@ -797,6 +1211,57 @@ void rocksdb_env_destroy(rocksdb_env_t* env) { delete env; } +rocksdb_slicetransform_t* rocksdb_slicetransform_create( + void* state, + void (*destructor)(void*), + char* (*transform)( + void*, + const char* key, size_t length, + size_t* dst_length), + unsigned char (*in_domain)( + void*, + const char* key, size_t length), + unsigned char (*in_range)( + void*, + const char* key, size_t length), + const char* (*name)(void*)) { + rocksdb_slicetransform_t* result = new rocksdb_slicetransform_t; + result->state_ = state; + result->destructor_ = destructor; + result->transform_ = transform; + result->in_domain_ = in_domain; + result->in_range_ = in_range; + result->name_ = name; + return result; +} + +void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t* st) { + delete st; +} + +rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t prefixLen) { + struct Wrapper : public rocksdb_slicetransform_t { + const SliceTransform* rep_; + ~Wrapper() { delete rep_; } + const char* Name() const { return rep_->Name(); } + Slice Transform(const Slice& src) const { + return rep_->Transform(src); + } + bool InDomain(const Slice& src) const { + return rep_->InDomain(src); + } + bool InRange(const Slice& src) const { + return rep_->InRange(src); + } + static void DoNothing(void*) { } + }; + Wrapper* wrapper = new Wrapper; + wrapper->rep_ = rocksdb::NewFixedPrefixTransform(prefixLen); + wrapper->state_ = NULL; + wrapper->destructor_ = &Wrapper::DoNothing; + return wrapper; +} + rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create() { rocksdb_universal_compaction_options_t* result = new rocksdb_universal_compaction_options_t; result->rep = new rocksdb::CompactionOptionsUniversal; diff --git a/db/c_test.c b/db/c_test.c index 8c5e8e534..a68abca48 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -154,6 +154,37 @@ unsigned char FilterKeyMatch( return fake_filter_result; } +// Custom merge operator +static void MergeOperatorDestroy(void* arg) { } +static const char* MergeOperatorName(void* arg) { + return "TestMergeOperator"; +} +static char* MergeOperatorFullMerge( + void* arg, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length) { + *new_value_length = 4; + *success = 1; + char* result = malloc(4); + memcpy(result, "fake", 4); + return result; +} +static char* MergeOperatorPartialMerge( + void* arg, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length) { + *new_value_length = 4; + *success = 1; + char* result = malloc(4); + memcpy(result, "fake", 4); + return result; +} + int main(int argc, char** argv) { rocksdb_t* db; rocksdb_comparator_t* cmp; @@ -342,7 +373,7 @@ int main(int argc, char** argv) { rocksdb_filterpolicy_t* policy; if (run == 0) { policy = rocksdb_filterpolicy_create( - NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName); + NULL, FilterDestroy, FilterCreate, FilterKeyMatch, NULL, FilterName); } else { policy = rocksdb_filterpolicy_create_bloom(10); } @@ -376,6 +407,32 @@ int main(int argc, char** argv) { rocksdb_filterpolicy_destroy(policy); } + StartPhase("merge_operator"); + { + rocksdb_mergeoperator_t* merge_operator; + merge_operator = rocksdb_mergeoperator_create( + NULL, MergeOperatorDestroy, MergeOperatorFullMerge, + MergeOperatorPartialMerge, NULL, MergeOperatorName); + // Create new database + rocksdb_close(db); + rocksdb_destroy_db(options, dbname, &err); + rocksdb_options_set_merge_operator(options, merge_operator); + db = rocksdb_open(options, dbname, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err); + CheckNoError(err); + CheckGet(db, roptions, "foo", "foovalue"); + rocksdb_merge(db, woptions, "foo", 3, "barvalue", 8, &err); + CheckNoError(err); + CheckGet(db, roptions, "foo", "fake"); + + // Merge of a non-existing value + rocksdb_merge(db, woptions, "bar", 3, "barvalue", 8, &err); + CheckNoError(err); + CheckGet(db, roptions, "bar", "fake"); + + } + StartPhase("cleanup"); rocksdb_close(db); rocksdb_options_destroy(options); diff --git a/db/db_impl.cc b/db/db_impl.cc index 91c60ddbb..2b409999e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3749,6 +3749,12 @@ Status DBImpl::DeleteFile(std::string name) { LogFlush(options_.info_log); // remove files outside the db-lock PurgeObsoleteFiles(deletion_state); + { + MutexLock l(&mutex_); + // schedule flush if file deletion means we freed the space for flushes to + // continue + MaybeScheduleFlushOrCompaction(); + } return status; } @@ -3864,7 +3870,7 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname, std::vector* handles, DB** dbptr) { *dbptr = nullptr; handles->clear(); - EnvOptions soptions; + EnvOptions soptions(db_options); size_t max_write_buffer_size = 0; for (auto cf : column_families) { diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 3ad7ce8d7..85d047809 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -220,7 +220,6 @@ class TestPlainTableFactory : public PlainTableFactory { size_t index_sparseness = 16) : PlainTableFactory(user_key_len, user_key_len, hash_table_ratio, hash_table_ratio), - user_key_len_(user_key_len), bloom_bits_per_key_(bloom_bits_per_key), hash_table_ratio_(hash_table_ratio), index_sparseness_(index_sparseness), @@ -245,7 +244,6 @@ class TestPlainTableFactory : public PlainTableFactory { } private: - uint32_t user_key_len_; int bloom_bits_per_key_; double hash_table_ratio_; size_t index_sparseness_; diff --git a/db/prefix_test.cc b/db/prefix_test.cc index ca00c31b3..c43ba5c4d 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -1,3 +1,8 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + #include #include #include @@ -6,6 +11,8 @@ #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/perf_context.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/memtablerep.h" #include "util/histogram.h" #include "util/stop_watch.h" #include "util/testharness.h" @@ -97,6 +104,36 @@ class TestKeyComparator : public Comparator { }; +void PutKey(DB* db, WriteOptions write_options, uint64_t prefix, + uint64_t suffix, const Slice& value) { + TestKey test_key(prefix, suffix); + Slice key = TestKeyToSlice(test_key); + ASSERT_OK(db->Put(write_options, key, value)); +} + +void SeekIterator(Iterator* iter, uint64_t prefix, uint64_t suffix) { + TestKey test_key(prefix, suffix); + Slice key = TestKeyToSlice(test_key); + iter->Seek(key); +} + +const std::string kNotFoundResult = "NOT_FOUND"; + +std::string Get(DB* db, const ReadOptions& read_options, uint64_t prefix, + uint64_t suffix) { + TestKey test_key(prefix, suffix); + Slice key = TestKeyToSlice(test_key); + + std::string result; + Status s = db->Get(read_options, key, &result); + if (s.IsNotFound()) { + result = kNotFoundResult; + } else if (!s.ok()) { + result = s.ToString(); + } + return result; +} + class PrefixTest { public: std::shared_ptr OpenDb() { @@ -116,7 +153,11 @@ class PrefixTest { return std::shared_ptr(db); } - bool NextOptions() { + void FirstOption() { + option_config_ = kBegin; + } + + bool NextOptions(int bucket_count) { // skip some options option_config_++; if (option_config_ < kEnd) { @@ -124,15 +165,12 @@ class PrefixTest { options.prefix_extractor = prefix_extractor; switch(option_config_) { case kHashSkipList: - options.memtable_factory.reset( - NewHashSkipListRepFactory(options.prefix_extractor, - FLAGS_bucket_count, - FLAGS_skiplist_height)); + options.memtable_factory.reset(NewHashSkipListRepFactory( + options.prefix_extractor, bucket_count, FLAGS_skiplist_height)); return true; case kHashLinkList: - options.memtable_factory.reset( - NewHashLinkListRepFactory(options.prefix_extractor, - FLAGS_bucket_count)); + options.memtable_factory.reset(NewHashLinkListRepFactory( + options.prefix_extractor, bucket_count)); return true; default: return false; @@ -158,8 +196,182 @@ class PrefixTest { Options options; }; +TEST(PrefixTest, TestResult) { + for (int num_buckets = 1; num_buckets <= 2; num_buckets++) { + FirstOption(); + while (NextOptions(num_buckets)) { + std::cout << "*** Mem table: " << options.memtable_factory->Name() + << " number of buckets: " << num_buckets + << std::endl; + DestroyDB(kDbName, Options()); + auto db = OpenDb(); + WriteOptions write_options; + ReadOptions read_options; + read_options.prefix_seek = true; + + // 1. Insert one row. + Slice v16("v16"); + PutKey(db.get(), write_options, 1, 6, v16); + std::unique_ptr iter(db->NewIterator(read_options)); + SeekIterator(iter.get(), 1, 6); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(!iter->Valid()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(!iter->Valid()); + + ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 5)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 7)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 0, 6)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 2, 6)); + + // 2. Insert an entry for the same prefix as the last entry in the bucket. + Slice v17("v17"); + PutKey(db.get(), write_options, 1, 7, v17); + iter.reset(db->NewIterator(read_options)); + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 6); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + iter->Next(); + ASSERT_TRUE(!iter->Valid()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(!iter->Valid()); + + // 3. Insert an entry for the same prefix as the head of the bucket. + Slice v15("v15"); + PutKey(db.get(), write_options, 1, 5, v15); + iter.reset(db->NewIterator(read_options)); + + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + + ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); + ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); + ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); + + // 4. Insert an entry with a larger prefix + Slice v22("v22"); + PutKey(db.get(), write_options, 2, 2, v22); + iter.reset(db->NewIterator(read_options)); + + SeekIterator(iter.get(), 2, 2); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + // 5. Insert an entry with a smaller prefix + Slice v02("v02"); + PutKey(db.get(), write_options, 0, 2, v02); + iter.reset(db->NewIterator(read_options)); + + SeekIterator(iter.get(), 0, 2); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v02 == iter->value()); + SeekIterator(iter.get(), 0, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v02 == iter->value()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + // 6. Insert to the beginning and the end of the first prefix + Slice v13("v13"); + Slice v18("v18"); + PutKey(db.get(), write_options, 1, 3, v13); + PutKey(db.get(), write_options, 1, 8, v18); + iter.reset(db->NewIterator(read_options)); + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 3); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v13 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v18 == iter->value()); + + SeekIterator(iter.get(), 0, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v02 == iter->value()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + + ASSERT_EQ(v22.ToString(), Get(db.get(), read_options, 2, 2)); + ASSERT_EQ(v02.ToString(), Get(db.get(), read_options, 0, 2)); + ASSERT_EQ(v13.ToString(), Get(db.get(), read_options, 1, 3)); + ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); + ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); + ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); + ASSERT_EQ(v18.ToString(), Get(db.get(), read_options, 1, 8)); + } + } +} + TEST(PrefixTest, DynamicPrefixIterator) { - while (NextOptions()) { + while (NextOptions(FLAGS_bucket_count)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << std::endl; DestroyDB(kDbName, Options()); @@ -260,7 +472,7 @@ TEST(PrefixTest, DynamicPrefixIterator) { } TEST(PrefixTest, PrefixHash) { - while (NextOptions()) { + while (NextOptions(FLAGS_bucket_count)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << std::endl; DestroyDB(kDbName, Options()); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 405b292da..91efed37f 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -54,22 +54,25 @@ extern "C" { /* Exported types */ -typedef struct rocksdb_t rocksdb_t; -typedef struct rocksdb_cache_t rocksdb_cache_t; -typedef struct rocksdb_comparator_t rocksdb_comparator_t; -typedef struct rocksdb_env_t rocksdb_env_t; -typedef struct rocksdb_filelock_t rocksdb_filelock_t; -typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t; -typedef struct rocksdb_iterator_t rocksdb_iterator_t; -typedef struct rocksdb_logger_t rocksdb_logger_t; -typedef struct rocksdb_options_t rocksdb_options_t; -typedef struct rocksdb_randomfile_t rocksdb_randomfile_t; -typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; -typedef struct rocksdb_seqfile_t rocksdb_seqfile_t; -typedef struct rocksdb_snapshot_t rocksdb_snapshot_t; -typedef struct rocksdb_writablefile_t rocksdb_writablefile_t; -typedef struct rocksdb_writebatch_t rocksdb_writebatch_t; -typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t; +typedef struct rocksdb_t rocksdb_t; +typedef struct rocksdb_cache_t rocksdb_cache_t; +typedef struct rocksdb_comparator_t rocksdb_comparator_t; +typedef struct rocksdb_env_t rocksdb_env_t; +typedef struct rocksdb_filelock_t rocksdb_filelock_t; +typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t; +typedef struct rocksdb_flushoptions_t rocksdb_flushoptions_t; +typedef struct rocksdb_iterator_t rocksdb_iterator_t; +typedef struct rocksdb_logger_t rocksdb_logger_t; +typedef struct rocksdb_mergeoperator_t rocksdb_mergeoperator_t; +typedef struct rocksdb_options_t rocksdb_options_t; +typedef struct rocksdb_randomfile_t rocksdb_randomfile_t; +typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; +typedef struct rocksdb_seqfile_t rocksdb_seqfile_t; +typedef struct rocksdb_slicetransform_t rocksdb_slicetransform_t; +typedef struct rocksdb_snapshot_t rocksdb_snapshot_t; +typedef struct rocksdb_writablefile_t rocksdb_writablefile_t; +typedef struct rocksdb_writebatch_t rocksdb_writebatch_t; +typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t; typedef struct rocksdb_universal_compaction_options_t rocksdb_universal_compaction_options_t; /* DB operations */ @@ -94,6 +97,13 @@ extern void rocksdb_delete( const char* key, size_t keylen, char** errptr); +extern void rocksdb_merge( + rocksdb_t* db, + const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr); + extern void rocksdb_write( rocksdb_t* db, const rocksdb_writeoptions_t* options, @@ -138,6 +148,20 @@ extern void rocksdb_compact_range( const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len); +extern void rocksdb_flush( + rocksdb_t* db, + const rocksdb_flushoptions_t* options, + char** errptr); + +extern void rocksdb_disable_file_deletions( + rocksdb_t* db, + char** errptr); + +extern void rocksdb_enable_file_deletions( + rocksdb_t* db, + unsigned char force, + char** errptr); + /* Management operations */ extern void rocksdb_destroy_db( @@ -172,6 +196,10 @@ extern void rocksdb_writebatch_put( rocksdb_writebatch_t*, const char* key, size_t klen, const char* val, size_t vlen); +extern void rocksdb_writebatch_merge( + rocksdb_writebatch_t*, + const char* key, size_t klen, + const char* val, size_t vlen); extern void rocksdb_writebatch_delete( rocksdb_writebatch_t*, const char* key, size_t klen); @@ -188,6 +216,8 @@ extern void rocksdb_options_destroy(rocksdb_options_t*); extern void rocksdb_options_set_comparator( rocksdb_options_t*, rocksdb_comparator_t*); +extern void rocksdb_options_set_merge_operator(rocksdb_options_t*, + rocksdb_mergeoperator_t*); extern void rocksdb_options_set_compression_per_level( rocksdb_options_t* opt, int* level_values, @@ -206,10 +236,14 @@ extern void rocksdb_options_set_info_log(rocksdb_options_t*, rocksdb_logger_t*); extern void rocksdb_options_set_write_buffer_size(rocksdb_options_t*, size_t); extern void rocksdb_options_set_max_open_files(rocksdb_options_t*, int); extern void rocksdb_options_set_cache(rocksdb_options_t*, rocksdb_cache_t*); +extern void rocksdb_options_set_cache_compressed(rocksdb_options_t*, rocksdb_cache_t*); extern void rocksdb_options_set_block_size(rocksdb_options_t*, size_t); extern void rocksdb_options_set_block_restart_interval(rocksdb_options_t*, int); extern void rocksdb_options_set_compression_options( rocksdb_options_t*, int, int, int); +extern void rocksdb_options_set_whole_key_filtering(rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_prefix_extractor( + rocksdb_options_t*, rocksdb_slicetransform_t*); extern void rocksdb_options_set_num_levels(rocksdb_options_t*, int); extern void rocksdb_options_set_level0_file_num_compaction_trigger( rocksdb_options_t*, int); @@ -217,23 +251,97 @@ extern void rocksdb_options_set_level0_slowdown_writes_trigger( rocksdb_options_t*, int); extern void rocksdb_options_set_level0_stop_writes_trigger( rocksdb_options_t*, int); +extern void rocksdb_options_set_max_mem_compaction_level( + rocksdb_options_t*, int); extern void rocksdb_options_set_target_file_size_base( rocksdb_options_t*, uint64_t); extern void rocksdb_options_set_target_file_size_multiplier( rocksdb_options_t*, int); +extern void rocksdb_options_set_max_bytes_for_level_base( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_max_bytes_for_level_multiplier( + rocksdb_options_t*, int); +extern void rocksdb_options_set_expanded_compaction_factor( + rocksdb_options_t*, int); +extern void rocksdb_options_set_max_grandparent_overlap_factor( + rocksdb_options_t*, int); +extern void rocksdb_options_enable_statistics(rocksdb_options_t*); + extern void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t*, int); extern void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t*, int); extern void rocksdb_options_set_max_background_compactions(rocksdb_options_t*, int); extern void rocksdb_options_set_max_background_flushes(rocksdb_options_t*, int); +extern void rocksdb_options_set_max_log_file_size(rocksdb_options_t*, size_t); +extern void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t*, size_t); +extern void rocksdb_options_set_keep_log_file_num(rocksdb_options_t*, size_t); +extern void rocksdb_options_set_soft_rate_limit(rocksdb_options_t*, double); +extern void rocksdb_options_set_hard_rate_limit(rocksdb_options_t*, double); +extern void rocksdb_options_set_rate_limit_delay_max_milliseconds( + rocksdb_options_t*, unsigned int); +extern void rocksdb_options_set_max_manifest_file_size( + rocksdb_options_t*, size_t); +extern void rocksdb_options_set_no_block_cache( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_table_cache_numshardbits( + rocksdb_options_t*, int); +extern void rocksdb_options_set_table_cache_remove_scan_count_limit( + rocksdb_options_t*, int); +extern void rocksdb_options_set_arena_block_size( + rocksdb_options_t*, size_t); extern void rocksdb_options_set_use_fsync( rocksdb_options_t*, int); +extern void rocksdb_options_set_db_stats_log_interval( + rocksdb_options_t*, int); +extern void rocksdb_options_set_db_log_dir( + rocksdb_options_t*, const char*); +extern void rocksdb_options_set_wal_dir( + rocksdb_options_t*, const char*); +extern void rocksdb_options_set_WAL_ttl_seconds( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_WAL_size_limit_MB( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_manifest_preallocation_size( + rocksdb_options_t*, size_t); +extern void rocksdb_options_set_purge_redundant_kvs_while_flush( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_allow_os_buffer( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_allow_mmap_reads( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_allow_mmap_writes( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_is_fd_close_on_exec( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_skip_log_error_on_recovery( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_stats_dump_period_sec( + rocksdb_options_t*, unsigned int); +extern void rocksdb_options_set_block_size_deviation( + rocksdb_options_t*, int); +extern void rocksdb_options_set_advise_random_on_open( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_use_adaptive_mutex( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_bytes_per_sync( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_filter_deletes( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_max_sequential_skip_in_iterations( + rocksdb_options_t*, uint64_t); extern void rocksdb_options_set_disable_data_sync(rocksdb_options_t*, int); extern void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t*, int); extern void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t*, int); +extern void rocksdb_options_set_delete_obsolete_files_period_micros( + rocksdb_options_t*, uint64_t); extern void rocksdb_options_set_source_compaction_factor(rocksdb_options_t*, int); extern void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t*); extern void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t*); - +extern void rocksdb_options_set_memtable_prefix_bloom_bits( + rocksdb_options_t*, uint32_t); +extern void rocksdb_options_set_memtable_prefix_bloom_probes( + rocksdb_options_t*, uint32_t); +extern void rocksdb_options_set_max_successive_merges( + rocksdb_options_t*, size_t); enum { rocksdb_no_compression = 0, @@ -277,12 +385,39 @@ extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( void*, const char* key, size_t length, const char* filter, size_t filter_length), + void (*delete_filter)( + void*, + const char* filter, size_t filter_length), const char* (*name)(void*)); extern void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t*); extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom( int bits_per_key); +/* Merge Operator */ + +extern rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( + void* state, + void (*destructor)(void*), + char* (*full_merge)( + void*, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length), + char* (*partial_merge)( + void*, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length), + void (*delete_value)( + void*, + const char* value, size_t value_length), + const char* (*name)(void*)); +extern void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t*); + /* Read options */ extern rocksdb_readoptions_t* rocksdb_readoptions_create(); @@ -292,9 +427,17 @@ extern void rocksdb_readoptions_set_verify_checksums( unsigned char); extern void rocksdb_readoptions_set_fill_cache( rocksdb_readoptions_t*, unsigned char); +extern void rocksdb_readoptions_set_prefix_seek( + rocksdb_readoptions_t*, unsigned char); extern void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t*, const rocksdb_snapshot_t*); +extern void rocksdb_readoptions_set_prefix( + rocksdb_readoptions_t*, const char* key, size_t keylen); +extern void rocksdb_readoptions_set_read_tier( + rocksdb_readoptions_t*, int); +extern void rocksdb_readoptions_set_tailing( + rocksdb_readoptions_t*, unsigned char); /* Write options */ @@ -304,6 +447,13 @@ extern void rocksdb_writeoptions_set_sync( rocksdb_writeoptions_t*, unsigned char); extern void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable); +/* Flush options */ + +extern rocksdb_flushoptions_t* rocksdb_flushoptions_create(); +extern void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t*); +extern void rocksdb_flushoptions_set_wait( + rocksdb_flushoptions_t*, unsigned char); + /* Cache */ extern rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity); @@ -316,6 +466,25 @@ extern void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n); extern void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n); extern void rocksdb_env_destroy(rocksdb_env_t*); +/* SliceTransform */ + +extern rocksdb_slicetransform_t* rocksdb_slicetransform_create( + void* state, + void (*destructor)(void*), + char* (*transform)( + void*, + const char* key, size_t length, + size_t* dst_length), + unsigned char (*in_domain)( + void*, + const char* key, size_t length), + unsigned char (*in_range)( + void*, + const char* key, size_t length), + const char* (*name)(void*)); +extern rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t); +extern void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t*); + /* Universal Compaction options */ enum { diff --git a/linters/cpp_linter/ArcanistCpplintLinter.php b/linters/cpp_linter/ArcanistCpplintLinter.php index cb7842248..b9c413755 100644 --- a/linters/cpp_linter/ArcanistCpplintLinter.php +++ b/linters/cpp_linter/ArcanistCpplintLinter.php @@ -19,32 +19,8 @@ final class ArcanistCpplintLinter extends ArcanistLinter { return 'cpplint.py'; } - public function getLintOptions() { - $config = $this->getEngine()->getConfigurationManager(); - $options = $config->getConfigFromAnySource('lint.cpplint.options', ''); - - return $options; - } - public function getLintPath() { - $config = $this->getEngine()->getConfigurationManager(); - $prefix = $config->getConfigFromAnySource('lint.cpplint.prefix'); - $bin = $config->getConfigFromAnySource('lint.cpplint.bin', 'cpplint.py'); - - if ($prefix !== null) { - if (!Filesystem::pathExists($prefix.'/'.$bin)) { - throw new ArcanistUsageException( - "Unable to find cpplint.py binary in a specified directory. Make ". - "sure that 'lint.cpplint.prefix' and 'lint.cpplint.bin' keys are ". - "set correctly. If you'd rather use a copy of cpplint installed ". - "globally, you can just remove these keys from your .arcconfig."); - } - - $bin = csprintf("%s/%s", $prefix, $bin); - - return $bin; - } - + $bin = 'cpplint.py'; // Search under current dir list($err) = exec_manual('which %s/%s', $this->linterDir(), $bin); if (!$err) { @@ -57,7 +33,7 @@ final class ArcanistCpplintLinter extends ArcanistLinter { throw new ArcanistUsageException( "cpplint.py does not appear to be installed on this system. Install ". "it (e.g., with 'wget \"http://google-styleguide.googlecode.com/". - "svn/trunk/cpplint/cpplint.py\"') or configure 'lint.cpplint.prefix' ". + "svn/trunk/cpplint/cpplint.py\"') ". "in your .arcconfig to point to the directory where it resides. ". "Also don't forget to chmod a+x cpplint.py!"); } @@ -67,10 +43,9 @@ final class ArcanistCpplintLinter extends ArcanistLinter { public function lintPath($path) { $bin = $this->getLintPath(); - $options = $this->getLintOptions(); $path = $this->rocksdbDir().'/'.$path; - $f = new ExecFuture("%C %C $path", $bin, $options); + $f = new ExecFuture("%C $path", $bin); list($err, $stdout, $stderr) = $f->resolve(); diff --git a/linters/cpp_linter/cpplint.py b/linters/cpp_linter/cpplint.py index 814b44f20..d264b00da 100755 --- a/linters/cpp_linter/cpplint.py +++ b/linters/cpp_linter/cpplint.py @@ -1420,6 +1420,9 @@ def CheckForHeaderGuard(filename, lines, error): endif = None endif_linenum = 0 for linenum, line in enumerate(lines): + # Already been well guarded, no need for further checking. + if line.strip() == "#pragma once": + return linesplit = line.split() if len(linesplit) >= 2: # find the first occurrence of #ifndef and #define, save arg @@ -3101,6 +3104,11 @@ def CheckBraces(filename, clean_lines, linenum, error): 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or Search(r'\s+=\s*$', line_prefix)): match = None + # Whitelist lambda function definition which also requires a ";" after + # closing brace + if match: + if Match(r'^.*\[.*\]\s*(.*\)\s*)\{', line): + match = None else: # Try matching cases 2-3. diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 88ec65557..4921d28f4 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -300,7 +300,7 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, assert(index_block->compressionType() == kNoCompression); rep->index_block.reset(index_block); - // Set index block + // Set filter block if (rep->options.filter_policy) { std::string key = kFilterBlockPrefix; key.append(rep->options.filter_policy->Name()); @@ -681,8 +681,14 @@ Iterator* BlockBasedTable::BlockReader(void* arg, BlockBasedTable::CachableEntry BlockBasedTable::GetFilter(bool no_io) const { - if (!rep_->options.filter_policy || !rep_->options.block_cache) { - return {rep_->filter.get(), nullptr}; + // filter pre-populated + if (rep_->filter != nullptr) { + return {rep_->filter.get(), nullptr /* cache handle */}; + } + + if (rep_->options.filter_policy == nullptr /* do not use filter at all */ || + rep_->options.block_cache == nullptr /* no block cache at all */) { + return {nullptr /* filter */, nullptr /* cache handle */}; } // Fetching from the cache @@ -979,4 +985,12 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) { return result; } +bool BlockBasedTable::TEST_filter_block_preloaded() const { + return rep_->filter != nullptr; +} + +bool BlockBasedTable::TEST_index_block_preloaded() const { + return rep_->index_block != nullptr; +} + } // namespace rocksdb diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index c711e7036..fc584d9ec 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -90,6 +90,9 @@ class BlockBasedTable : public TableReader { ~BlockBasedTable(); + bool TEST_filter_block_preloaded() const; + bool TEST_index_block_preloaded() const; + private: template struct CachableEntry; diff --git a/table/table_test.cc b/table/table_test.cc index 9f6efbf50..2f6881dd0 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -29,6 +29,7 @@ #include "rocksdb/memtablerep.h" #include "table/block.h" #include "table/meta_blocks.h" +#include "table/block_based_table_reader.h" #include "table/block_based_table_builder.h" #include "table/block_based_table_factory.h" #include "table/block_based_table_reader.h" @@ -961,6 +962,7 @@ class BlockBasedTableTest : public TableTest {}; class PlainTableTest : public TableTest {}; class TablePropertyTest {}; +/* // This test serves as the living tutorial for the prefix scan of user collected // properties. TEST(TablePropertyTest, PrefixScanTest) { @@ -1122,19 +1124,37 @@ TEST(BlockBasedTableTest, NumBlockStat) { ASSERT_EQ(kvmap.size(), c.table_reader()->GetTableProperties()->num_data_blocks); } +*/ -class BlockCacheProperties { +// A simple tool that takes the snapshot of block cache statistics. +class BlockCachePropertiesSnapshot { public: - explicit BlockCacheProperties(Statistics* statistics) { + explicit BlockCachePropertiesSnapshot(Statistics* statistics) { block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS); block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT); index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS); index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT); data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS); data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT); + filter_block_cache_miss = + statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS); + filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT); + } + + void AssertIndexBlockStat(int64_t index_block_cache_miss, + int64_t index_block_cache_hit) { + ASSERT_EQ(index_block_cache_miss, this->index_block_cache_miss); + ASSERT_EQ(index_block_cache_hit, this->index_block_cache_hit); + } + + void AssertFilterBlockStat(int64_t filter_block_cache_miss, + int64_t filter_block_cache_hit) { + ASSERT_EQ(filter_block_cache_miss, this->filter_block_cache_miss); + ASSERT_EQ(filter_block_cache_hit, this->filter_block_cache_hit); } // Check if the fetched props matches the expected ones. + // TODO(kailiu) Use this only when you disabled filter policy! void AssertEqual(int64_t index_block_cache_miss, int64_t index_block_cache_hit, int64_t data_block_cache_miss, int64_t data_block_cache_hit) const { @@ -1155,9 +1175,55 @@ class BlockCacheProperties { int64_t index_block_cache_hit = 0; int64_t data_block_cache_miss = 0; int64_t data_block_cache_hit = 0; + int64_t filter_block_cache_miss = 0; + int64_t filter_block_cache_hit = 0; }; -TEST(BlockBasedTableTest, BlockCacheTest) { +// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't +// use block cache to store them). +TEST(BlockBasedTableTest, BlockCacheDisabledTest) { + Options options; + options.create_if_missing = true; + options.statistics = CreateDBStatistics(); + options.block_cache = NewLRUCache(1024); + std::unique_ptr filter_policy(NewBloomFilterPolicy(10)); + options.filter_policy = filter_policy.get(); + BlockBasedTableOptions table_options; + // Intentionally commented out: table_options.cache_index_and_filter_blocks = + // true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + std::vector keys; + KVMap kvmap; + + TableConstructor c(BytewiseComparator()); + c.Add("key", "value"); + c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, + &kvmap); + + // preloading filter/index blocks is enabled. + auto reader = dynamic_cast(c.table_reader()); + ASSERT_TRUE(reader->TEST_filter_block_preloaded()); + ASSERT_TRUE(reader->TEST_index_block_preloaded()); + + { + // nothing happens in the beginning + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertIndexBlockStat(0, 0); + props.AssertFilterBlockStat(0, 0); + } + + { + // a hack that just to trigger BlockBasedTable::GetFilter. + reader->Get(ReadOptions(), "non-exist-key", nullptr, nullptr, nullptr); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertIndexBlockStat(0, 0); + props.AssertFilterBlockStat(0, 0); + } +} + +// Due to the difficulities of the intersaction between statistics, this test +// only tests the case when "index block is put to block cache" +TEST(BlockBasedTableTest, FilterBlockInBlockCache) { // -- Table construction Options options; options.create_if_missing = true; @@ -1175,6 +1241,10 @@ TEST(BlockBasedTableTest, BlockCacheTest) { c.Add("key", "value"); c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); + // preloading filter/index blocks is prohibited. + auto reader = dynamic_cast(c.table_reader()); + ASSERT_TRUE(!reader->TEST_filter_block_preloaded()); + ASSERT_TRUE(!reader->TEST_index_block_preloaded()); // -- PART 1: Open with regular block cache. // Since block_cache is disabled, no cache activities will be involved. @@ -1182,7 +1252,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // At first, no block will be accessed. { - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); // index will be added to block cache. props.AssertEqual(1, // index block miss 0, 0, 0); @@ -1191,7 +1261,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // Only index block will be accessed { iter.reset(c.NewIterator()); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); // NOTE: to help better highlight the "detla" of each ticker, I use // + to indicate the increment of changed // value; other numbers remain the same. @@ -1202,7 +1272,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // Only data block will be accessed { iter->SeekToFirst(); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, 1, 0 + 1, // data block miss 0); } @@ -1211,7 +1281,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { { iter.reset(c.NewIterator()); iter->SeekToFirst(); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, 1 + 1, /* index block hit */ 1, 0 + 1 /* data block hit */); } @@ -1227,7 +1297,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { iter.reset(c.NewIterator()); iter->SeekToFirst(); ASSERT_EQ("key", iter->key().ToString()); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); // Nothing is affected at all props.AssertEqual(0, 0, 0, 0); } @@ -1238,7 +1308,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { options.block_cache = NewLRUCache(1); c.Reopen(options); { - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, // index block miss 0, 0, 0); } @@ -1249,7 +1319,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // It first cache index block then data block. But since the cache size // is only 1, index block will be purged after data block is inserted. iter.reset(c.NewIterator()); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1 + 1, // index block miss 0, 0, // data block miss 0); @@ -1259,7 +1329,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // SeekToFirst() accesses data block. With similar reason, we expect data // block's cache miss. iter->SeekToFirst(); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(2, 0, 0 + 1, // data block miss 0); } diff --git a/util/log_write_bench.cc b/util/log_write_bench.cc new file mode 100644 index 000000000..642d4e832 --- /dev/null +++ b/util/log_write_bench.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include + +#include "rocksdb/env.h" +#include "util/histogram.h" +#include "util/testharness.h" +#include "util/testutil.h" + +// A simple benchmark to simulate transactional logs + +DEFINE_int32(num_records, 6000, "Size of each record."); +DEFINE_int32(record_size, 249, "Size of each record."); +DEFINE_int32(record_interval, 10000, "Interval between records (microSec)"); +DEFINE_int32(bytes_per_sync, 0, "bytes_per_sync parameter in EnvOptions"); +DEFINE_bool(enable_sync, false, "sync after each write."); + +namespace rocksdb { +void RunBenchmark() { + std::string file_name = test::TmpDir() + "/log_write_bench.log"; + Env* env = Env::Default(); + EnvOptions env_options; + env_options.use_mmap_writes = false; + env_options.bytes_per_sync = FLAGS_bytes_per_sync; + unique_ptr file; + env->NewWritableFile(file_name, &file, env_options); + + std::string record; + record.assign('X', FLAGS_record_size); + + HistogramImpl hist; + + uint64_t start_time = env->NowMicros(); + for (int i = 0; i < FLAGS_num_records; i++) { + uint64_t start_nanos = env->NowNanos(); + file->Append(record); + file->Flush(); + if (FLAGS_enable_sync) { + file->Sync(); + } + hist.Add(env->NowNanos() - start_nanos); + + if (i % 1000 == 1) { + fprintf(stderr, "Wrote %d records...\n", i); + } + + int time_to_sleep = + (i + 1) * FLAGS_record_interval - (env->NowMicros() - start_time); + if (time_to_sleep > 0) { + env->SleepForMicroseconds(time_to_sleep); + } + } + + fprintf(stderr, "Distribution of latency of append+flush: \n%s", + hist.ToString().c_str()); +} +} // namespace rocksdb + +int main(int argc, char** argv) { + google::SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + + " [OPTIONS]..."); + google::ParseCommandLineFlags(&argc, &argv, true); + + rocksdb::RunBenchmark(); + return 0; +}