Move advanced column family options to advanced_options.h

Summary: For the sake of making our options simpler, we should keep options.h as simple as possible and move more advanced/less common options to advanced_options.h. I started with ColumnFamilyOptions and also did some re-ordering. I have moved all ColumnFamilyOptions to advanced_options.h and only left these options in options.h: ``` const Comparator* comparator = BytewiseComparator(); std::shared_ptr<MergeOperator> merge_operator = nullptr; const CompactionFilter* compaction_filter = nullptr; std::shared_ptr<CompactionFilterFactory> compaction_filter_factory = nullptr; size_t write_buffer_size = 64 << 20; CompressionType compression; int level0_file_num_compaction_trigger = 4; bool disable_auto_compactions = false; ``` Please feel free to comment on specific options if you think they should be advanced or should not be. Closes https://github.com/facebook/rocksdb/pull/1847 Differential Revision: D4519996 Pulled By: IslamAbdelRahman fbshipit-source-id: abebd9a
9 years ago · 08864df212
parent 2ca2059f66
commit 08864df212
5 changed files with 720 additions and 673 deletions
--- a/include/rocksdb/advanced_options.h
+++ b/include/rocksdb/advanced_options.h
@ -0,0 +1,558 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <memory>
+
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/universal_compaction.h"
+
+namespace rocksdb {
+
+class Slice;
+class SliceTransform;
+enum CompressionType : unsigned char;
+class TablePropertiesCollectorFactory;
+class TableFactory;
+struct Options;
+
+enum CompactionStyle : char {
+  // level based compaction style
+  kCompactionStyleLevel = 0x0,
+  // Universal compaction style
+  // Not supported in ROCKSDB_LITE.
+  kCompactionStyleUniversal = 0x1,
+  // FIFO compaction style
+  // Not supported in ROCKSDB_LITE
+  kCompactionStyleFIFO = 0x2,
+  // Disable background compaction. Compaction jobs are submitted
+  // via CompactFiles().
+  // Not supported in ROCKSDB_LITE
+  kCompactionStyleNone = 0x3,
+};
+
+// In Level-based compaction, it determines which file from a level to be
+// picked to merge to the next level. We suggest people try
+// kMinOverlappingRatio first when you tune your database.
+enum CompactionPri : char {
+  // Slightly prioritize larger files by size compensated by #deletes
+  kByCompensatedSize = 0x0,
+  // First compact files whose data's latest update time is oldest.
+  // Try this if you only update some hot keys in small ranges.
+  kOldestLargestSeqFirst = 0x1,
+  // First compact files whose range hasn't been compacted to the next level
+  // for the longest. If your updates are random across the key space,
+  // write amplification is slightly better with this option.
+  kOldestSmallestSeqFirst = 0x2,
+  // First compact files whose ratio between overlapping size in next level
+  // and its size is the smallest. It in many cases can optimize write
+  // amplification.
+  kMinOverlappingRatio = 0x3,
+};
+
+struct CompactionOptionsFIFO {
+  // once the total sum of table files reaches this, we will delete the oldest
+  // table file
+  // Default: 1GB
+  uint64_t max_table_files_size;
+
+  CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {}
+};
+
+// Compression options for different compression algorithms like Zlib
+struct CompressionOptions {
+  int window_bits;
+  int level;
+  int strategy;
+  // Maximum size of dictionary used to prime the compression library. Currently
+  // this dictionary will be constructed by sampling the first output file in a
+  // subcompaction when the target level is bottommost. This dictionary will be
+  // loaded into the compression library before compressing/uncompressing each
+  // data block of subsequent files in the subcompaction. Effectively, this
+  // improves compression ratios when there are repetitions across data blocks.
+  // A value of 0 indicates the feature is disabled.
+  // Default: 0.
+  uint32_t max_dict_bytes;
+
+  CompressionOptions()
+      : window_bits(-14), level(-1), strategy(0), max_dict_bytes(0) {}
+  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes)
+      : window_bits(wbits),
+        level(_lev),
+        strategy(_strategy),
+        max_dict_bytes(_max_dict_bytes) {}
+};
+
+enum UpdateStatus {    // Return status For inplace update callback
+  UPDATE_FAILED   = 0, // Nothing to update
+  UPDATED_INPLACE = 1, // Value updated inplace
+  UPDATED         = 2, // No inplace update. Merged value set
+};
+
+
+struct AdvancedColumnFamilyOptions {
+  // The maximum number of write buffers that are built up in memory.
+  // The default and the minimum number is 2, so that when 1 write buffer
+  // is being flushed to storage, new writes can continue to the other
+  // write buffer.
+  // If max_write_buffer_number > 3, writing will be slowed down to
+  // options.delayed_write_rate if we are writing to the last write buffer
+  // allowed.
+  //
+  // Default: 2
+  //
+  // Dynamically changeable through SetOptions() API
+  int max_write_buffer_number = 2;
+
+  // The minimum number of write buffers that will be merged together
+  // before writing to storage.  If set to 1, then
+  // all write buffers are flushed to L0 as individual files and this increases
+  // read amplification because a get request has to check in all of these
+  // files. Also, an in-memory merge may result in writing lesser
+  // data to storage if there are duplicate records in each of these
+  // individual write buffers.  Default: 1
+  int min_write_buffer_number_to_merge = 1;
+
+  // The total maximum number of write buffers to maintain in memory including
+  // copies of buffers that have already been flushed.  Unlike
+  // max_write_buffer_number, this parameter does not affect flushing.
+  // This controls the minimum amount of write history that will be available
+  // in memory for conflict checking when Transactions are used.
+  //
+  // When using an OptimisticTransactionDB:
+  // If this value is too low, some transactions may fail at commit time due
+  // to not being able to determine whether there were any write conflicts.
+  //
+  // When using a TransactionDB:
+  // If Transaction::SetSnapshot is used, TransactionDB will read either
+  // in-memory write buffers or SST files to do write-conflict checking.
+  // Increasing this value can reduce the number of reads to SST files
+  // done for conflict detection.
+  //
+  // Setting this value to 0 will cause write buffers to be freed immediately
+  // after they are flushed.
+  // If this value is set to -1, 'max_write_buffer_number' will be used.
+  //
+  // Default:
+  // If using a TransactionDB/OptimisticTransactionDB, the default value will
+  // be set to the value of 'max_write_buffer_number' if it is not explicitly
+  // set by the user.  Otherwise, the default is 0.
+  int max_write_buffer_number_to_maintain = 0;
+
+  // Allows thread-safe inplace updates. If this is true, there is no way to
+  // achieve point-in-time consistency using snapshot or iterator (assuming
+  // concurrent updates). Hence iterator and multi-get will return results
+  // which are not consistent as of any point-in-time.
+  // If inplace_callback function is not set,
+  //   Put(key, new_value) will update inplace the existing_value iff
+  //   * key exists in current memtable
+  //   * new sizeof(new_value) <= sizeof(existing_value)
+  //   * existing_value for that key is a put i.e. kTypeValue
+  // If inplace_callback function is set, check doc for inplace_callback.
+  // Default: false.
+  bool inplace_update_support = false;
+
+  // Number of locks used for inplace update
+  // Default: 10000, if inplace_update_support = true, else 0.
+  //
+  // Dynamically changeable through SetOptions() API
+  size_t inplace_update_num_locks = 10000;
+
+  // existing_value - pointer to previous value (from both memtable and sst).
+  //                  nullptr if key doesn't exist
+  // existing_value_size - pointer to size of existing_value.
+  //                       nullptr if key doesn't exist
+  // delta_value - Delta value to be merged with the existing_value.
+  //               Stored in transaction logs.
+  // merged_value - Set when delta is applied on the previous value.
+
+  // Applicable only when inplace_update_support is true,
+  // this callback function is called at the time of updating the memtable
+  // as part of a Put operation, lets say Put(key, delta_value). It allows the
+  // 'delta_value' specified as part of the Put operation to be merged with
+  // an 'existing_value' of the key in the database.
+
+  // If the merged value is smaller in size than the 'existing_value',
+  // then this function can update the 'existing_value' buffer inplace and
+  // the corresponding 'existing_value_size' pointer, if it wishes to.
+  // The callback should return UpdateStatus::UPDATED_INPLACE
+  // in this case. (In this case, the snapshot-semantics of the rocksdb
+  // Iterator is not atomic anymore).
+
+  // If the merged value is larger in size than the 'existing_value' or the
+  // application does not wish to modify the 'existing_value' buffer inplace,
+  // then the merged value should be returned via *merged_value. It is set by
+  // merging the 'existing_value' and the Put 'delta_value'. The callback should
+  // return UpdateStatus::UPDATED in this case. This merged value will be added
+  // to the memtable.
+
+  // If merging fails or the application does not wish to take any action,
+  // then the callback should return UpdateStatus::UPDATE_FAILED.
+
+  // Please remember that the original call from the application is Put(key,
+  // delta_value). So the transaction log (if enabled) will still contain (key,
+  // delta_value). The 'merged_value' is not stored in the transaction log.
+  // Hence the inplace_callback function should be consistent across db reopens.
+
+  // Default: nullptr
+  UpdateStatus (*inplace_callback)(char* existing_value,
+                                   uint32_t* existing_value_size,
+                                   Slice delta_value,
+                                   std::string* merged_value) = nullptr;
+
+  // if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
+  // create prefix bloom for memtable with the size of
+  // write_buffer_size * memtable_prefix_bloom_size_ratio.
+  // If it is larger than 0.25, it is sanitized to 0.25.
+  //
+  // Default: 0 (disable)
+  //
+  // Dynamically changeable through SetOptions() API
+  double memtable_prefix_bloom_size_ratio = 0.0;
+
+  // Page size for huge page for the arena used by the memtable. If <=0, it
+  // won't allocate from huge page but from malloc.
+  // Users are responsible to reserve huge pages for it to be allocated. For
+  // example:
+  //      sysctl -w vm.nr_hugepages=20
+  // See linux doc Documentation/vm/hugetlbpage.txt
+  // If there isn't enough free huge page available, it will fall back to
+  // malloc.
+  //
+  // Dynamically changeable through SetOptions() API
+  size_t memtable_huge_page_size = 0;
+
+  // If non-nullptr, memtable will use the specified function to extract
+  // prefixes for keys, and for each prefix maintain a hint of insert location
+  // to reduce CPU usage for inserting keys with the prefix. Keys out of
+  // domain of the prefix extractor will be inserted without using hints.
+  //
+  // Currently only the default skiplist based memtable implements the feature.
+  // All other memtable implementations will ignore the option. It incurs ~250
+  // additional bytes of memory overhead to store a hint for each prefix.
+  // Also concurrent writes (when allow_concurrent_memtable_write is true) will
+  // ignore the option.
+  //
+  // The option is best suited for workloads where keys are likely to be
+  // inserted at a location close to the last inserted key with the same prefix.
+  // One example could be inserting keys of the form (prefix + timestamp),
+  // and keys of the same prefix always come in time order. Another
+  // example would be updating the same key over and over again, in which case
+  // the prefix can be the key itself.
+  //
+  // Default: nullptr (disable)
+  std::shared_ptr<const SliceTransform>
+      memtable_insert_with_hint_prefix_extractor = nullptr;
+
+  // Control locality of bloom filter probes to improve cache miss rate.
+  // This option only applies to memtable prefix bloom and plaintable
+  // prefix bloom. It essentially limits every bloom checking to one cache line.
+  // This optimization is turned off when set to 0, and positive number to turn
+  // it on.
+  // Default: 0
+  uint32_t bloom_locality = 0;
+
+  // size of one block in arena memory allocation.
+  // If <= 0, a proper value is automatically calculated (usually 1/8 of
+  // write_buffer_size, rounded up to a multiple of 4KB).
+  //
+  // There are two additional restrictions on the specified size:
+  // (1) size should be in the range of [4096, 2 << 30] and
+  // (2) be the multiple of the CPU word (which helps with the memory
+  // alignment).
+  //
+  // We'll automatically check and adjust the size number to make sure it
+  // conforms to the restrictions.
+  //
+  // Default: 0
+  //
+  // Dynamically changeable through SetOptions() API
+  size_t arena_block_size = 0;
+
+  // Different levels can have different compression policies. There
+  // are cases where most lower levels would like to use quick compression
+  // algorithms while the higher levels (which have more data) use
+  // compression algorithms that have better compression but could
+  // be slower. This array, if non-empty, should have an entry for
+  // each level of the database; these override the value specified in
+  // the previous field 'compression'.
+  //
+  // NOTICE if level_compaction_dynamic_level_bytes=true,
+  // compression_per_level[0] still determines L0, but other elements
+  // of the array are based on base level (the level L0 files are merged
+  // to), and may not match the level users see from info log for metadata.
+  // If L0 files are merged to level-n, then, for i>0, compression_per_level[i]
+  // determines compression type for level n+i-1.
+  // For example, if we have 5 levels, and we determine to merge L0
+  // data to L4 (which means L1..L3 will be empty), then the new files go to
+  // L4 uses compression type compression_per_level[1].
+  // If now L0 is merged to L2. Data goes to L2 will be compressed
+  // according to compression_per_level[1], L3 using compression_per_level[2]
+  // and L4 using compression_per_level[3]. Compaction for each level can
+  // change when data grows.
+  std::vector<CompressionType> compression_per_level;
+
+  // Number of levels for this database
+  int num_levels = 7;
+
+  // Soft limit on number of level-0 files. We start slowing down writes at this
+  // point. A value <0 means that no writing slow down will be triggered by
+  // number of files in level-0.
+  //
+  // Default: 20
+  //
+  // Dynamically changeable through SetOptions() API
+  int level0_slowdown_writes_trigger = 20;
+
+  // Maximum number of level-0 files.  We stop writes at this point.
+  //
+  // Default: 36
+  //
+  // Dynamically changeable through SetOptions() API
+  int level0_stop_writes_trigger = 36;
+
+  // Target file size for compaction.
+  // target_file_size_base is per-file size for level-1.
+  // Target file size for level L can be calculated by
+  // target_file_size_base * (target_file_size_multiplier ^ (L-1))
+  // For example, if target_file_size_base is 2MB and
+  // target_file_size_multiplier is 10, then each file on level-1 will
+  // be 2MB, and each file on level 2 will be 20MB,
+  // and each file on level-3 will be 200MB.
+  //
+  // Default: 64MB.
+  //
+  // Dynamically changeable through SetOptions() API
+  uint64_t target_file_size_base = 64 * 1048576;
+
+  // By default target_file_size_multiplier is 1, which means
+  // by default files in different levels will have similar size.
+  //
+  // Dynamically changeable through SetOptions() API
+  int target_file_size_multiplier = 1;
+
+  // If true, RocksDB will pick target size of each level dynamically.
+  // We will pick a base level b >= 1. L0 will be directly merged into level b,
+  // instead of always into level 1. Level 1 to b-1 need to be empty.
+  // We try to pick b and its target size so that
+  // 1. target size is in the range of
+  //   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
+  //    max_bytes_for_level_base]
+  // 2. target size of the last level (level num_levels-1) equals to extra size
+  //    of the level.
+  // At the same time max_bytes_for_level_multiplier and
+  // max_bytes_for_level_multiplier_additional are still satisfied.
+  //
+  // With this option on, from an empty DB, we make last level the base level,
+  // which means merging L0 data into the last level, until it exceeds
+  // max_bytes_for_level_base. And then we make the second last level to be
+  // base level, to start to merge L0 data to second last level, with its
+  // target size to be 1/max_bytes_for_level_multiplier of the last level's
+  // extra size. After the data accumulates more so that we need to move the
+  // base level to the third last one, and so on.
+  //
+  // For example, assume max_bytes_for_level_multiplier=10, num_levels=6,
+  // and max_bytes_for_level_base=10MB.
+  // Target sizes of level 1 to 5 starts with:
+  // [- - - - 10MB]
+  // with level 5 as the base level. Target sizes of level 1 to 4 are not
+  // applicable because they will not be used.
+  // Until the size of Level 5 grows to more than 10MB, say 11MB, we make
+  // base target to level 4 and now the targets looks like:
+  // [- - - 1.1MB 11MB]
+  // While data are accumulated, size targets are tuned based on actual data
+  // of level 5. When level 5 has 50MB of data, the target is like:
+  // [- - - 5MB 50MB]
+  // Until level 5's actual size is more than 100MB, say 101MB. Now if we keep
+  // level 4 to be the base level, its target size needs to be 10.1MB, which
+  // doesn't satisfy the target size range. So now we make level 3 the base
+  // level and the target sizes of the levels look like:
+  // [- - 1.01MB 10.1MB 101MB]
+  // In the same way, while level 5 further grows, all levels' targets grow,
+  // like
+  // [- - 5MB 50MB 500MB]
+  // Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
+  // base level and make levels' target sizes like this:
+  // [- 1.001MB 10.01MB 100.1MB 1001MB]
+  // and go on...
+  //
+  // By doing it, we give max_bytes_for_level_multiplier a priority against
+  // max_bytes_for_level_base, for a more predictable LSM tree shape. It is
+  // useful to limit worst case space amplification.
+  //
+  // max_bytes_for_level_multiplier_additional is ignored with this flag on.
+  //
+  // Turning this feature on or off for an existing DB can cause unexpected
+  // LSM tree structure so it's not recommended.
+  //
+  // NOTE: this option is experimental
+  //
+  // Default: false
+  bool level_compaction_dynamic_level_bytes = false;
+
+  // Default: 10.
+  //
+  // Dynamically changeable through SetOptions() API
+  double max_bytes_for_level_multiplier = 10;
+
+  // Different max-size multipliers for different levels.
+  // These are multiplied by max_bytes_for_level_multiplier to arrive
+  // at the max-size of each level.
+  //
+  // Default: 1
+  //
+  // Dynamically changeable through SetOptions() API
+  std::vector<int> max_bytes_for_level_multiplier_additional =
+      std::vector<int>(num_levels, 1);
+
+  // We try to limit number of bytes in one compaction to be lower than this
+  // threshold. But it's not guaranteed.
+  // Value 0 will be sanitized.
+  //
+  // Default: target_file_size_base * 25
+  uint64_t max_compaction_bytes = 0;
+
+  // All writes will be slowed down to at least delayed_write_rate if estimated
+  // bytes needed to be compacted exceed this threshold.
+  //
+  // Default: 64GB
+  uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull;
+
+  // All writes are stopped if estimated bytes needed to be compacted exceed
+  // this threshold.
+  //
+  // Default: 256GB
+  uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull;
+
+  // The compaction style. Default: kCompactionStyleLevel
+  CompactionStyle compaction_style = kCompactionStyleLevel;
+
+  // If compaction_style = kCompactionStyleLevel, for each level,
+  // which files are prioritized to be picked to compact.
+  // Default: kByCompensatedSize
+  CompactionPri compaction_pri = kByCompensatedSize;
+
+  // The options needed to support Universal Style compactions
+  CompactionOptionsUniversal compaction_options_universal;
+
+  // The options for FIFO compaction style
+  CompactionOptionsFIFO compaction_options_fifo;
+
+  // An iteration->Next() sequentially skips over keys with the same
+  // user-key unless this option is set. This number specifies the number
+  // of keys (with the same userkey) that will be sequentially
+  // skipped before a reseek is issued.
+  //
+  // Default: 8
+  //
+  // Dynamically changeable through SetOptions() API
+  uint64_t max_sequential_skip_in_iterations = 8;
+
+  // This is a factory that provides MemTableRep objects.
+  // Default: a factory that provides a skip-list-based implementation of
+  // MemTableRep.
+  std::shared_ptr<MemTableRepFactory> memtable_factory =
+      std::shared_ptr<SkipListFactory>(new SkipListFactory);
+
+  // Block-based table related options are moved to BlockBasedTableOptions.
+  // Related options that were originally here but now moved include:
+  //   no_block_cache
+  //   block_cache
+  //   block_cache_compressed
+  //   block_size
+  //   block_size_deviation
+  //   block_restart_interval
+  //   filter_policy
+  //   whole_key_filtering
+  // If you'd like to customize some of these options, you will need to
+  // use NewBlockBasedTableFactory() to construct a new table factory.
+
+  // This option allows user to collect their own interested statistics of
+  // the tables.
+  // Default: empty vector -- no user-defined statistics collection will be
+  // performed.
+  typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
+      TablePropertiesCollectorFactories;
+  TablePropertiesCollectorFactories table_properties_collector_factories;
+
+  // Maximum number of successive merge operations on a key in the memtable.
+  //
+  // When a merge operation is added to the memtable and the maximum number of
+  // successive merges is reached, the value of the key will be calculated and
+  // inserted into the memtable instead of the merge operation. This will
+  // ensure that there are never more than max_successive_merges merge
+  // operations in the memtable.
+  //
+  // Default: 0 (disabled)
+  //
+  // Dynamically changeable through SetOptions() API
+  size_t max_successive_merges = 0;
+
+  // This flag specifies that the implementation should optimize the filters
+  // mainly for cases where keys are found rather than also optimize for keys
+  // missed. This would be used in cases where the application knows that
+  // there are very few misses or the performance in the case of misses is not
+  // important.
+  //
+  // For now, this flag allows us to not store filters for the last level i.e
+  // the largest level which contains data of the LSM store. For keys which
+  // are hits, the filters in this level are not useful because we will search
+  // for the data anyway. NOTE: the filters in other levels are still useful
+  // even for key hit because they tell us whether to look in that level or go
+  // to the higher level.
+  //
+  // Default: false
+  bool optimize_filters_for_hits = false;
+
+  // After writing every SST file, reopen it and read all the keys.
+  // Default: false
+  bool paranoid_file_checks = false;
+
+  // In debug mode, RocksDB runs consistency checks on the LSM every time the
+  // LSM changes (Flush, Compaction, AddFile). These checks are disabled in
+  // release mode; use this option to enable them in release mode as well.
+  // Default: false
+  bool force_consistency_checks = false;
+
+  // Measure IO stats in compactions and flushes, if true.
+  // Default: false
+  bool report_bg_io_stats = false;
+
+  // Create AdvancedColumnFamilyOptions with default values for all fields
+  AdvancedColumnFamilyOptions();
+  // Create AdvancedColumnFamilyOptions from Options
+  explicit AdvancedColumnFamilyOptions(const Options& options);
+
+  // ---------------- DEPRECATED OPTIONS ----------------
+
+  // DEPRECATED
+  // This does not do anything anymore.
+  int max_mem_compaction_level;
+
+  // DEPRECATED -- this option is no longer used
+  // Puts are delayed to options.delayed_write_rate when any level has a
+  // compaction score that exceeds soft_rate_limit. This is ignored when == 0.0.
+  //
+  // Default: 0 (disabled)
+  //
+  // Dynamically changeable through SetOptions() API
+  double soft_rate_limit = 0.0;
+
+  // DEPRECATED -- this option is no longer used
+  double hard_rate_limit = 0.0;
+
+  // DEPRECATED -- this option is no longer used
+  unsigned int rate_limit_delay_max_milliseconds = 100;
+
+  // DEPRECATED
+  // Does not have any effect.
+  bool purge_redundant_kvs_while_flush = true;
+};
+
+}  // namespace rocksdb
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@ -17,10 +17,10 @@
 #include <limits>
 #include <unordered_map>

+#include "rocksdb/advanced_options.h"
 #include "rocksdb/comparator.h"
 #include "rocksdb/env.h"
 #include "rocksdb/listener.h"
-#include "rocksdb/memtablerep.h"
 #include "rocksdb/universal_compaction.h"
 #include "rocksdb/version.h"
 #include "rocksdb/write_buffer_manager.h"
@ -42,12 +42,9 @@ class FilterPolicy;
 class Logger;
 class MergeOperator;
 class Snapshot;
-class TableFactory;
 class MemTableRepFactory;
-class TablePropertiesCollectorFactory;
 class RateLimiter;
 class Slice;
-class SliceTransform;
 class Statistics;
 class InternalKeyComparator;
 class WalFilter;
@ -79,113 +76,9 @@ enum CompressionType : unsigned char {
  kDisableCompressionOption = 0xff,
 };

-enum CompactionStyle : char {
-  // level based compaction style
-  kCompactionStyleLevel = 0x0,
-  // Universal compaction style
-  // Not supported in ROCKSDB_LITE.
-  kCompactionStyleUniversal = 0x1,
-  // FIFO compaction style
-  // Not supported in ROCKSDB_LITE
-  kCompactionStyleFIFO = 0x2,
-  // Disable background compaction. Compaction jobs are submitted
-  // via CompactFiles().
-  // Not supported in ROCKSDB_LITE
-  kCompactionStyleNone = 0x3,
-};
-
-// In Level-based comapction, it Determines which file from a level to be
-// picked to merge to the next level. We suggest people try
-// kMinOverlappingRatio first when you tune your database.
-enum CompactionPri : char {
-  // Slightly Priotize larger files by size compensated by #deletes
-  kByCompensatedSize = 0x0,
-  // First compact files whose data's latest update time is oldest.
-  // Try this if you only update some hot keys in small ranges.
-  kOldestLargestSeqFirst = 0x1,
-  // First compact files whose range hasn't been compacted to the next level
-  // for the longest. If your updates are random across the key space,
-  // write amplification is slightly better with this option.
-  kOldestSmallestSeqFirst = 0x2,
-  // First compact files whose ratio between overlapping size in next level
-  // and its size is the smallest. It in many cases can optimize write
-  // amplification.
-  kMinOverlappingRatio = 0x3,
-};
-
-enum class WALRecoveryMode : char {
-  // Original levelDB recovery
-  // We tolerate incomplete record in trailing data on all logs
-  // Use case : This is legacy behavior (default)
-  kTolerateCorruptedTailRecords = 0x00,
-  // Recover from clean shutdown
-  // We don't expect to find any corruption in the WAL
-  // Use case : This is ideal for unit tests and rare applications that
-  // can require high consistency guarantee
-  kAbsoluteConsistency = 0x01,
-  // Recover to point-in-time consistency
-  // We stop the WAL playback on discovering WAL inconsistency
-  // Use case : Ideal for systems that have disk controller cache like
-  // hard disk, SSD without super capacitor that store related data
-  kPointInTimeRecovery = 0x02,
-  // Recovery after a disaster
-  // We ignore any corruption in the WAL and try to salvage as much data as
-  // possible
-  // Use case : Ideal for last ditch effort to recover data or systems that
-  // operate with low grade unrelated data
-  kSkipAnyCorruptedRecords = 0x03,
-};
-
-struct CompactionOptionsFIFO {
-  // once the total sum of table files reaches this, we will delete the oldest
-  // table file
-  // Default: 1GB
-  uint64_t max_table_files_size;
-
-  CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {}
-};
-
-// Compression options for different compression algorithms like Zlib
-struct CompressionOptions {
-  int window_bits;
-  int level;
-  int strategy;
-  // Maximum size of dictionary used to prime the compression library. Currently
-  // this dictionary will be constructed by sampling the first output file in a
-  // subcompaction when the target level is bottommost. This dictionary will be
-  // loaded into the compression library before compressing/uncompressing each
-  // data block of subsequent files in the subcompaction. Effectively, this
-  // improves compression ratios when there are repetitions across data blocks.
-  // A value of 0 indicates the feature is disabled.
-  // Default: 0.
-  uint32_t max_dict_bytes;
-
-  CompressionOptions()
-      : window_bits(-14), level(-1), strategy(0), max_dict_bytes(0) {}
-  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes)
-      : window_bits(wbits),
-        level(_lev),
-        strategy(_strategy),
-        max_dict_bytes(_max_dict_bytes) {}
-};
-
-enum UpdateStatus {    // Return status For inplace update callback
-  UPDATE_FAILED   = 0, // Nothing to update
-  UPDATED_INPLACE = 1, // Value updated inplace
-  UPDATED         = 2, // No inplace update. Merged value set
-};
-
-struct DbPath {
-  std::string path;
-  uint64_t target_size;  // Target size of total files under the path, in byte.
-
-  DbPath() : target_size(0) {}
-  DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
-};
-
 struct Options;

-struct ColumnFamilyOptions {
+struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
  // The function recovers options to a previous version. Only 4.6 or later
  // versions are supported.
  ColumnFamilyOptions* OldDefaults(int rocksdb_major_version = 4,
@ -295,54 +188,6 @@ struct ColumnFamilyOptions {
  // Dynamically changeable through SetOptions() API
  size_t write_buffer_size = 64 << 20;

-  // The maximum number of write buffers that are built up in memory.
-  // The default and the minimum number is 2, so that when 1 write buffer
-  // is being flushed to storage, new writes can continue to the other
-  // write buffer.
-  // If max_write_buffer_number > 3, writing will be slowed down to
-  // options.delayed_write_rate if we are writing to the last write buffer
-  // allowed.
-  //
-  // Default: 2
-  //
-  // Dynamically changeable through SetOptions() API
-  int max_write_buffer_number = 2;
-
-  // The minimum number of write buffers that will be merged together
-  // before writing to storage.  If set to 1, then
-  // all write buffers are flushed to L0 as individual files and this increases
-  // read amplification because a get request has to check in all of these
-  // files. Also, an in-memory merge may result in writing lesser
-  // data to storage if there are duplicate records in each of these
-  // individual write buffers.  Default: 1
-  int min_write_buffer_number_to_merge = 1;
-
-  // The total maximum number of write buffers to maintain in memory including
-  // copies of buffers that have already been flushed.  Unlike
-  // max_write_buffer_number, this parameter does not affect flushing.
-  // This controls the minimum amount of write history that will be available
-  // in memory for conflict checking when Transactions are used.
-  //
-  // When using an OptimisticTransactionDB:
-  // If this value is too low, some transactions may fail at commit time due
-  // to not being able to determine whether there were any write conflicts.
-  //
-  // When using a TransactionDB:
-  // If Transaction::SetSnapshot is used, TransactionDB will read either
-  // in-memory write buffers or SST files to do write-conflict checking.
-  // Increasing this value can reduce the number of reads to SST files
-  // done for conflict detection.
-  //
-  // Setting this value to 0 will cause write buffers to be freed immediately
-  // after they are flushed.
-  // If this value is set to -1, 'max_write_buffer_number' will be used.
-  //
-  // Default:
-  // If using a TransactionDB/OptimisticTransactionDB, the default value will
-  // be set to the value of 'max_write_buffer_number' if it is not explicitly
-  // set by the user.  Otherwise, the default is 0.
-  int max_write_buffer_number_to_maintain = 0;
-
  // Compress blocks using the specified compression algorithm.  This
  // parameter can be changed dynamically.
  //
@ -359,29 +204,6 @@ struct ColumnFamilyOptions {
  // efficiently detect that and will switch to uncompressed mode.
  CompressionType compression;

-  // Different levels can have different compression policies. There
-  // are cases where most lower levels would like to use quick compression
-  // algorithms while the higher levels (which have more data) use
-  // compression algorithms that have better compression but could
-  // be slower. This array, if non-empty, should have an entry for
-  // each level of the database; these override the value specified in
-  // the previous field 'compression'.
-  //
-  // NOTICE if level_compaction_dynamic_level_bytes=true,
-  // compression_per_level[0] still determines L0, but other elements
-  // of the array are based on base level (the level L0 files are merged
-  // to), and may not match the level users see from info log for metadata.
-  // If L0 files are merged to level-n, then, for i>0, compression_per_level[i]
-  // determines compaction type for level n+i-1.
-  // For example, if we have three 5 levels, and we determine to merge L0
-  // data to L4 (which means L1..L3 will be empty), then the new files go to
-  // L4 uses compression type compression_per_level[1].
-  // If now L0 is merged to L2. Data goes to L2 will be compressed
-  // according to compression_per_level[1], L3 using compression_per_level[2]
-  // and L4 using compression_per_level[3]. Compaction for each level can
-  // change when data grows.
-  std::vector<CompressionType> compression_per_level;
-
  // Compression algorithm that will be used for the bottommost level that
  // contain files. If level-compaction is used, this option will only affect
  // levels after base level.
@ -392,6 +214,14 @@ struct ColumnFamilyOptions {
  // different options for compression algorithms
  CompressionOptions compression_opts;

+  // Number of files to trigger level-0 compaction. A value <0 means that
+  // level-0 compaction will not be triggered by number of files at all.
+  //
+  // Default: 4
+  //
+  // Dynamically changeable through SetOptions() API
+  int level0_file_num_compaction_trigger = 4;
+
  // If non-nullptr, use the specified function to determine the
  // prefixes for keys.  These prefixes will be placed in the filter.
  // Depending on the workload, this can reduce the number of read-IOP
@ -408,56 +238,6 @@ struct ColumnFamilyOptions {
  // Default: nullptr
  std::shared_ptr<const SliceTransform> prefix_extractor = nullptr;

-  // Number of levels for this database
-  int num_levels = 7;
-
-  // Number of files to trigger level-0 compaction. A value <0 means that
-  // level-0 compaction will not be triggered by number of files at all.
-  //
-  // Default: 4
-  //
-  // Dynamically changeable through SetOptions() API
-  int level0_file_num_compaction_trigger = 4;
-
-  // Soft limit on number of level-0 files. We start slowing down writes at this
-  // point. A value <0 means that no writing slow down will be triggered by
-  // number of files in level-0.
-  //
-  // Default: 20
-  //
-  // Dynamically changeable through SetOptions() API
-  int level0_slowdown_writes_trigger = 20;
-
-  // Maximum number of level-0 files.  We stop writes at this point.
-  //
-  // Default: 36
-  //
-  // Dynamically changeable through SetOptions() API
-  int level0_stop_writes_trigger = 36;
-
-  // This does not do anything anymore. Deprecated.
-  int max_mem_compaction_level;
-
-  // Target file size for compaction.
-  // target_file_size_base is per-file size for level-1.
-  // Target file size for level L can be calculated by
-  // target_file_size_base * (target_file_size_multiplier ^ (L-1))
-  // For example, if target_file_size_base is 2MB and
-  // target_file_size_multiplier is 10, then each file on level-1 will
-  // be 2MB, and each file on level 2 will be 20MB,
-  // and each file on level-3 will be 200MB.
-  //
-  // Default: 64MB.
-  //
-  // Dynamically changeable through SetOptions() API
-  uint64_t target_file_size_base = 64 * 1048576;
-
-  // By default target_file_size_multiplier is 1, which means
-  // by default files in different levels will have similar size.
-  //
-  // Dynamically changeable through SetOptions() API
-  int target_file_size_multiplier = 1;
-
  // Control maximum total data size for a level.
  // max_bytes_for_level_base is the max total for level-1.
  // Maximum number of bytes for level L can be calculated as
@ -472,355 +252,18 @@ struct ColumnFamilyOptions {
  // Dynamically changeable through SetOptions() API
  uint64_t max_bytes_for_level_base = 256 * 1048576;

-  // If true, RocksDB will pick target size of each level dynamically.
-  // We will pick a base level b >= 1. L0 will be directly merged into level b,
-  // instead of always into level 1. Level 1 to b-1 need to be empty.
-  // We try to pick b and its target size so that
-  // 1. target size is in the range of
-  //   (max_bytes_for_level_base / max_bytes_for_level_multiplier,
-  //    max_bytes_for_level_base]
-  // 2. target size of the last level (level num_levels-1) equals to extra size
-  //    of the level.
-  // At the same time max_bytes_for_level_multiplier and
-  // max_bytes_for_level_multiplier_additional are still satisfied.
-  //
-  // With this option on, from an empty DB, we make last level the base level,
-  // which means merging L0 data into the last level, until it exceeds
-  // max_bytes_for_level_base. And then we make the second last level to be
-  // base level, to start to merge L0 data to second last level, with its
-  // target size to be 1/max_bytes_for_level_multiplier of the last level's
-  // extra size. After the data accumulates more so that we need to move the
-  // base level to the third last one, and so on.
-  //
-  // For example, assume max_bytes_for_level_multiplier=10, num_levels=6,
-  // and max_bytes_for_level_base=10MB.
-  // Target sizes of level 1 to 5 starts with:
-  // [- - - - 10MB]
-  // with base level is level. Target sizes of level 1 to 4 are not applicable
-  // because they will not be used.
-  // Until the size of Level 5 grows to more than 10MB, say 11MB, we make
-  // base target to level 4 and now the targets looks like:
-  // [- - - 1.1MB 11MB]
-  // While data are accumulated, size targets are tuned based on actual data
-  // of level 5. When level 5 has 50MB of data, the target is like:
-  // [- - - 5MB 50MB]
-  // Until level 5's actual size is more than 100MB, say 101MB. Now if we keep
-  // level 4 to be the base level, its target size needs to be 10.1MB, which
-  // doesn't satisfy the target size range. So now we make level 3 the target
-  // size and the target sizes of the levels look like:
-  // [- - 1.01MB 10.1MB 101MB]
-  // In the same way, while level 5 further grows, all levels' targets grow,
-  // like
-  // [- - 5MB 50MB 500MB]
-  // Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
-  // base level and make levels' target sizes like this:
-  // [- 1.001MB 10.01MB 100.1MB 1001MB]
-  // and go on...
-  //
-  // By doing it, we give max_bytes_for_level_multiplier a priority against
-  // max_bytes_for_level_base, for a more predictable LSM tree shape. It is
-  // useful to limit worse case space amplification.
-  //
-  // max_bytes_for_level_multiplier_additional is ignored with this flag on.
-  //
-  // Turning this feature on or off for an existing DB can cause unexpected
-  // LSM tree structure so it's not recommended.
-  //
-  // NOTE: this option is experimental
-  //
-  // Default: false
-  bool level_compaction_dynamic_level_bytes = false;
-
-  // Default: 10.
-  //
-  // Dynamically changeable through SetOptions() API
-  double max_bytes_for_level_multiplier = 10;
-
-  // Different max-size multipliers for different levels.
-  // These are multiplied by max_bytes_for_level_multiplier to arrive
-  // at the max-size of each level.
-  //
-  // Default: 1
-  //
-  // Dynamically changeable through SetOptions() API
-  std::vector<int> max_bytes_for_level_multiplier_additional =
-      std::vector<int>(num_levels, 1);
-
-  // We try to limit number of bytes in one compaction to be lower than this
-  // threshold. But it's not guaranteed.
-  // Value 0 will be sanitized.
-  //
-  // Default: result.target_file_size_base * 25
-  uint64_t max_compaction_bytes = 0;
-
-  // DEPRECATED -- this options is no longer used
-  // Puts are delayed to options.delayed_write_rate when any level has a
-  // compaction score that exceeds soft_rate_limit. This is ignored when == 0.0.
-  //
-  // Default: 0 (disabled)
-  //
-  // Dynamically changeable through SetOptions() API
-  double soft_rate_limit = 0.0;
-
-  // DEPRECATED -- this options is no longer used
-  double hard_rate_limit = 0.0;
-
-  // All writes will be slowed down to at least delayed_write_rate if estimated
-  // bytes needed to be compaction exceed this threshold.
-  //
-  // Default: 64GB
-  uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull;
-
-  // All writes are stopped if estimated bytes needed to be compaction exceed
-  // this threshold.
-  //
-  // Default: 256GB
-  uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull;
-
-  // DEPRECATED -- this options is no longer used
-  unsigned int rate_limit_delay_max_milliseconds = 100;
-
-  // size of one block in arena memory allocation.
-  // If <= 0, a proper value is automatically calculated (usually 1/8 of
-  // writer_buffer_size, rounded up to a multiple of 4KB).
-  //
-  // There are two additional restriction of the The specified size:
-  // (1) size should be in the range of [4096, 2 << 30] and
-  // (2) be the multiple of the CPU word (which helps with the memory
-  // alignment).
-  //
-  // We'll automatically check and adjust the size number to make sure it
-  // conforms to the restrictions.
-  //
-  // Default: 0
-  //
-  // Dynamically changeable through SetOptions() API
-  size_t arena_block_size = 0;
-
  // Disable automatic compactions. Manual compactions can still
  // be issued on this column family
  //
  // Dynamically changeable through SetOptions() API
  bool disable_auto_compactions = false;

-  // DEPREACTED
-  // Does not have any effect.
-  bool purge_redundant_kvs_while_flush = true;
-
-  // The compaction style. Default: kCompactionStyleLevel
-  CompactionStyle compaction_style = kCompactionStyleLevel;
-
-  // If level compaction_style = kCompactionStyleLevel, for each level,
-  // which files are prioritized to be picked to compact.
-  // Default: kByCompensatedSize
-  CompactionPri compaction_pri = kByCompensatedSize;
-
-
-  // The options needed to support Universal Style compactions
-  CompactionOptionsUniversal compaction_options_universal;
-
-  // The options for FIFO compaction style
-  CompactionOptionsFIFO compaction_options_fifo;
-
-  // An iteration->Next() sequentially skips over keys with the same
-  // user-key unless this option is set. This number specifies the number
-  // of keys (with the same userkey) that will be sequentially
-  // skipped before a reseek is issued.
-  //
-  // Default: 8
-  //
-  // Dynamically changeable through SetOptions() API
-  uint64_t max_sequential_skip_in_iterations = 8;
-
-  // This is a factory that provides MemTableRep objects.
-  // Default: a factory that provides a skip-list-based implementation of
-  // MemTableRep.
-  std::shared_ptr<MemTableRepFactory> memtable_factory =
-      std::shared_ptr<SkipListFactory>(new SkipListFactory);
-
  // This is a factory that provides TableFactory objects.
  // Default: a block-based table factory that provides a default
  // implementation of TableBuilder and TableReader with default
  // BlockBasedTableOptions.
  std::shared_ptr<TableFactory> table_factory;

-  // Block-based table related options are moved to BlockBasedTableOptions.
-  // Related options that were originally here but now moved include:
-  //   no_block_cache
-  //   block_cache
-  //   block_cache_compressed
-  //   block_size
-  //   block_size_deviation
-  //   block_restart_interval
-  //   filter_policy
-  //   whole_key_filtering
-  // If you'd like to customize some of these options, you will need to
-  // use NewBlockBasedTableFactory() to construct a new table factory.
-
-  // This option allows user to collect their own interested statistics of
-  // the tables.
-  // Default: empty vector -- no user-defined statistics collection will be
-  // performed.
-  typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
-      TablePropertiesCollectorFactories;
-  TablePropertiesCollectorFactories table_properties_collector_factories;
-
-  // Allows thread-safe inplace updates. If this is true, there is no way to
-  // achieve point-in-time consistency using snapshot or iterator (assuming
-  // concurrent updates). Hence iterator and multi-get will return results
-  // which are not consistent as of any point-in-time.
-  // If inplace_callback function is not set,
-  //   Put(key, new_value) will update inplace the existing_value iff
-  //   * key exists in current memtable
-  //   * new sizeof(new_value) <= sizeof(existing_value)
-  //   * existing_value for that key is a put i.e. kTypeValue
-  // If inplace_callback function is set, check doc for inplace_callback.
-  // Default: false.
-  bool inplace_update_support = false;
-
-  // Number of locks used for inplace update
-  // Default: 10000, if inplace_update_support = true, else 0.
-  //
-  // Dynamically changeable through SetOptions() API
-  size_t inplace_update_num_locks = 10000;
-
-  // existing_value - pointer to previous value (from both memtable and sst).
-  //                  nullptr if key doesn't exist
-  // existing_value_size - pointer to size of existing_value).
-  //                       nullptr if key doesn't exist
-  // delta_value - Delta value to be merged with the existing_value.
-  //               Stored in transaction logs.
-  // merged_value - Set when delta is applied on the previous value.
-
-  // Applicable only when inplace_update_support is true,
-  // this callback function is called at the time of updating the memtable
-  // as part of a Put operation, lets say Put(key, delta_value). It allows the
-  // 'delta_value' specified as part of the Put operation to be merged with
-  // an 'existing_value' of the key in the database.
-
-  // If the merged value is smaller in size that the 'existing_value',
-  // then this function can update the 'existing_value' buffer inplace and
-  // the corresponding 'existing_value'_size pointer, if it wishes to.
-  // The callback should return UpdateStatus::UPDATED_INPLACE.
-  // In this case. (In this case, the snapshot-semantics of the rocksdb
-  // Iterator is not atomic anymore).
-
-  // If the merged value is larger in size than the 'existing_value' or the
-  // application does not wish to modify the 'existing_value' buffer inplace,
-  // then the merged value should be returned via *merge_value. It is set by
-  // merging the 'existing_value' and the Put 'delta_value'. The callback should
-  // return UpdateStatus::UPDATED in this case. This merged value will be added
-  // to the memtable.
-
-  // If merging fails or the application does not wish to take any action,
-  // then the callback should return UpdateStatus::UPDATE_FAILED.
-
-  // Please remember that the original call from the application is Put(key,
-  // delta_value). So the transaction log (if enabled) will still contain (key,
-  // delta_value). The 'merged_value' is not stored in the transaction log.
-  // Hence the inplace_callback function should be consistent across db reopens.
-
-  // Default: nullptr
-  UpdateStatus (*inplace_callback)(char* existing_value,
-                                   uint32_t* existing_value_size,
-                                   Slice delta_value,
-                                   std::string* merged_value) = nullptr;
-
-  // if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
-  // create prefix bloom for memtable with the size of
-  // write_buffer_size * memtable_prefix_bloom_size_ratio.
-  // If it is larger than 0.25, it is santinized to 0.25.
-  //
-  // Default: 0 (disable)
-  //
-  // Dynamically changeable through SetOptions() API
-  double memtable_prefix_bloom_size_ratio = 0.0;
-
-  // Page size for huge page for the arena used by the memtable. If <=0, it
-  // won't allocate from huge page but from malloc.
-  // Users are responsible to reserve huge pages for it to be allocated. For
-  // example:
-  //      sysctl -w vm.nr_hugepages=20
-  // See linux doc Documentation/vm/hugetlbpage.txt
-  // If there isn't enough free huge page available, it will fall back to
-  // malloc.
-  //
-  // Dynamically changeable through SetOptions() API
-  size_t memtable_huge_page_size = 0;
-
-  // If non-nullptr, memtable will use the specified function to extract
-  // prefixes for keys, and for each prefix maintain a hint of insert location
-  // to reduce CPU usage for inserting keys with the prefix. Keys out of
-  // domain of the prefix extractor will be insert without using hints.
-  //
-  // Currently only the default skiplist based memtable implements the feature.
-  // All other memtable implementation will ignore the option. It incurs ~250
-  // additional bytes of memory overhead to store a hint for each prefix.
-  // Also concurrent writes (when allow_concurrent_memtable_write is true) will
-  // ignore the option.
-  //
-  // The option is best suited for workloads where keys will likely to insert
-  // to a location close the the last inserted key with the same prefix.
-  // One example could be inserting keys of the form (prefix + timestamp),
-  // and keys of the same prefix always comes in with time order. Another
-  // example would be updating the same key over and over again, in which case
-  // the prefix can be the key itself.
-  //
-  // Default: nullptr (disable)
-  std::shared_ptr<const SliceTransform>
-      memtable_insert_with_hint_prefix_extractor = nullptr;
-
-  // Control locality of bloom filter probes to improve cache miss rate.
-  // This option only applies to memtable prefix bloom and plaintable
-  // prefix bloom. It essentially limits every bloom checking to one cache line.
-  // This optimization is turned off when set to 0, and positive number to turn
-  // it on.
-  // Default: 0
-  uint32_t bloom_locality = 0;
-
-  // Maximum number of successive merge operations on a key in the memtable.
-  //
-  // When a merge operation is added to the memtable and the maximum number of
-  // successive merges is reached, the value of the key will be calculated and
-  // inserted into the memtable instead of the merge operation. This will
-  // ensure that there are never more than max_successive_merges merge
-  // operations in the memtable.
-  //
-  // Default: 0 (disabled)
-  //
-  // Dynamically changeable through SetOptions() API
-  size_t max_successive_merges = 0;
-
-  // This flag specifies that the implementation should optimize the filters
-  // mainly for cases where keys are found rather than also optimize for keys
-  // missed. This would be used in cases where the application knows that
-  // there are very few misses or the performance in the case of misses is not
-  // important.
-  //
-  // For now, this flag allows us to not store filters for the last level i.e
-  // the largest level which contains data of the LSM store. For keys which
-  // are hits, the filters in this level are not useful because we will search
-  // for the data anyway. NOTE: the filters in other levels are still useful
-  // even for key hit because they tell us whether to look in that level or go
-  // to the higher level.
-  //
-  // Default: false
-  bool optimize_filters_for_hits = false;
-
-  // After writing every SST file, reopen it and read all the keys.
-  // Default: false
-  bool paranoid_file_checks = false;
-
-  // In debug mode, RocksDB run consistency checks on the LSM everytime the LSM
-  // change (Flush, Compaction, AddFile). These checks are disabled in release
-  // mode, use this option to enable them in release mode as well.
-  // Default: false
-  bool force_consistency_checks = false;
-
-  // Measure IO stats in compactions and flushes, if true.
-  // Default: false
-  bool report_bg_io_stats = false;
-
  // Create ColumnFamilyOptions with default values for all fields
  ColumnFamilyOptions();
  // Create ColumnFamilyOptions from Options
@ -829,6 +272,38 @@ struct ColumnFamilyOptions {
  void Dump(Logger* log) const;
 };

+enum class WALRecoveryMode : char {
+  // Original levelDB recovery
+  // We tolerate incomplete records in trailing data on all logs
+  // Use case : This is legacy behavior (default)
+  kTolerateCorruptedTailRecords = 0x00,
+  // Recover from clean shutdown
+  // We don't expect to find any corruption in the WAL
+  // Use case : This is ideal for unit tests and rare applications that
+  // require a high consistency guarantee
+  kAbsoluteConsistency = 0x01,
+  // Recover to point-in-time consistency
+  // We stop the WAL playback on discovering WAL inconsistency
+  // Use case : Ideal for systems that have disk controller cache like
+  // hard disk, SSD without super capacitor that store related data
+  kPointInTimeRecovery = 0x02,
+  // Recovery after a disaster
+  // We ignore any corruption in the WAL and try to salvage as much data as
+  // possible
+  // Use case : Ideal for a last ditch effort to recover data or systems that
+  // operate with low grade unrelated data
+  kSkipAnyCorruptedRecords = 0x03,
+};
+
+struct DbPath {
+  std::string path;
+  uint64_t target_size;  // Target total size of files under the path, in bytes.
+
+  DbPath() : target_size(0) {}  // empty path, zero target size
+  DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
+};
+
+
 struct DBOptions {
  // The function recovers options to the option as in version 4.6.
  DBOptions* OldDefaults(int rocksdb_major_version = 4,
--- a/util/options.cc
+++ b/util/options.cc
@ -36,53 +36,42 @@

 namespace rocksdb {

-ColumnFamilyOptions::ColumnFamilyOptions()
-    : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
-      table_factory(
-          std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {
+AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
  assert(memtable_factory.get() != nullptr);
 }

-ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
-    : comparator(options.comparator),
-      merge_operator(options.merge_operator),
-      compaction_filter(options.compaction_filter),
-      compaction_filter_factory(options.compaction_filter_factory),
-      write_buffer_size(options.write_buffer_size),
-      max_write_buffer_number(options.max_write_buffer_number),
+AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
+    : max_write_buffer_number(options.max_write_buffer_number),
      min_write_buffer_number_to_merge(
          options.min_write_buffer_number_to_merge),
      max_write_buffer_number_to_maintain(
          options.max_write_buffer_number_to_maintain),
-      compression(options.compression),
+      inplace_update_support(options.inplace_update_support),
+      inplace_update_num_locks(options.inplace_update_num_locks),
+      inplace_callback(options.inplace_callback),
+      memtable_prefix_bloom_size_ratio(
+          options.memtable_prefix_bloom_size_ratio),
+      memtable_huge_page_size(options.memtable_huge_page_size),
+      memtable_insert_with_hint_prefix_extractor(
+          options.memtable_insert_with_hint_prefix_extractor),
+      bloom_locality(options.bloom_locality),
+      arena_block_size(options.arena_block_size),
      compression_per_level(options.compression_per_level),
-      bottommost_compression(options.bottommost_compression),
-      compression_opts(options.compression_opts),
-      prefix_extractor(options.prefix_extractor),
      num_levels(options.num_levels),
-      level0_file_num_compaction_trigger(
-          options.level0_file_num_compaction_trigger),
      level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
      level0_stop_writes_trigger(options.level0_stop_writes_trigger),
      target_file_size_base(options.target_file_size_base),
      target_file_size_multiplier(options.target_file_size_multiplier),
-      max_bytes_for_level_base(options.max_bytes_for_level_base),
      level_compaction_dynamic_level_bytes(
          options.level_compaction_dynamic_level_bytes),
      max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
      max_bytes_for_level_multiplier_additional(
          options.max_bytes_for_level_multiplier_additional),
      max_compaction_bytes(options.max_compaction_bytes),
-      soft_rate_limit(options.soft_rate_limit),
      soft_pending_compaction_bytes_limit(
          options.soft_pending_compaction_bytes_limit),
      hard_pending_compaction_bytes_limit(
          options.hard_pending_compaction_bytes_limit),
-      rate_limit_delay_max_milliseconds(
-          options.rate_limit_delay_max_milliseconds),
-      arena_block_size(options.arena_block_size),
-      disable_auto_compactions(options.disable_auto_compactions),
-      purge_redundant_kvs_while_flush(options.purge_redundant_kvs_while_flush),
      compaction_style(options.compaction_style),
      compaction_pri(options.compaction_pri),
      compaction_options_universal(options.compaction_options_universal),
@ -90,18 +79,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
      max_sequential_skip_in_iterations(
          options.max_sequential_skip_in_iterations),
      memtable_factory(options.memtable_factory),
-      table_factory(options.table_factory),
      table_properties_collector_factories(
          options.table_properties_collector_factories),
-      inplace_update_support(options.inplace_update_support),
-      inplace_update_num_locks(options.inplace_update_num_locks),
-      inplace_callback(options.inplace_callback),
-      memtable_prefix_bloom_size_ratio(
-          options.memtable_prefix_bloom_size_ratio),
-      memtable_huge_page_size(options.memtable_huge_page_size),
-      memtable_insert_with_hint_prefix_extractor(
-          options.memtable_insert_with_hint_prefix_extractor),
-      bloom_locality(options.bloom_locality),
      max_successive_merges(options.max_successive_merges),
      optimize_filters_for_hits(options.optimize_filters_for_hits),
      paranoid_file_checks(options.paranoid_file_checks),
@ -114,6 +93,28 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
  }
 }

+ColumnFamilyOptions::ColumnFamilyOptions()
+    : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
+      table_factory(  // default table format: block-based
+          std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {}
+
+ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
+    : AdvancedColumnFamilyOptions(options),  // base copies the advanced fields
+      comparator(options.comparator),
+      merge_operator(options.merge_operator),
+      compaction_filter(options.compaction_filter),
+      compaction_filter_factory(options.compaction_filter_factory),
+      write_buffer_size(options.write_buffer_size),
+      compression(options.compression),
+      bottommost_compression(options.bottommost_compression),
+      compression_opts(options.compression_opts),
+      level0_file_num_compaction_trigger(
+          options.level0_file_num_compaction_trigger),
+      prefix_extractor(options.prefix_extractor),
+      max_bytes_for_level_base(options.max_bytes_for_level_base),
+      disable_auto_compactions(options.disable_auto_compactions),
+      table_factory(options.table_factory) {}
+
 DBOptions::DBOptions() {}

 DBOptions::DBOptions(const Options& options)
--- a/util/options_helper.h
+++ b/util/options_helper.h
@ -365,6 +365,21 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
      OptionType::kBoolean, OptionVerificationType::kNormal, true,
      offsetof(struct MutableDBOptions, avoid_flush_during_shutdown)}}};

+// offset_of is used to get the byte offset of a class data member
+// ex: offset_of(&ColumnFamilyOptions::num_levels)
+// This call will return the offset of num_levels in ColumnFamilyOptions class
+//
+// This is the same as offsetof() but allows us to work with non
+// standard-layout classes and structures
+// refs:
+// http://en.cppreference.com/w/cpp/concept/StandardLayoutType
+// https://gist.github.com/graphitemaster/494f21190bb2c63c5516
+template <typename T1, typename T2>
+inline int offset_of(T1 T2::*member) {
+  static T2 obj;  // scratch instance of T2; only addresses are taken from it
+  return int(size_t(&(obj.*member)) - size_t(&obj));  // member addr - obj addr
+}
+
 static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
    /* not yet supported
    CompactionOptionsFIFO compaction_options_fifo;
@ -379,45 +394,44 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
                                     std::string* merged_value);
     */
    {"report_bg_io_stats",
-     {offsetof(struct ColumnFamilyOptions, report_bg_io_stats),
-      OptionType::kBoolean, OptionVerificationType::kNormal, true,
+     {offset_of(&ColumnFamilyOptions::report_bg_io_stats), OptionType::kBoolean,
+      OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, report_bg_io_stats)}},
    {"compaction_measure_io_stats",
     {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false, 0}},
    {"disable_auto_compactions",
-     {offsetof(struct ColumnFamilyOptions, disable_auto_compactions),
+     {offset_of(&ColumnFamilyOptions::disable_auto_compactions),
      OptionType::kBoolean, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, disable_auto_compactions)}},
    {"filter_deletes",
     {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, true, 0}},
    {"inplace_update_support",
-     {offsetof(struct ColumnFamilyOptions, inplace_update_support),
+     {offset_of(&ColumnFamilyOptions::inplace_update_support),
      OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
    {"level_compaction_dynamic_level_bytes",
-     {offsetof(struct ColumnFamilyOptions,
-               level_compaction_dynamic_level_bytes),
+     {offset_of(&ColumnFamilyOptions::level_compaction_dynamic_level_bytes),
      OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
    {"optimize_filters_for_hits",
-     {offsetof(struct ColumnFamilyOptions, optimize_filters_for_hits),
+     {offset_of(&ColumnFamilyOptions::optimize_filters_for_hits),
      OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
    {"paranoid_file_checks",
-     {offsetof(struct ColumnFamilyOptions, paranoid_file_checks),
+     {offset_of(&ColumnFamilyOptions::paranoid_file_checks),
      OptionType::kBoolean, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, paranoid_file_checks)}},
    {"force_consistency_checks",
-     {offsetof(struct ColumnFamilyOptions, force_consistency_checks),
+     {offset_of(&ColumnFamilyOptions::force_consistency_checks),
      OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
    {"purge_redundant_kvs_while_flush",
-     {offsetof(struct ColumnFamilyOptions, purge_redundant_kvs_while_flush),
+     {offset_of(&ColumnFamilyOptions::purge_redundant_kvs_while_flush),
      OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
    {"verify_checksums_in_compaction",
     {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, true, 0}},
    {"soft_pending_compaction_bytes_limit",
-     {offsetof(struct ColumnFamilyOptions, soft_pending_compaction_bytes_limit),
+     {offset_of(&ColumnFamilyOptions::soft_pending_compaction_bytes_limit),
      OptionType::kUInt64T, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, soft_pending_compaction_bytes_limit)}},
    {"hard_pending_compaction_bytes_limit",
-     {offsetof(struct ColumnFamilyOptions, hard_pending_compaction_bytes_limit),
+     {offset_of(&ColumnFamilyOptions::hard_pending_compaction_bytes_limit),
      OptionType::kUInt64T, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, hard_pending_compaction_bytes_limit)}},
    {"hard_rate_limit",
@ -425,21 +439,21 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
    {"soft_rate_limit",
     {0, OptionType::kDouble, OptionVerificationType::kDeprecated, true, 0}},
    {"max_compaction_bytes",
-     {offsetof(struct ColumnFamilyOptions, max_compaction_bytes),
+     {offset_of(&ColumnFamilyOptions::max_compaction_bytes),
      OptionType::kUInt64T, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, max_compaction_bytes)}},
    {"expanded_compaction_factor",
     {0, OptionType::kInt, OptionVerificationType::kDeprecated, true, 0}},
    {"level0_file_num_compaction_trigger",
-     {offsetof(struct ColumnFamilyOptions, level0_file_num_compaction_trigger),
+     {offset_of(&ColumnFamilyOptions::level0_file_num_compaction_trigger),
      OptionType::kInt, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, level0_file_num_compaction_trigger)}},
    {"level0_slowdown_writes_trigger",
-     {offsetof(struct ColumnFamilyOptions, level0_slowdown_writes_trigger),
+     {offset_of(&ColumnFamilyOptions::level0_slowdown_writes_trigger),
      OptionType::kInt, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, level0_slowdown_writes_trigger)}},
    {"level0_stop_writes_trigger",
-     {offsetof(struct ColumnFamilyOptions, level0_stop_writes_trigger),
+     {offset_of(&ColumnFamilyOptions::level0_stop_writes_trigger),
      OptionType::kInt, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, level0_stop_writes_trigger)}},
    {"max_grandparent_overlap_factor",
@ -447,53 +461,53 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
    {"max_mem_compaction_level",
     {0, OptionType::kInt, OptionVerificationType::kDeprecated, false, 0}},
    {"max_write_buffer_number",
-     {offsetof(struct ColumnFamilyOptions, max_write_buffer_number),
+     {offset_of(&ColumnFamilyOptions::max_write_buffer_number),
      OptionType::kInt, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, max_write_buffer_number)}},
    {"max_write_buffer_number_to_maintain",
-     {offsetof(struct ColumnFamilyOptions, max_write_buffer_number_to_maintain),
+     {offset_of(&ColumnFamilyOptions::max_write_buffer_number_to_maintain),
      OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
    {"min_write_buffer_number_to_merge",
-     {offsetof(struct ColumnFamilyOptions, min_write_buffer_number_to_merge),
+     {offset_of(&ColumnFamilyOptions::min_write_buffer_number_to_merge),
      OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
    {"num_levels",
-     {offsetof(struct ColumnFamilyOptions, num_levels), OptionType::kInt,
+     {offset_of(&ColumnFamilyOptions::num_levels), OptionType::kInt,
      OptionVerificationType::kNormal, false, 0}},
    {"source_compaction_factor",
     {0, OptionType::kInt, OptionVerificationType::kDeprecated, true, 0}},
    {"target_file_size_multiplier",
-     {offsetof(struct ColumnFamilyOptions, target_file_size_multiplier),
+     {offset_of(&ColumnFamilyOptions::target_file_size_multiplier),
      OptionType::kInt, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, target_file_size_multiplier)}},
    {"arena_block_size",
-     {offsetof(struct ColumnFamilyOptions, arena_block_size),
-      OptionType::kSizeT, OptionVerificationType::kNormal, true,
+     {offset_of(&ColumnFamilyOptions::arena_block_size), OptionType::kSizeT,
+      OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, arena_block_size)}},
    {"inplace_update_num_locks",
-     {offsetof(struct ColumnFamilyOptions, inplace_update_num_locks),
+     {offset_of(&ColumnFamilyOptions::inplace_update_num_locks),
      OptionType::kSizeT, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, inplace_update_num_locks)}},
    {"max_successive_merges",
-     {offsetof(struct ColumnFamilyOptions, max_successive_merges),
+     {offset_of(&ColumnFamilyOptions::max_successive_merges),
      OptionType::kSizeT, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, max_successive_merges)}},
    {"memtable_huge_page_size",
-     {offsetof(struct ColumnFamilyOptions, memtable_huge_page_size),
+     {offset_of(&ColumnFamilyOptions::memtable_huge_page_size),
      OptionType::kSizeT, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, memtable_huge_page_size)}},
    {"memtable_prefix_bloom_huge_page_tlb_size",
     {0, OptionType::kSizeT, OptionVerificationType::kDeprecated, true, 0}},
    {"write_buffer_size",
-     {offsetof(struct ColumnFamilyOptions, write_buffer_size),
-      OptionType::kSizeT, OptionVerificationType::kNormal, true,
+     {offset_of(&ColumnFamilyOptions::write_buffer_size), OptionType::kSizeT,
+      OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, write_buffer_size)}},
    {"bloom_locality",
-     {offsetof(struct ColumnFamilyOptions, bloom_locality),
-      OptionType::kUInt32T, OptionVerificationType::kNormal, false, 0}},
+     {offset_of(&ColumnFamilyOptions::bloom_locality), OptionType::kUInt32T,
+      OptionVerificationType::kNormal, false, 0}},
    {"memtable_prefix_bloom_bits",
     {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, true, 0}},
    {"memtable_prefix_bloom_size_ratio",
-     {offsetof(struct ColumnFamilyOptions, memtable_prefix_bloom_size_ratio),
+     {offset_of(&ColumnFamilyOptions::memtable_prefix_bloom_size_ratio),
      OptionType::kDouble, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, memtable_prefix_bloom_size_ratio)}},
    {"memtable_prefix_bloom_probes",
@ -501,72 +515,72 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
    {"min_partial_merge_operands",
     {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, true, 0}},
    {"max_bytes_for_level_base",
-     {offsetof(struct ColumnFamilyOptions, max_bytes_for_level_base),
+     {offset_of(&ColumnFamilyOptions::max_bytes_for_level_base),
      OptionType::kUInt64T, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, max_bytes_for_level_base)}},
    {"max_bytes_for_level_multiplier",
-     {offsetof(struct ColumnFamilyOptions, max_bytes_for_level_multiplier),
+     {offset_of(&ColumnFamilyOptions::max_bytes_for_level_multiplier),
      OptionType::kDouble, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier)}},
    {"max_bytes_for_level_multiplier_additional",
-     {offsetof(struct ColumnFamilyOptions,
-               max_bytes_for_level_multiplier_additional),
+     {offset_of(
+          &ColumnFamilyOptions::max_bytes_for_level_multiplier_additional),
      OptionType::kVectorInt, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions,
               max_bytes_for_level_multiplier_additional)}},
    {"max_sequential_skip_in_iterations",
-     {offsetof(struct ColumnFamilyOptions, max_sequential_skip_in_iterations),
+     {offset_of(&ColumnFamilyOptions::max_sequential_skip_in_iterations),
      OptionType::kUInt64T, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, max_sequential_skip_in_iterations)}},
    {"target_file_size_base",
-     {offsetof(struct ColumnFamilyOptions, target_file_size_base),
+     {offset_of(&ColumnFamilyOptions::target_file_size_base),
      OptionType::kUInt64T, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, target_file_size_base)}},
    {"rate_limit_delay_max_milliseconds",
     {0, OptionType::kUInt, OptionVerificationType::kDeprecated, false, 0}},
    {"compression",
-     {offsetof(struct ColumnFamilyOptions, compression),
+     {offset_of(&ColumnFamilyOptions::compression),
      OptionType::kCompressionType, OptionVerificationType::kNormal, true,
      offsetof(struct MutableCFOptions, compression)}},
    {"compression_per_level",
-     {offsetof(struct ColumnFamilyOptions, compression_per_level),
+     {offset_of(&ColumnFamilyOptions::compression_per_level),
      OptionType::kVectorCompressionType, OptionVerificationType::kNormal,
      false, 0}},
    {"bottommost_compression",
-     {offsetof(struct ColumnFamilyOptions, bottommost_compression),
+     {offset_of(&ColumnFamilyOptions::bottommost_compression),
      OptionType::kCompressionType, OptionVerificationType::kNormal, false, 0}},
    {"comparator",
-     {offsetof(struct ColumnFamilyOptions, comparator), OptionType::kComparator,
+     {offset_of(&ColumnFamilyOptions::comparator), OptionType::kComparator,
      OptionVerificationType::kByName, false, 0}},
    {"prefix_extractor",
-     {offsetof(struct ColumnFamilyOptions, prefix_extractor),
+     {offset_of(&ColumnFamilyOptions::prefix_extractor),
      OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull,
      false, 0}},
    {"memtable_insert_with_hint_prefix_extractor",
-     {offsetof(struct ColumnFamilyOptions,
-               memtable_insert_with_hint_prefix_extractor),
+     {offset_of(
+          &ColumnFamilyOptions::memtable_insert_with_hint_prefix_extractor),
      OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull,
      false, 0}},
    {"memtable_factory",
-     {offsetof(struct ColumnFamilyOptions, memtable_factory),
+     {offset_of(&ColumnFamilyOptions::memtable_factory),
      OptionType::kMemTableRepFactory, OptionVerificationType::kByName, false,
      0}},
    {"table_factory",
-     {offsetof(struct ColumnFamilyOptions, table_factory),
-      OptionType::kTableFactory, OptionVerificationType::kByName, false, 0}},
+     {offset_of(&ColumnFamilyOptions::table_factory), OptionType::kTableFactory,
+      OptionVerificationType::kByName, false, 0}},
    {"compaction_filter",
-     {offsetof(struct ColumnFamilyOptions, compaction_filter),
+     {offset_of(&ColumnFamilyOptions::compaction_filter),
      OptionType::kCompactionFilter, OptionVerificationType::kByName, false,
      0}},
    {"compaction_filter_factory",
-     {offsetof(struct ColumnFamilyOptions, compaction_filter_factory),
+     {offset_of(&ColumnFamilyOptions::compaction_filter_factory),
      OptionType::kCompactionFilterFactory, OptionVerificationType::kByName,
      false, 0}},
    {"merge_operator",
-     {offsetof(struct ColumnFamilyOptions, merge_operator),
+     {offset_of(&ColumnFamilyOptions::merge_operator),
      OptionType::kMergeOperator, OptionVerificationType::kByName, false, 0}},
    {"compaction_style",
-     {offsetof(struct ColumnFamilyOptions, compaction_style),
+     {offset_of(&ColumnFamilyOptions::compaction_style),
      OptionType::kCompactionStyle, OptionVerificationType::kNormal, false,
      0}}};

--- a/util/options_settable_test.cc
+++ b/util/options_settable_test.cc
@ -314,32 +314,31 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
  // options in the blacklist need to appear in the same order as in
  // ColumnFamilyOptions.
  const OffsetGap kColumnFamilyOptionsBlacklist = {
-      {offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)},
-      {offsetof(struct ColumnFamilyOptions, merge_operator),
-       sizeof(std::shared_ptr<MergeOperator>)},
-      {offsetof(struct ColumnFamilyOptions, compaction_filter),
-       sizeof(const CompactionFilter*)},
-      {offsetof(struct ColumnFamilyOptions, compaction_filter_factory),
-       sizeof(std::shared_ptr<CompactionFilterFactory>)},
-      {offsetof(struct ColumnFamilyOptions, compression_per_level),
-       sizeof(std::vector<CompressionType>)},
-      {offsetof(struct ColumnFamilyOptions, prefix_extractor),
+      {offset_of(&ColumnFamilyOptions::inplace_callback),
+       sizeof(UpdateStatus(*)(char*, uint32_t*, Slice, std::string*))},
+      {offset_of(
+           &ColumnFamilyOptions::memtable_insert_with_hint_prefix_extractor),
       sizeof(std::shared_ptr<const SliceTransform>)},
-      {offsetof(struct ColumnFamilyOptions,
-                max_bytes_for_level_multiplier_additional),
+      {offset_of(&ColumnFamilyOptions::compression_per_level),
+       sizeof(std::vector<CompressionType>)},
+      {offset_of(
+           &ColumnFamilyOptions::max_bytes_for_level_multiplier_additional),
       sizeof(std::vector<int>)},
-      {offsetof(struct ColumnFamilyOptions, memtable_factory),
+      {offset_of(&ColumnFamilyOptions::memtable_factory),
       sizeof(std::shared_ptr<MemTableRepFactory>)},
-      {offsetof(struct ColumnFamilyOptions, table_factory),
-       sizeof(std::shared_ptr<TableFactory>)},
-      {offsetof(struct ColumnFamilyOptions,
-                table_properties_collector_factories),
+      {offset_of(&ColumnFamilyOptions::table_properties_collector_factories),
       sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
-      {offsetof(struct ColumnFamilyOptions, inplace_callback),
-       sizeof(UpdateStatus(*)(char*, uint32_t*, Slice, std::string*))},
-      {offsetof(struct ColumnFamilyOptions,
-                memtable_insert_with_hint_prefix_extractor),
+      {offset_of(&ColumnFamilyOptions::comparator), sizeof(Comparator*)},
+      {offset_of(&ColumnFamilyOptions::merge_operator),
+       sizeof(std::shared_ptr<MergeOperator>)},
+      {offset_of(&ColumnFamilyOptions::compaction_filter),
+       sizeof(const CompactionFilter*)},
+      {offset_of(&ColumnFamilyOptions::compaction_filter_factory),
+       sizeof(std::shared_ptr<CompactionFilterFactory>)},
+      {offset_of(&ColumnFamilyOptions::prefix_extractor),
       sizeof(std::shared_ptr<const SliceTransform>)},
+      {offset_of(&ColumnFamilyOptions::table_factory),
+       sizeof(std::shared_ptr<TableFactory>)},
  };

  char* options_ptr = new char[sizeof(ColumnFamilyOptions)];