Optimizer's skeleton: use advisor to optimize config options (#4169)
Summary: In https://github.com/facebook/rocksdb/pull/3934 we introduced advisor scripts that make suggestions for the config options based on the log file and stats from a run of RocksDB. The optimizer runs the advisor on a benchmark application in a loop and automatically applies the suggested changes until the config options are optimized. This is a work in progress; this patch is the initial skeleton for the optimizer. The sample application currently run in the loop is db_bench.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4169
Reviewed By: maysamyabandeh
Differential Revision: D9023671
Pulled By: poojam23
fbshipit-source-id: a6192d475c462cf6eb2b316716f97cb400fcb64d
Branch: main
Parent: bdc6abd0b4
Commit: 134a52e144
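The feedback loop described in the summary is implemented by ConfigOptimizer.run() in db_config_optimizer.py below. The following condensed sketch is illustrative only and is not part of the patch; it omits the advisor re-load and some bookkeeping done by the real method, and it assumes that bench_runner, rule_parser, db_options and base_db_path have already been constructed the way config_optimizer_example.py constructs them.

import copy
from advisor.db_config_optimizer import ConfigOptimizer

options = copy.deepcopy(db_options)
data_srcs, best_metric = bench_runner.run_experiment(options, base_db_path)
triggered = rule_parser.get_triggered_rules(data_srcs, options.get_column_families())
rule, tried, curr_conf, new_conf = ConfigOptimizer.apply_suggestions(
    triggered, None, set(), False, options, rule_parser.get_suggestions_dict())

while rule:
    options.update_options(new_conf)  # apply the picked rule's suggestions
    data_srcs, metric = bench_runner.run_experiment(options, base_db_path)
    backtrack = not bench_runner.is_metric_better(metric, best_metric)
    if backtrack:
        # performance regressed: revert to the previous option values
        options.update_options(
            ConfigOptimizer.get_backtrack_config(curr_conf, new_conf))
    else:
        # improvement: keep the change and re-run the advisor on the new data
        best_metric = metric
        triggered = rule_parser.get_triggered_rules(
            data_srcs, options.get_column_families())
        tried = set()
    rule, tried, curr_conf, new_conf = ConfigOptimizer.apply_suggestions(
        triggered, rule.name, tried, backtrack, options,
        rule_parser.get_suggestions_dict())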
@@ -0,0 +1,39 @@ (new file: advisor/bench_runner.py)
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from abc import ABC, abstractmethod
import re


class BenchmarkRunner(ABC):
    @staticmethod
    @abstractmethod
    def is_metric_better(new_metric, old_metric):
        pass

    @abstractmethod
    def run_experiment(self):
        # should return a list of DataSource objects
        pass

    @staticmethod
    def get_info_log_file_name(log_dir, db_path):
        # Example: DB Path = /dev/shm and OPTIONS file has option
        # db_log_dir=/tmp/rocks/, then the name of the log file will be
        # 'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is
        # not specified in the OPTIONS file, then the location of the log file
        # will be /dev/shm and the name of the file will be 'LOG'
        file_name = ''
        if log_dir:
            # refer GetInfoLogPrefix() in rocksdb/util/filename.cc
            # example db_path: /dev/shm/dbbench
            file_name = db_path[1:]  # to ignore the leading '/' character
            to_be_replaced = re.compile('[^0-9a-zA-Z\-_\.]')
            for character in to_be_replaced.findall(db_path):
                file_name = file_name.replace(character, '_')
            if not file_name.endswith('_'):
                file_name += '_'
        file_name += 'LOG'
        return file_name
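A quick check of the name mangling implemented above, using the paths from the comments (illustrative only; not part of the patch):

from advisor.bench_runner import BenchmarkRunner

# db_log_dir is set in OPTIONS: the DB path is flattened into the file name
BenchmarkRunner.get_info_log_file_name('/tmp/rocks/', '/dev/shm')          # -> 'dev_shm_LOG'
BenchmarkRunner.get_info_log_file_name('/tmp/rocks/', '/dev/shm/dbbench')  # -> 'dev_shm_dbbench_LOG'
# no db_log_dir: the file is simply named 'LOG' and lives in the DB path
BenchmarkRunner.get_info_log_file_name(None, '/dev/shm')                   # -> 'LOG'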
@@ -0,0 +1,134 @@ (new file: advisor/config_optimizer_example.py)
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

import argparse
from advisor.db_config_optimizer import ConfigOptimizer
from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import RulesSpec


CONFIG_OPT_NUM_ITER = 10


def main(args):
    # initialise the RulesSpec parser
    rule_spec_parser = RulesSpec(args.rules_spec)
    # initialise the benchmark runner
    bench_runner_module = __import__(
        args.benchrunner_module, fromlist=[args.benchrunner_class]
    )
    bench_runner_class = getattr(bench_runner_module, args.benchrunner_class)
    ods_args = {}
    if args.ods_client and args.ods_entity:
        ods_args['client_script'] = args.ods_client
        ods_args['entity'] = args.ods_entity
        if args.ods_key_prefix:
            ods_args['key_prefix'] = args.ods_key_prefix
    db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args)
    # initialise the database configuration
    db_options = DatabaseOptions(args.rocksdb_options, args.misc_options)
    # set the frequency at which stats are dumped in the LOG file and the
    # location of the LOG file.
    db_log_dump_settings = {
        "DBOptions.stats_dump_period_sec": {
            NO_COL_FAMILY: args.stats_dump_period_sec
        }
    }
    db_options.update_options(db_log_dump_settings)
    # initialise the configuration optimizer
    config_optimizer = ConfigOptimizer(
        db_bench_runner,
        db_options,
        rule_spec_parser,
        args.base_db_path
    )
    # run the optimiser to improve the database configuration for given
    # benchmarks, with the help of expert-specified rules
    final_db_options = config_optimizer.run()
    # generate the final rocksdb options file
    print(
        'Final configuration in: ' +
        final_db_options.generate_options_config('final')
    )
    print(
        'Final miscellaneous options: ' +
        repr(final_db_options.get_misc_options())
    )


if __name__ == '__main__':
    '''
    An example run of this tool from the command-line would look like:
    python3 -m advisor.config_optimizer_example
    --base_db_path=/tmp/rocksdbtest-155919/dbbench
    --rocksdb_options=temp/OPTIONS_boot.tmp --misc_options bloom_bits=2
    --rules_spec=advisor/rules.ini --stats_dump_period_sec=20
    --benchrunner_module=advisor.db_bench_runner
    --benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench
    readwhilewriting use_existing_db=true duration=90
    '''
    parser = argparse.ArgumentParser(description='This script is used for\
        searching for a better database configuration')
    parser.add_argument(
        '--rocksdb_options', required=True, type=str,
        help='path of the starting Rocksdb OPTIONS file'
    )
    # these are options that are column-family agnostic and are not yet
    # supported by the Rocksdb Options file: eg. bloom_bits=2
    parser.add_argument(
        '--base_db_path', required=True, type=str,
        help='path for the Rocksdb database'
    )
    parser.add_argument(
        '--misc_options', nargs='*',
        help='whitespace-separated list of options that are not supported ' +
        'by the Rocksdb OPTIONS file, given in the ' +
        '<option_name>=<option_value> format eg. "bloom_bits=2 ' +
        'rate_limiter_bytes_per_sec=128000000"')
    parser.add_argument(
        '--rules_spec', required=True, type=str,
        help='path of the file containing the expert-specified Rules'
    )
    parser.add_argument(
        '--stats_dump_period_sec', required=True, type=int,
        help='the frequency (in seconds) at which STATISTICS are printed to ' +
        'the Rocksdb LOG file'
    )
    # ODS arguments
    parser.add_argument(
        '--ods_client', type=str, help='the ODS client binary'
    )
    parser.add_argument(
        '--ods_entity', type=str,
        help='the servers for which the ODS stats need to be fetched'
    )
    parser.add_argument(
        '--ods_key_prefix', type=str,
        help='the prefix that needs to be attached to the keys of time ' +
        'series to be fetched from ODS'
    )
    # benchrunner_module example: advisor.db_benchmark_client
    parser.add_argument(
        '--benchrunner_module', required=True, type=str,
        help='the module containing the BenchmarkRunner class to be used by ' +
        'the Optimizer, example: advisor.db_bench_runner'
    )
    # benchrunner_class example: DBBenchRunner
    parser.add_argument(
        '--benchrunner_class', required=True, type=str,
        help='the name of the BenchmarkRunner class to be used by the ' +
        'Optimizer, should be present in the module provided in the ' +
        'benchrunner_module argument, example: DBBenchRunner'
    )
    parser.add_argument(
        '--benchrunner_pos_args', nargs='*',
        help='whitespace-separated positional arguments that are passed on ' +
        'to the constructor of the BenchmarkRunner class provided in the ' +
        'benchrunner_class argument, example: "use_existing_db=true ' +
        'duration=900"'
    )
    args = parser.parse_args()
    main(args)
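The --benchrunner_module / --benchrunner_class pair is resolved with a plain __import__ plus getattr, so any module exposing a BenchmarkRunner subclass can be plugged in. An equivalent standalone snippet, using the values from the example command in the docstring above (illustrative only; the binary path is a placeholder):

module = __import__('advisor.db_bench_runner', fromlist=['DBBenchRunner'])
runner_cls = getattr(module, 'DBBenchRunner')
# positional args: db_bench binary, benchmark name, then "<option>=<value>" pairs
runner = runner_cls(
    ['./../../db_bench', 'readwhilewriting', 'use_existing_db=true', 'duration=90'],
    ods_args={}
)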
@@ -0,0 +1,312 @@ (new file: advisor/db_bench_runner.py)
from advisor.bench_runner import BenchmarkRunner
from advisor.db_log_parser import DataSource, DatabaseLogs, NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.db_stats_fetcher import (
    LogStatsParser, OdsStatsFetcher, DatabasePerfContext
)
import os
import re
import shutil
import subprocess
import time


'''
NOTE: This is not thread-safe, because the output file is simply overwritten.
'''


class DBBenchRunner(BenchmarkRunner):
    OUTPUT_FILE = "temp/dbbench_out.tmp"
    ERROR_FILE = "temp/dbbench_err.tmp"
    DB_PATH = "DB path"
    THROUGHPUT = "ops/sec"
    PERF_CON = " PERF_CONTEXT:"

    @staticmethod
    def is_metric_better(new_metric, old_metric):
        # for db_bench 'throughput' is the metric returned by run_experiment
        return new_metric >= old_metric

    @staticmethod
    def get_opt_args_str(misc_options_dict):
        optional_args_str = ""
        for option_name, option_value in misc_options_dict.items():
            if option_value:
                optional_args_str += (
                    " --" + option_name + "=" + str(option_value)
                )
        return optional_args_str

    def __init__(self, positional_args, ods_args=None):
        # parse positional_args list appropriately
        self.db_bench_binary = positional_args[0]
        self.benchmark = positional_args[1]
        self.db_bench_args = None
        # TODO(poojam23): move to unittest with method get_available_workloads
        self.supported_benchmarks = None
        if len(positional_args) > 2:
            # options list with each option given as "<option>=<value>"
            self.db_bench_args = positional_args[2:]
        # save ods_args if provided
        self.ods_args = ods_args

    def _parse_output(self, get_perf_context=False):
        '''
        Sample db_bench output after running 'readwhilewriting' benchmark:
        DB path: [/tmp/rocksdbtest-155919/dbbench]\n
        readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
        of 5427999 found)\n
        PERF_CONTEXT:\n
        user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
        '''
        output = {
            self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None
        }
        perf_context_begins = False
        with open(self.OUTPUT_FILE, 'r') as fp:
            for line in fp:
                if line.startswith(self.benchmark):
                    print(line)  # print output of db_bench run
                    token_list = line.strip().split()
                    for ix, token in enumerate(token_list):
                        if token.startswith(self.THROUGHPUT):
                            output[self.THROUGHPUT] = (
                                float(token_list[ix - 1])
                            )
                            break
                elif line.startswith(self.PERF_CON):
                    perf_context_begins = True
                elif get_perf_context and perf_context_begins:
                    # Sample perf_context output:
                    # user_key_comparison_count = 500, block_cache_hit_count =\
                    # 468, block_read_count = 580, block_read_byte = 445, ...
                    token_list = line.strip().split(',')
                    perf_context = {
                        tk.split('=')[0].strip(): tk.split('=')[1].strip()
                        for tk in token_list
                        if tk
                    }
                    # TODO(poojam23): this is a hack and should be replaced
                    # with the timestamp that db_bench will provide per printed
                    # perf_context
                    timestamp = int(time.time())
                    perf_context_ts = {}
                    for stat in perf_context.keys():
                        perf_context_ts[stat] = {
                            timestamp: int(perf_context[stat])
                        }
                    output[self.PERF_CON] = perf_context_ts
                    perf_context_begins = False
                elif line.startswith(self.DB_PATH):
                    output[self.DB_PATH] = (
                        line.split('[')[1].split(']')[0]
                    )
        return output

    def get_log_options(self, db_options, db_path):
        # get the location of the LOG file and the frequency at which stats are
        # dumped in the LOG file
        log_dir_path = None
        stats_freq_sec = None
        logs_file_prefix = None

        # fetch the options
        dump_period = 'DBOptions.stats_dump_period_sec'
        log_dir = 'DBOptions.db_log_dir'
        log_options = db_options.get_options([dump_period, log_dir])
        if dump_period in log_options:
            stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
        if log_dir in log_options:
            log_dir_path = log_options[log_dir][NO_COL_FAMILY]

        log_file_name = DBBenchRunner.get_info_log_file_name(
            log_dir_path, db_path
        )

        if not log_dir_path:
            log_dir_path = db_path
        if not log_dir_path.endswith('/'):
            log_dir_path += '/'

        logs_file_prefix = log_dir_path + log_file_name
        return (logs_file_prefix, stats_freq_sec)

    def _get_options_command_line_args_str(self, curr_options):
        '''
        This method uses the provided Rocksdb OPTIONS to create a string of
        command-line arguments for db_bench.
        The --options_file argument is always given and the options that are
        not supported by the OPTIONS file are given as separate arguments.
        '''
        optional_args_str = DBBenchRunner.get_opt_args_str(
            curr_options.get_misc_options()
        )
        # generate an options configuration file
        options_file = curr_options.generate_options_config(nonce='12345')
        optional_args_str += " --options_file=" + options_file
        return optional_args_str

    def _setup_db_before_experiment(self, curr_options, db_path):
        # remove destination directory if it already exists
        try:
            shutil.rmtree(db_path, ignore_errors=True)
        except OSError as e:
            print('Error: rmdir ' + e.filename + ' ' + e.strerror)
        command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
            self.db_bench_binary, db_path
        )
        args_str = self._get_options_command_line_args_str(curr_options)
        command += args_str
        self._run_command(command)

    def _build_experiment_command(self, curr_options, db_path):
        command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
            self.db_bench_binary, self.benchmark, db_path
        )
        args_str = self._get_options_command_line_args_str(curr_options)
        # handle the command-line args passed in the constructor
        for cmd_line_arg in self.db_bench_args:
            args_str += (" --" + cmd_line_arg)
        command += args_str
        return command

    def _run_command(self, command):
        # run db_bench and return the
        out_file = open(self.OUTPUT_FILE, "w+")
        err_file = open(self.ERROR_FILE, "w+")
        print('executing... - ' + command)
        subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
        out_file.close()
        err_file.close()

    def run_experiment(self, db_options, db_path):
        # type: (List[str], str) -> str
        self._setup_db_before_experiment(db_options, db_path)
        command = self._build_experiment_command(db_options, db_path)
        self._run_command(command)

        parsed_output = self._parse_output(get_perf_context=True)

        # Create the LOGS object
        # get the log options from the OPTIONS file
        logs_file_prefix, stats_freq_sec = self.get_log_options(
            db_options, parsed_output[self.DB_PATH]
        )
        db_logs = DatabaseLogs(
            logs_file_prefix, db_options.get_column_families()
        )
        # Create the Log STATS object
        db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
        # Create the PerfContext STATS object
        db_perf_context = DatabasePerfContext(
            parsed_output[self.PERF_CON], 0, False
        )
        data_sources = {
            DataSource.Type.DB_OPTIONS: [db_options],
            DataSource.Type.LOG: [db_logs],
            DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context]
        }
        # Create the ODS STATS object
        if self.ods_args:
            data_sources[DataSource.Type.TIME_SERIES].append(OdsStatsFetcher(
                self.ods_args['client_script'],
                self.ods_args['entity'],
                self.ods_args['key_prefix']
            ))
        return data_sources, parsed_output[self.THROUGHPUT]

    # TODO: this method is for testing, shift it out to unit-tests when ready
    def get_available_workloads(self):
        if not self.supported_benchmarks:
            self.supported_benchmarks = []
            command = '%s --help' % self.db_bench_binary
            self._run_command(command)
            with open(self.OUTPUT_FILE, 'r') as fp:
                start = False
                for line in fp:
                    if re.search('available benchmarks', line, re.IGNORECASE):
                        start = True
                        continue
                    elif start:
                        if re.search('meta operations', line, re.IGNORECASE):
                            break
                        benchmark_info = line.strip()
                        if benchmark_info:
                            token_list = benchmark_info.split()
                            if len(token_list) > 2 and token_list[1] == '--':
                                self.supported_benchmarks.append(token_list[0])
                    else:
                        continue
            self.supported_benchmarks = sorted(self.supported_benchmarks)
        return self.supported_benchmarks


# TODO: remove this method, used only for testing
def main():
    pos_args = [
        '/home/poojamalik/workspace/rocksdb/db_bench',
        'readwhilewriting',
        'use_existing_db=true',
        'duration=10'
    ]
    db_bench_helper = DBBenchRunner(pos_args)
    # populate benchmarks with the available ones in the db_bench tool
    benchmarks = db_bench_helper.get_available_workloads()
    print(benchmarks)
    print()
    options_file = (
        '/home/poojamalik/workspace/rocksdb/tools/advisor/temp/' +
        'OPTIONS_temp.tmp'
    )
    misc_options = ["rate_limiter_bytes_per_sec=1024000", "bloom_bits=2"]
    db_options = DatabaseOptions(options_file, misc_options)
    data_sources, _ = db_bench_helper.run_experiment(db_options)
    print(data_sources[DataSource.Type.DB_OPTIONS][0].options_dict)
    print()
    print(data_sources[DataSource.Type.LOG][0].logs_path_prefix)
    if os.path.isfile(data_sources[DataSource.Type.LOG][0].logs_path_prefix):
        print('log file exists!')
    else:
        print('error: log file does not exist!')
    print(data_sources[DataSource.Type.LOG][0].column_families)
    print()
    print(data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix)
    if (
        os.path.isfile(
            data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix
        )
    ):
        print('log file exists!')
    else:
        print('error: log file does not exist!')
    print(data_sources[DataSource.Type.TIME_SERIES][0].stats_freq_sec)
    print(data_sources[DataSource.Type.TIME_SERIES][1].keys_ts)

    db_options = DatabaseOptions(options_file, None)
    data_sources, _ = db_bench_helper.run_experiment(db_options)
    print(data_sources[DataSource.Type.DB_OPTIONS][0].options_dict)
    print()
    print(data_sources[DataSource.Type.LOG][0].logs_path_prefix)
    if os.path.isfile(data_sources[DataSource.Type.LOG][0].logs_path_prefix):
        print('log file exists!')
    else:
        print('error: log file does not exist!')
    print(data_sources[DataSource.Type.LOG][0].column_families)
    print()
    print(data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix)
    if (
        os.path.isfile(
            data_sources[DataSource.Type.TIME_SERIES][0].logs_file_prefix
        )
    ):
        print('log file exists!')
    else:
        print('error: log file does not exist!')
    print(data_sources[DataSource.Type.TIME_SERIES][0].stats_freq_sec)
    print(data_sources[DataSource.Type.TIME_SERIES][1].keys_ts)
    print(data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec)


if __name__ == "__main__":
    main()
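For the sample db_bench output shown in the _parse_output() docstring, the parsed result would look roughly like the sketch below (illustrative only, not part of the patch; the PERF_CONTEXT timestamp is whatever int(time.time()) returns at parse time, 1532518219 is a placeholder):

# Approximate shape of _parse_output(get_perf_context=True) for that output:
{
    'ops/sec': 60305.0,                            # DBBenchRunner.THROUGHPUT
    'DB path': '/tmp/rocksdbtest-155919/dbbench',  # DBBenchRunner.DB_PATH
    ' PERF_CONTEXT:': {                            # DBBenchRunner.PERF_CON
        'user_key_comparison_count': {1532518219: 500466712},
        'block_cache_hit_count': {1532518219: ...},  # remaining counters elided
    },
}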
@@ -0,0 +1,282 @@ (new file: advisor/db_config_optimizer.py)
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import Suggestion
import copy
import random


class ConfigOptimizer:
    SCOPE = 'scope'
    SUGG_VAL = 'suggested values'

    @staticmethod
    def apply_action_on_value(old_value, action, suggested_values):
        chosen_sugg_val = None
        if suggested_values:
            chosen_sugg_val = random.choice(list(suggested_values))
        new_value = None
        if action is Suggestion.Action.set or not old_value:
            assert(chosen_sugg_val)
            new_value = chosen_sugg_val
        else:
            # For increase/decrease actions, currently the code tries to make
            # a 30% change in the option's value per iteration. An addend is
            # also present (+1 or -1) to handle the cases when the option's
            # old value was 0 or the final int() conversion suppressed the 30%
            # change made to the option
            old_value = float(old_value)
            mul = 0
            add = 0
            if action is Suggestion.Action.increase:
                if old_value < 0:
                    mul = 0.7
                    add = 2
                else:
                    mul = 1.3
                    add = 2
            elif action is Suggestion.Action.decrease:
                if old_value < 0:
                    mul = 1.3
                    add = -2
                else:
                    mul = 0.7
                    add = -2
            new_value = int(old_value * mul + add)
        return new_value

    @staticmethod
    def improve_db_config(options, rule, suggestions_dict):
        # this method takes ONE 'rule' and applies all its suggestions on the
        # appropriate options
        required_options = []
        rule_suggestions = []
        for sugg_name in rule.get_suggestions():
            option = suggestions_dict[sugg_name].option
            action = suggestions_dict[sugg_name].action
            # A Suggestion in the rules spec must have the 'option' and
            # 'action' fields defined, always call perform_checks() method
            # after parsing the rules file using RulesSpec
            assert(option)
            assert(action)
            required_options.append(option)
            rule_suggestions.append(suggestions_dict[sugg_name])
        current_config = options.get_options(required_options)
        # Create the updated configuration from the rule's suggestions
        updated_config = {}
        for sugg in rule_suggestions:
            # case: when the option is not present in the current configuration
            if sugg.option not in current_config:
                try:
                    new_value = ConfigOptimizer.apply_action_on_value(
                        None, sugg.action, sugg.suggested_values
                    )
                    if sugg.option not in updated_config:
                        updated_config[sugg.option] = {}
                    if DatabaseOptions.is_misc_option(sugg.option):
                        # this suggestion is on an option that is not yet
                        # supported by the Rocksdb OPTIONS file and so it is
                        # not prefixed by a section type.
                        updated_config[sugg.option][NO_COL_FAMILY] = new_value
                    else:
                        for col_fam in rule.get_trigger_column_families():
                            updated_config[sugg.option][col_fam] = new_value
                except AssertionError:
                    print(
                        'WARNING(ConfigOptimizer): provide suggested_values ' +
                        'for ' + sugg.option
                    )
                continue
            # case: when the option is present in the current configuration
            if NO_COL_FAMILY in current_config[sugg.option]:
                old_value = current_config[sugg.option][NO_COL_FAMILY]
                try:
                    new_value = ConfigOptimizer.apply_action_on_value(
                        old_value, sugg.action, sugg.suggested_values
                    )
                    if sugg.option not in updated_config:
                        updated_config[sugg.option] = {}
                    updated_config[sugg.option][NO_COL_FAMILY] = new_value
                except AssertionError:
                    print(
                        'WARNING(ConfigOptimizer): provide suggested_values ' +
                        'for ' + sugg.option
                    )
            else:
                for col_fam in rule.get_trigger_column_families():
                    old_value = None
                    if col_fam in current_config[sugg.option]:
                        old_value = current_config[sugg.option][col_fam]
                    try:
                        new_value = ConfigOptimizer.apply_action_on_value(
                            old_value, sugg.action, sugg.suggested_values
                        )
                        if sugg.option not in updated_config:
                            updated_config[sugg.option] = {}
                        updated_config[sugg.option][col_fam] = new_value
                    except AssertionError:
                        print(
                            'WARNING(ConfigOptimizer): provide ' +
                            'suggested_values for ' + sugg.option
                        )
        return current_config, updated_config

    @staticmethod
    def pick_rule_to_apply(rules, last_rule_name, rules_tried, backtrack):
        if not rules:
            print('\nNo more rules triggered!')
            return None
        # if the last rule provided an improvement in the database performance,
        # and it was triggered again (i.e. it is present in 'rules'), then pick
        # the same rule for this iteration too.
        if last_rule_name and not backtrack:
            for rule in rules:
                if rule.name == last_rule_name:
                    return rule
        # there was no previous rule OR the previous rule did not improve db
        # performance OR it was not triggered for this iteration,
        # then pick another rule that has not been tried yet
        for rule in rules:
            if rule.name not in rules_tried:
                return rule
        print('\nAll rules have been exhausted')
        return None

    @staticmethod
    def apply_suggestions(
        triggered_rules,
        current_rule_name,
        rules_tried,
        backtrack,
        curr_options,
        suggestions_dict
    ):
        curr_rule = ConfigOptimizer.pick_rule_to_apply(
            triggered_rules, current_rule_name, rules_tried, backtrack
        )
        if not curr_rule:
            return tuple([None]*4)
        # if a rule has been picked for improving db_config, update rules_tried
        rules_tried.add(curr_rule.name)
        # get updated config based on the picked rule
        curr_conf, updated_conf = ConfigOptimizer.improve_db_config(
            curr_options, curr_rule, suggestions_dict
        )
        conf_diff = DatabaseOptions.get_options_diff(curr_conf, updated_conf)
        if not conf_diff:  # the current and updated configs are the same
            curr_rule, rules_tried, curr_conf, updated_conf = (
                ConfigOptimizer.apply_suggestions(
                    triggered_rules,
                    None,
                    rules_tried,
                    backtrack,
                    curr_options,
                    suggestions_dict
                )
            )
        print('returning from apply_suggestions')
        return (curr_rule, rules_tried, curr_conf, updated_conf)

    # TODO(poojam23): check if this method is required or can we directly set
    # the config equal to the curr_config
    @staticmethod
    def get_backtrack_config(curr_config, updated_config):
        diff = DatabaseOptions.get_options_diff(curr_config, updated_config)
        bt_config = {}
        for option in diff:
            bt_config[option] = {}
            for col_fam in diff[option]:
                bt_config[option][col_fam] = diff[option][col_fam][0]
        print(bt_config)
        return bt_config

    def __init__(self, bench_runner, db_options, rule_parser, base_db):
        self.bench_runner = bench_runner
        self.db_options = db_options
        self.rule_parser = rule_parser
        self.base_db_path = base_db

    def run(self):
        # In every iteration of this method's optimization loop we pick ONE
        # RULE from all the triggered rules and apply all its suggestions to
        # the appropriate options.
        # bootstrapping the optimizer
        print('Bootstrapping optimizer:')
        options = copy.deepcopy(self.db_options)
        old_data_sources, old_metric = (
            self.bench_runner.run_experiment(options, self.base_db_path)
        )
        print('Initial metric: ' + str(old_metric))
        self.rule_parser.load_rules_from_spec()
        self.rule_parser.perform_section_checks()
        triggered_rules = self.rule_parser.get_triggered_rules(
            old_data_sources, options.get_column_families()
        )
        print('\nTriggered:')
        self.rule_parser.print_rules(triggered_rules)
        backtrack = False
        rules_tried = set()
        curr_rule, rules_tried, curr_conf, updated_conf = (
            ConfigOptimizer.apply_suggestions(
                triggered_rules,
                None,
                rules_tried,
                backtrack,
                options,
                self.rule_parser.get_suggestions_dict()
            )
        )
        # the optimizer loop
        while curr_rule:
            print('\nRule picked for next iteration:')
            print(curr_rule.name)
            print('\ncurrent config:')
            print(curr_conf)
            print('updated config:')
            print(updated_conf)
            options.update_options(updated_conf)
            # run bench_runner with updated config
            new_data_sources, new_metric = (
                self.bench_runner.run_experiment(options, self.base_db_path)
            )
            print('\nnew metric: ' + str(new_metric))
            backtrack = not self.bench_runner.is_metric_better(
                new_metric, old_metric
            )
            # update triggered_rules, metric, data_sources, if required
            if backtrack:
                # revert changes to options config
                print('\nBacktracking to previous configuration')
                backtrack_conf = ConfigOptimizer.get_backtrack_config(
                    curr_conf, updated_conf
                )
                options.update_options(backtrack_conf)
            else:
                # run advisor on new data sources
                self.rule_parser.load_rules_from_spec()  # reboot the advisor
                self.rule_parser.perform_section_checks()
                triggered_rules = self.rule_parser.get_triggered_rules(
                    new_data_sources, options.get_column_families()
                )
                print('\nTriggered:')
                self.rule_parser.print_rules(triggered_rules)
                old_metric = new_metric
                old_data_sources = new_data_sources
                rules_tried = set()
            # pick rule to work on and set curr_rule to that
            curr_rule, rules_tried, curr_conf, updated_conf = (
                ConfigOptimizer.apply_suggestions(
                    triggered_rules,
                    curr_rule.name,
                    rules_tried,
                    backtrack,
                    options,
                    self.rule_parser.get_suggestions_dict()
                )
            )
        # return the final database options configuration
        return options
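A worked example of apply_action_on_value() above (illustrative only, not part of the patch). Note that while the comment mentions an addend of +1 or -1, the addend the code actually applies is +/-2:

from advisor.db_config_optimizer import ConfigOptimizer
from advisor.rule_parser import Suggestion

# 'increase' and 'decrease' scale the old value by +/-30% and add +/-2:
ConfigOptimizer.apply_action_on_value(100, Suggestion.Action.increase, None)      # -> int(100 * 1.3 + 2) = 132
ConfigOptimizer.apply_action_on_value(100, Suggestion.Action.decrease, None)      # -> int(100 * 0.7 - 2) = 68
# 'set', or any action when there is no usable old value, picks one of the
# rule's suggested_values at random:
ConfigOptimizer.apply_action_on_value(None, Suggestion.Action.set, {'16', '32'})  # -> '16' or '32'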
@@ -0,0 +1,421 @@ (new file: advisor/db_stats_fetcher.py)
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from advisor.db_log_parser import Log
from advisor.db_timeseries_parser import TimeSeriesData, NO_ENTITY
from advisor.rule_parser import Condition, TimeSeriesCondition
import copy
import glob
import re
import subprocess
import time


class LogStatsParser(TimeSeriesData):
    STATS = 'STATISTICS:'

    @staticmethod
    def parse_log_line_for_stats(log_line):
        # Example stat line (from LOG file):
        # "rocksdb.db.get.micros P50 : 8.4 P95 : 21.8 P99 : 33.9 P100 : 92.0\n"
        token_list = log_line.strip().split()
        # token_list = ['rocksdb.db.get.micros', 'P50', ':', '8.4', 'P95', ':',
        # '21.8', 'P99', ':', '33.9', 'P100', ':', '92.0']
        stat_prefix = token_list[0] + '.'  # 'rocksdb.db.get.micros.'
        stat_values = [
            token
            for token in token_list[1:]
            if token != ':'
        ]
        # stat_values = ['P50', '8.4', 'P95', '21.8', 'P99', '33.9', 'P100',
        # '92.0']
        stat_dict = {}
        for ix, metric in enumerate(stat_values):
            if ix % 2 == 0:
                stat_name = stat_prefix + metric
                stat_name = stat_name.lower()  # Note: case insensitive names
            else:
                stat_dict[stat_name] = float(metric)
        # stat_dict = {'rocksdb.db.get.micros.p50': 8.4,
        # 'rocksdb.db.get.micros.p95': 21.8, 'rocksdb.db.get.micros.p99': 33.9,
        # 'rocksdb.db.get.micros.p100': 92.0}
        return stat_dict

    def __init__(self, logs_path_prefix, stats_freq_sec):
        super().__init__()
        self.logs_file_prefix = logs_path_prefix
        self.stats_freq_sec = stats_freq_sec
        self.duration_sec = 60

    def get_keys_from_conditions(self, conditions):
        # Note: case insensitive stat names
        reqd_stats = []
        for cond in conditions:
            for key in cond.keys:
                key = key.lower()
                # some keys are prepended with '[]' for OdsStatsFetcher to
                # replace this with the appropriate key_prefix, remove these
                # characters here since the LogStatsParser does not need
                # a prefix
                if key.startswith('[]'):
                    reqd_stats.append(key[2:])
                else:
                    reqd_stats.append(key)
        return reqd_stats

    def add_to_timeseries(self, log, reqd_stats):
        # this method takes in the Log object that contains the Rocksdb stats
        # and a list of required stats, then it parses the stats line by line
        # to fetch required stats and add them to the keys_ts object
        # Example: reqd_stats = ['rocksdb.block.cache.hit.count',
        # 'rocksdb.db.get.micros.p99']
        # Let log.get_message() returns following string:
        # "[WARN] [db/db_impl.cc:485] STATISTICS:\n
        # rocksdb.block.cache.miss COUNT : 1459\n
        # rocksdb.block.cache.hit COUNT : 37\n
        # ...
        # rocksdb.db.get.micros P50 : 15.6 P95 : 39.7 P99 : 62.6 P100 : 148.0\n
        # ..."
        new_lines = log.get_message().split('\n')
        # let log_ts = 1532518219
        log_ts = log.get_timestamp()
        # example updates to keys_ts:
        # keys_ts[NO_ENTITY]['rocksdb.db.get.micros.p99'][1532518219] = 62.6
        # keys_ts[NO_ENTITY]['rocksdb.block.cache.hit.count'][1532518219] = 37
        for line in new_lines[1:]:  # new_lines[0] does not contain any stats
            stats_on_line = self.parse_log_line_for_stats(line)
            for stat in stats_on_line:
                if stat in reqd_stats:
                    if stat not in self.keys_ts[NO_ENTITY]:
                        self.keys_ts[NO_ENTITY][stat] = {}
                    self.keys_ts[NO_ENTITY][stat][log_ts] = stats_on_line[stat]

    def fetch_timeseries(self, reqd_stats):
        # this method parses the Rocksdb LOG file and generates timeseries for
        # each of the statistic in the list reqd_stats
        self.keys_ts = {NO_ENTITY: {}}
        for file_name in glob.glob(self.logs_file_prefix + '*'):
            # TODO(poojam23): find a way to distinguish between 'old' log files
            # from current and previous experiments, present in the same
            # directory
            if re.search('old', file_name, re.IGNORECASE):
                continue
            with open(file_name, 'r') as db_logs:
                new_log = None
                for line in db_logs:
                    if Log.is_new_log(line):
                        if (
                            new_log and
                            re.search(self.STATS, new_log.get_message())
                        ):
                            self.add_to_timeseries(new_log, reqd_stats)
                        new_log = Log(line, column_families=[])
                    else:
                        # To account for logs split into multiple lines
                        new_log.append_message(line)
            # Check for the last log in the file.
            if new_log and re.search(self.STATS, new_log.get_message()):
                self.add_to_timeseries(new_log, reqd_stats)


class DatabasePerfContext(TimeSeriesData):
    # TODO(poojam23): check if any benchrunner provides PerfContext sampled at
    # regular intervals
    def __init__(self, perf_context_ts, stats_freq_sec=0, cumulative=True):
        '''
        perf_context_ts is expected to be in the following format:
        Dict[metric, Dict[timestamp, value]], where for
        each (metric, timestamp) pair, the value is database-wide (i.e.
        summed over all the threads involved)
        if stats_freq_sec == 0, per-metric only one value is reported
        '''
        super().__init__()
        self.stats_freq_sec = stats_freq_sec
        self.keys_ts = {NO_ENTITY: perf_context_ts}
        if cumulative:
            self.unaccumulate_metrics()

    def unaccumulate_metrics(self):
        # if the perf context metrics provided are cumulative in nature, this
        # method can be used to convert them to a disjoint format
        epoch_ts = copy.deepcopy(self.keys_ts)
        for stat in self.keys_ts[NO_ENTITY]:
            timeseries = sorted(
                list(self.keys_ts[NO_ENTITY][stat].keys()), reverse=True
            )
            if len(timeseries) < 2:
                continue
            for ix, ts in enumerate(timeseries[:-1]):
                epoch_ts[NO_ENTITY][stat][ts] = (
                    epoch_ts[NO_ENTITY][stat][ts] -
                    epoch_ts[NO_ENTITY][stat][timeseries[ix+1]]
                )
                if epoch_ts[NO_ENTITY][stat][ts] < 0:
                    raise ValueError('DBPerfContext: really cumulative?')
            # drop the smallest timestamp in the timeseries for this metric
            epoch_ts[NO_ENTITY][stat].pop(timeseries[-1])
        self.keys_ts = epoch_ts

    def get_keys_from_conditions(self, conditions):
        reqd_stats = []
        for cond in conditions:
            reqd_stats.extend([key.lower() for key in cond.keys])
        return reqd_stats

    def fetch_timeseries(self, statistics):
        # this method is redundant for DatabasePerfContext because the __init__
        # does the job of populating 'keys_ts'
        pass


class OdsStatsFetcher(TimeSeriesData):
    # class constants
    OUTPUT_FILE = 'temp/stats_out.tmp'
    ERROR_FILE = 'temp/stats_err.tmp'
    RAPIDO_COMMAND = "%s --entity=%s --key=%s --tstart=%s --tend=%s --showtime"
    ODS_COMMAND = '%s %s %s'  # client, entities, keys

    # static methods
    @staticmethod
    def _get_string_in_quotes(value):
        return '"' + str(value) + '"'

    @staticmethod
    def _get_time_value_pair(pair_string):
        # example pair_string: '[1532544591, 97.3653601828]'
        pair_string = pair_string.replace('[', '')
        pair_string = pair_string.replace(']', '')
        pair = pair_string.split(',')
        first = int(pair[0].strip())
        second = float(pair[1].strip())
        return [first, second]

    def __init__(self, client, entities, key_prefix=None):
        super().__init__()
        self.client = client
        self.entities = entities
        self.key_prefix = key_prefix
        self.stats_freq_sec = 60
        self.duration_sec = 60
        # Fetch last 3 hours data by default
        self.end_time = int(time.time())
        self.start_time = self.end_time - (3 * 60 * 60)

    def execute_script(self, command):
        print('executing...')
        print(command)
        out_file = open(self.OUTPUT_FILE, "w+")
        err_file = open(self.ERROR_FILE, "w+")
        subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
        out_file.close()
        err_file.close()

    def parse_rapido_output(self):
        # Output looks like the following:
        # <entity_name>\t<key_name>\t[[ts, value], [ts, value], ...]
        # ts = timestamp; value = value of key_name in entity_name at time ts
        self.keys_ts = {}
        with open(self.OUTPUT_FILE, 'r') as fp:
            for line in fp:
                token_list = line.strip().split('\t')
                entity = token_list[0]
                key = token_list[1]
                if entity not in self.keys_ts:
                    self.keys_ts[entity] = {}
                if key not in self.keys_ts[entity]:
                    self.keys_ts[entity][key] = {}
                list_of_lists = [
                    self._get_time_value_pair(pair_string)
                    for pair_string in token_list[2].split('],')
                ]
                value = {pair[0]: pair[1] for pair in list_of_lists}
                self.keys_ts[entity][key] = value

    def parse_ods_output(self):
        # Output looks like the following:
        # <entity_name>\t<key_name>\t<timestamp>\t<value>
        # there is one line per (entity_name, key_name, timestamp)
        self.keys_ts = {}
        with open(self.OUTPUT_FILE, 'r') as fp:
            for line in fp:
                token_list = line.split()
                entity = token_list[0]
                if entity not in self.keys_ts:
                    self.keys_ts[entity] = {}
                key = token_list[1]
                if key not in self.keys_ts[entity]:
                    self.keys_ts[entity][key] = {}
                self.keys_ts[entity][key][token_list[2]] = token_list[3]

    def fetch_timeseries(self, statistics):
        # this method fetches the timeseries of required stats from the ODS
        # service and populates the 'keys_ts' object appropriately
        print('OdsStatsFetcher: fetching ' + str(statistics))
        if re.search('rapido', self.client, re.IGNORECASE):
            command = self.RAPIDO_COMMAND % (
                self.client,
                self._get_string_in_quotes(self.entities),
                self._get_string_in_quotes(','.join(statistics)),
                self._get_string_in_quotes(self.start_time),
                self._get_string_in_quotes(self.end_time)
            )
            # Run the tool and fetch the time-series data
            self.execute_script(command)
            # Parse output and populate the 'keys_ts' map
            self.parse_rapido_output()
        elif re.search('ods', self.client, re.IGNORECASE):
            command = self.ODS_COMMAND % (
                self.client,
                self._get_string_in_quotes(self.entities),
                self._get_string_in_quotes(','.join(statistics))
            )
            # Run the tool and fetch the time-series data
            self.execute_script(command)
            # Parse output and populate the 'keys_ts' map
            self.parse_ods_output()

    def get_keys_from_conditions(self, conditions):
        reqd_stats = []
        for cond in conditions:
            for key in cond.keys:
                use_prefix = False
                if key.startswith('[]'):
                    use_prefix = True
                    key = key[2:]
                # TODO(poojam23): this is very hacky and needs to be improved
                if key.startswith("rocksdb"):
                    key += ".60"
                if use_prefix:
                    if not self.key_prefix:
                        print('Warning: OdsStatsFetcher might need key prefix')
                        print('for the key: ' + key)
                    else:
                        key = self.key_prefix + "." + key
                reqd_stats.append(key)
        return reqd_stats

    def fetch_rate_url(self, entities, keys, window_len, percent, display):
        # type: (List[str], List[str], str, str, bool) -> str
        transform_desc = (
            "rate(" + str(window_len) + ",duration=" + str(self.duration_sec)
        )
        if percent:
            transform_desc = transform_desc + ",%)"
        else:
            transform_desc = transform_desc + ")"

        command = self.RAPIDO_COMMAND + " --transform=%s --url=%s"
        command = command % (
            self.client,
            self._get_string_in_quotes(','.join(entities)),
            self._get_string_in_quotes(','.join(keys)),
            self._get_string_in_quotes(self.start_time),
            self._get_string_in_quotes(self.end_time),
            self._get_string_in_quotes(transform_desc),
            self._get_string_in_quotes(display)
        )
        self.execute_script(command)
        url = ""
        with open(self.OUTPUT_FILE, 'r') as fp:
            url = fp.readline()
        return url


# TODO(poojam23): remove these blocks once the unittests for LogStatsParser are
# in place
def main():
    # populating the statistics
    log_stats = LogStatsParser('temp/db_stats_fetcher_main_LOG.tmp', 20)
    print(log_stats.type)
    print(log_stats.keys_ts)
    print(log_stats.logs_file_prefix)
    print(log_stats.stats_freq_sec)
    print(log_stats.duration_sec)
    statistics = [
        'rocksdb.number.rate_limiter.drains.count',
        'rocksdb.number.block.decompressed.count',
        'rocksdb.db.get.micros.p50',
        'rocksdb.manifest.file.sync.micros.p99',
        'rocksdb.db.get.micros.p99'
    ]
    log_stats.fetch_timeseries(statistics)
    print()
    print(log_stats.keys_ts)
    # aggregated statistics
    print()
    print(log_stats.fetch_aggregated_values(
        NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.latest
    ))
    print(log_stats.fetch_aggregated_values(
        NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.oldest
    ))
    print(log_stats.fetch_aggregated_values(
        NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.max
    ))
    print(log_stats.fetch_aggregated_values(
        NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.min
    ))
    print(log_stats.fetch_aggregated_values(
        NO_ENTITY, statistics, TimeSeriesData.AggregationOperator.avg
    ))
    # condition 'evaluate_expression' that evaluates to true
    cond1 = Condition('cond-1')
    cond1 = TimeSeriesCondition.create(cond1)
    cond1.set_parameter('keys', statistics)
    cond1.set_parameter('behavior', 'evaluate_expression')
    cond1.set_parameter('evaluate', 'keys[3]-keys[2]>=0')
    cond1.set_parameter('aggregation_op', 'avg')
    # condition 'evaluate_expression' that evaluates to false
    cond2 = Condition('cond-2')
    cond2 = TimeSeriesCondition.create(cond2)
    cond2.set_parameter('keys', statistics)
    cond2.set_parameter('behavior', 'evaluate_expression')
    cond2.set_parameter('evaluate', '((keys[1]-(2*keys[0]))/100)<3000')
    cond2.set_parameter('aggregation_op', 'latest')
    # condition 'evaluate_expression' that evaluates to true; no aggregation_op
    cond3 = Condition('cond-3')
    cond3 = TimeSeriesCondition.create(cond3)
    cond3.set_parameter('keys', [statistics[2], statistics[3]])
    cond3.set_parameter('behavior', 'evaluate_expression')
    cond3.set_parameter('evaluate', '(keys[1]/keys[0])>23')
    # check remaining methods
    conditions = [cond1, cond2, cond3]
    print()
    print(log_stats.get_keys_from_conditions(conditions))
    log_stats.check_and_trigger_conditions(conditions)
    print()
    print(cond1.get_trigger())
    print(cond2.get_trigger())
    print(cond3.get_trigger())


# TODO(poojam23): shift this code to the unit tests for DatabasePerfContext
def check_perf_context_code():
    string = (
        " user_key_comparison_count = 675903942, " +
        "block_cache_hit_count = 830086, " +
        "get_from_output_files_time = 85088293818, " +
        "seek_on_memtable_time = 0,"
    )
    token_list = string.split(',')
    perf_context = {
        token.split('=')[0].strip(): int(token.split('=')[1].strip())
        for token in token_list
        if token
    }
    timestamp = int(time.time())
    perf_ts = {}
    for key in perf_context:
        perf_ts[key] = {}
        start_val = perf_context[key]
        for ix in range(5):
            perf_ts[key][timestamp+(ix*10)] = start_val + (2 * ix)
    db_perf_context = DatabasePerfContext(perf_ts, 10, True)
    print(db_perf_context.keys_ts)


if __name__ == '__main__':
    main()
    check_perf_context_code()
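Two quick illustrations of the parsing and un-accumulation behaviour above (illustrative only; not part of the patch):

from advisor.db_stats_fetcher import LogStatsParser, DatabasePerfContext

# The sample LOG line from the parse_log_line_for_stats() comment:
line = "rocksdb.db.get.micros P50 : 8.4 P95 : 21.8 P99 : 33.9 P100 : 92.0"
LogStatsParser.parse_log_line_for_stats(line)
# -> {'rocksdb.db.get.micros.p50': 8.4, 'rocksdb.db.get.micros.p95': 21.8,
#     'rocksdb.db.get.micros.p99': 33.9, 'rocksdb.db.get.micros.p100': 92.0}

# DatabasePerfContext with cumulative=True turns a running counter into
# per-interval deltas and drops the earliest sample (timestamps are made up):
ctx = DatabasePerfContext(
    {'user_key_comparison_count': {1000: 10, 1010: 25, 1020: 45}}, 0, True
)
ctx.keys_ts
# -> {'ENTITY_PLACEHOLDER': {'user_key_comparison_count': {1010: 15, 1020: 20}}}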
@@ -0,0 +1,208 @@ (new file: advisor/db_timeseries_parser.py)
|||||||
|
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
||||||
|
# This source code is licensed under both the GPLv2 (found in the |
||||||
|
# COPYING file in the root directory) and Apache 2.0 License |
||||||
|
# (found in the LICENSE.Apache file in the root directory). |
||||||
|
|
||||||
|
from abc import abstractmethod |
||||||
|
from advisor.db_log_parser import DataSource |
||||||
|
from enum import Enum |
||||||
|
import math |
||||||
|
|
||||||
|
|
||||||
|
NO_ENTITY = 'ENTITY_PLACEHOLDER' |
||||||
|
|
||||||
|
|
||||||
|
class TimeSeriesData(DataSource): |
||||||
|
class Behavior(Enum): |
||||||
|
bursty = 1 |
||||||
|
evaluate_expression = 2 |
||||||
|
|
||||||
|
class AggregationOperator(Enum): |
||||||
|
avg = 1 |
||||||
|
max = 2 |
||||||
|
min = 3 |
||||||
|
latest = 4 |
||||||
|
oldest = 5 |
||||||
|
|
||||||
|
def __init__(self): |
||||||
|
super().__init__(DataSource.Type.TIME_SERIES) |
||||||
|
self.keys_ts = None # Dict[entity, Dict[key, Dict[timestamp, value]]] |
||||||
|
self.stats_freq_sec = None |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def get_keys_from_conditions(self, conditions): |
||||||
|
# This method takes in a list of time-series conditions; for each |
||||||
|
# condition it manipulates the 'keys' in the way that is supported by |
||||||
|
# the subclass implementing this method |
||||||
|
pass |
||||||
|
|
||||||
|
@abstractmethod |
||||||
|
def fetch_timeseries(self, required_statistics): |
||||||
|
# this method takes in a list of statistics and fetches the timeseries |
||||||
|
# for each of them and populates the 'keys_ts' dictionary |
||||||
|
pass |
||||||
|
|
||||||
|
def fetch_burst_epochs( |
||||||
|
self, entities, statistic, window_sec, threshold, percent |
||||||
|
): |
||||||
|
# type: (str, int, float, bool) -> Dict[str, Dict[int, float]] |
||||||
|
# this method calculates the (percent) rate change in the 'statistic' |
||||||
|
# for each entity (over 'window_sec' seconds) and returns the epochs |
||||||
|
# where this rate change is greater than or equal to the 'threshold' |
||||||
|
# value |
||||||
|
if self.stats_freq_sec == 0: |
||||||
|
# not time series data, cannot check for bursty behavior |
||||||
|
return |
||||||
|
if window_sec < self.stats_freq_sec: |
||||||
|
window_sec = self.stats_freq_sec |
||||||
|
# 'window_samples' is the number of windows to go back to |
||||||
|
# compare the current window with, while calculating rate change. |
||||||
|
window_samples = math.ceil(window_sec / self.stats_freq_sec) |
||||||
|
burst_epochs = {} |
||||||
|
# if percent = False: |
||||||
|
# curr_val = value at window for which rate change is being calculated |
||||||
|
# prev_val = value at window that is window_samples behind curr_window |
||||||
|
# Then rate_without_percent = |
||||||
|
# ((curr_val-prev_val)*duration_sec)/(curr_timestamp-prev_timestamp) |
||||||
|
# if percent = True: |
||||||
|
# rate_with_percent = (rate_without_percent * 100) / prev_val |
||||||
|
# These calculations are in line with the rate() transform supported |
||||||
|
# by ODS |
||||||
|
for entity in entities: |
||||||
|
if statistic not in self.keys_ts[entity]: |
||||||
|
continue |
||||||
|
timestamps = sorted(list(self.keys_ts[entity][statistic].keys())) |
||||||
|
for ix in range(window_samples, len(timestamps), 1): |
||||||
|
first_ts = timestamps[ix - window_samples] |
||||||
|
last_ts = timestamps[ix] |
||||||
|
first_val = self.keys_ts[entity][statistic][first_ts] |
||||||
|
last_val = self.keys_ts[entity][statistic][last_ts] |
||||||
|
diff = last_val - first_val |
||||||
|
if percent: |
||||||
|
diff = diff * 100 / first_val |
||||||
|
rate = (diff * self.duration_sec) / (last_ts - first_ts) |
||||||
|
# if the rate change is greater than the provided threshold, |
||||||
|
# then the condition is triggered for entity at time 'last_ts' |
||||||
|
if rate >= threshold: |
||||||
|
if entity not in burst_epochs: |
||||||
|
burst_epochs[entity] = {} |
||||||
|
burst_epochs[entity][last_ts] = rate |
||||||
|
return burst_epochs |
||||||
|

    def fetch_aggregated_values(self, entity, statistics, aggregation_op):
        # type: (str, List[str], AggregationOperator) -> Dict[str, float]
        # this method performs the aggregation specified by 'aggregation_op'
        # on the timeseries of 'statistics' for 'entity' and returns:
        # Dict[statistic, aggregated_value]
        result = {}
        for stat in statistics:
            if stat not in self.keys_ts[entity]:
                continue
            agg_val = None
            if aggregation_op is self.AggregationOperator.latest:
                latest_timestamp = max(list(self.keys_ts[entity][stat].keys()))
                agg_val = self.keys_ts[entity][stat][latest_timestamp]
            elif aggregation_op is self.AggregationOperator.oldest:
                oldest_timestamp = min(list(self.keys_ts[entity][stat].keys()))
                agg_val = self.keys_ts[entity][stat][oldest_timestamp]
            elif aggregation_op is self.AggregationOperator.max:
                agg_val = max(list(self.keys_ts[entity][stat].values()))
            elif aggregation_op is self.AggregationOperator.min:
                agg_val = min(list(self.keys_ts[entity][stat].values()))
            elif aggregation_op is self.AggregationOperator.avg:
                values = list(self.keys_ts[entity][stat].values())
                agg_val = sum(values) / len(values)
            result[stat] = agg_val
        return result
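
For reference, each aggregation operator above reduces one statistic's timeseries to a single number; the following throwaway sketch (hypothetical values, independent of this class) shows what each operator yields for one series:

# Hypothetical per-statistic timeseries: Dict[timestamp, value].
series = {100: 4.0, 110: 9.0, 120: 7.0}

latest = series[max(series.keys())]            # 7.0, value at the newest timestamp
oldest = series[min(series.keys())]            # 4.0, value at the oldest timestamp
maximum = max(series.values())                 # 9.0
minimum = min(series.values())                 # 4.0
average = sum(series.values()) / len(series)   # roughly 6.67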

    def check_and_trigger_conditions(self, conditions):
        # get the list of statistics that need to be fetched
        reqd_keys = self.get_keys_from_conditions(conditions)
        # fetch the required statistics and populate the map 'keys_ts'
        self.fetch_timeseries(reqd_keys)
        # Trigger the appropriate conditions
        for cond in conditions:
            complete_keys = self.get_keys_from_conditions([cond])
            # Get the entities that have all statistics required by 'cond':
            # an entity is checked for a given condition only if we possess all
            # of the condition's 'keys' for that entity
            entities_with_stats = []
            for entity in self.keys_ts:
                stat_missing = False
                for stat in complete_keys:
                    if stat not in self.keys_ts[entity]:
                        stat_missing = True
                        break
                if not stat_missing:
                    entities_with_stats.append(entity)
            if not entities_with_stats:
                continue
            if cond.behavior is self.Behavior.bursty:
                # for a condition that checks for bursty behavior, only one key
                # should be present in the condition's 'keys' field
                result = self.fetch_burst_epochs(
                    entities_with_stats,
                    complete_keys[0],  # there should be only one key
                    cond.window_sec,
                    cond.rate_threshold,
                    True  # percent rate change
                )
                # Trigger in this case is:
                # Dict[entity_name, Dict[timestamp, rate_change]]
                # where the inner dictionary contains the rate_change values
                # that were >= the provided threshold, keyed by the
                # corresponding timestamps
                if result:
                    cond.set_trigger(result)
            elif cond.behavior is self.Behavior.evaluate_expression:
                self.handle_evaluate_expression(
                    cond,
                    complete_keys,
                    entities_with_stats
                )

    def handle_evaluate_expression(self, condition, statistics, entities):
        trigger = {}
        # check 'condition' for each of these entities
        for entity in entities:
            if hasattr(condition, 'aggregation_op'):
                # in this case, the aggregation operation is performed on each
                # of the condition's 'keys' and the condition's 'expression'
                # is then evaluated with the aggregated values; if it evaluates
                # to True, the list of the keys' values is added to the
                # condition's trigger: Dict[entity_name, List[stats]]
                result = self.fetch_aggregated_values(
                    entity, statistics, condition.aggregation_op
                )
                keys = [result[key] for key in statistics]
                try:
                    if eval(condition.expression):
                        trigger[entity] = keys
                except Exception as e:
                    print(
                        'WARNING(TimeSeriesData) check_and_trigger: ' + str(e)
                    )
            else:
                # assumption: all stats have the same series of timestamps
                # this is similar to the above, but 'expression' is evaluated
                # at each timestamp, since there is no aggregation, and every
                # epoch where the condition's 'expression' evaluates to True
                # is added to the trigger; so the trigger is:
                # Dict[entity, Dict[timestamp, List[stats]]]
                for epoch in self.keys_ts[entity][statistics[0]].keys():
                    keys = [
                        self.keys_ts[entity][key][epoch]
                        for key in statistics
                    ]
                    try:
                        if eval(condition.expression):
                            if entity not in trigger:
                                trigger[entity] = {}
                            trigger[entity][epoch] = keys
                    except Exception as e:
                        print(
                            'WARNING(TimeSeriesData) check_and_trigger: ' +
                            str(e)
                        )
        if trigger:
            condition.set_trigger(trigger)
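
To make the eval-based check above concrete: the condition's 'expression' string is expected to reference the fetched values through the local list named 'keys'. A minimal sketch follows (the expression and values are invented, not taken from this patch):

# Hypothetical condition expression, referring to the local list 'keys',
# which holds one entity's values in the same order as the condition's
# 'keys'/statistics list. The expression string comes from the rules spec
# and is assumed to be trusted input, since it is passed to eval().
expression = "keys[0] * keys[1] > keys[2]"

keys = [4, 64 * 1024 * 1024, 128 * 1024 * 1024]   # invented fetched values
if eval(expression):
    print('condition triggered; triggering values:', keys)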
@@ -0,0 +1,83 @@
[Rule "stall-too-many-memtables"] |
||||||
|
suggestions=inc-bg-flush:inc-write-buffer |
||||||
|
conditions=stall-too-many-memtables |
||||||
|
|
||||||
|
[Condition "stall-too-many-memtables"] |
||||||
|
source=LOG |
||||||
|
regex=Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+ |
||||||
|
|
||||||
|
[Rule "stall-too-many-L0"] |
||||||
|
suggestions=inc-max-subcompactions:inc-max-bg-compactions:inc-write-buffer-size:dec-max-bytes-for-level-base:inc-l0-slowdown-writes-trigger |
||||||
|
conditions=stall-too-many-L0 |
||||||
|
|
||||||
|
[Condition "stall-too-many-L0"] |
||||||
|
source=LOG |
||||||
|
regex=Stalling writes because we have \d+ level-0 files |
||||||
|
|
||||||
|
[Rule "stop-too-many-L0"] |
||||||
|
suggestions=inc-max-bg-compactions:inc-write-buffer-size:inc-l0-stop-writes-trigger |
||||||
|
conditions=stop-too-many-L0 |
||||||
|
|
||||||
|
[Condition "stop-too-many-L0"] |
||||||
|
source=LOG |
||||||
|
regex=Stopping writes because we have \d+ level-0 files |
||||||
|
|
||||||
|
[Rule "stall-too-many-compaction-bytes"] |
||||||
|
suggestions=inc-max-bg-compactions:inc-write-buffer-size:inc-hard-pending-compaction-bytes-limit:inc-soft-pending-compaction-bytes-limit |
||||||
|
conditions=stall-too-many-compaction-bytes |
||||||
|
|
||||||
|
[Condition "stall-too-many-compaction-bytes"] |
||||||
|
source=LOG |
||||||
|
regex=Stalling writes because of estimated pending compaction bytes \d+ |
||||||
|
|
||||||
|
[Suggestion "inc-bg-flush"] |
||||||
|
option=DBOptions.max_background_flushes |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-write-buffer"] |
||||||
|
option=CFOptions.max_write_buffer_number |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-max-subcompactions"] |
||||||
|
option=DBOptions.max_subcompactions |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-max-bg-compactions"] |
||||||
|
option=DBOptions.max_background_compactions |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-write-buffer-size"] |
||||||
|
option=CFOptions.write_buffer_size |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "dec-max-bytes-for-level-base"] |
||||||
|
option=CFOptions.max_bytes_for_level_base |
||||||
|
action=decrease |
||||||
|
|
||||||
|
[Suggestion "inc-l0-slowdown-writes-trigger"] |
||||||
|
option=CFOptions.level0_slowdown_writes_trigger |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-l0-stop-writes-trigger"] |
||||||
|
option=CFOptions.level0_stop_writes_trigger |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-hard-pending-compaction-bytes-limit"] |
||||||
|
option=CFOptions.hard_pending_compaction_bytes_limit |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Suggestion "inc-soft-pending-compaction-bytes-limit"] |
||||||
|
option=CFOptions.soft_pending_compaction_bytes_limit |
||||||
|
action=increase |
||||||
|
|
||||||
|
[Rule "level0-level1-ratio"] |
||||||
|
conditions=level0-level1-ratio |
||||||
|
suggestions=l0-l1-ratio-health-check |
||||||
|
|
||||||
|
[Condition "level0-level1-ratio"] |
||||||
|
source=OPTIONS |
||||||
|
options=CFOptions.level0_file_num_compaction_trigger:CFOptions.write_buffer_size:CFOptions.max_bytes_for_level_base |
||||||
|
evaluate=int(options[0])*int(options[1])-int(options[2])>=-268173312 # should evaluate to a boolean, condition triggered if evaluates to true |
||||||
|
|
||||||
|
[Suggestion "l0-l1-ratio-health-check"] |
||||||
|
description='modify options such that (level0_file_num_compaction_trigger * write_buffer_size - max_bytes_for_level_base < -268173312) is satisfied' |
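
As a sanity check on the LOG-sourced conditions above, the snippet below (not part of this patch; the sample log message is invented) shows how one of the stall regexes would match an info-log line using plain re.search:

import re

# Regex taken from the "stall-too-many-L0" condition above; the log message
# is a made-up example of the kind of line it is meant to match.
pattern = r'Stalling writes because we have \d+ level-0 files'
sample_log_message = (
    '[col_fam_A] Stalling writes because we have 24 level-0 files '
    'rate 16777216'
)
if re.search(pattern, sample_log_message):
    print('condition "stall-too-many-L0" would trigger on this log line')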
@@ -0,0 +1,98 @@
from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY
from advisor.rule_parser import Condition, LogCondition
import os
import unittest


class TestLog(unittest.TestCase):
    def setUp(self):
        self.column_families = ['default', 'col_fam_A']

    def test_get_column_family(self):
        test_log = (
            "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " +
            "[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK"
        )
        db_log = Log(test_log, self.column_families)
        self.assertEqual('col_fam_A', db_log.get_column_family())

        test_log = (
            "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " +
            "[JOB 44] Level-0 flush table #84: 1890780 bytes OK"
        )
        db_log = Log(test_log, self.column_families)
        db_log.append_message('[default] some remaining part of log')
        self.assertEqual(NO_COL_FAMILY, db_log.get_column_family())

    def test_get_methods(self):
        hr_time = "2018/05/25-14:30:25.491635"
        context = "7f82ba72e700"
        message = (
            "[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table " +
            "#23: started"
        )
        test_log = hr_time + " " + context + " " + message
        db_log = Log(test_log, self.column_families)
        self.assertEqual(db_log.get_message(), message)
        remaining_message = "[col_fam_A] some more logs"
        db_log.append_message(remaining_message)
        self.assertEqual(
            db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635"
        )
        self.assertEqual(db_log.get_context(), "7f82ba72e700")
        self.assertEqual(db_log.get_timestamp(), 1527258625)
        self.assertEqual(
            db_log.get_message(), str(message + '\n' + remaining_message)
        )

    def test_is_new_log(self):
        new_log = "2018/05/25-14:34:21.047233 context random new log"
        remaining_log = "2018/05/25 not really a new log"
        self.assertTrue(Log.is_new_log(new_log))
        self.assertFalse(Log.is_new_log(remaining_log))


class TestDatabaseLogs(unittest.TestCase):
    def test_check_and_trigger_conditions(self):
        this_path = os.path.abspath(os.path.dirname(__file__))
        logs_path_prefix = os.path.join(this_path, 'input_files/LOG-0')
        column_families = ['default', 'col-fam-A', 'col-fam-B']
        db_logs = DatabaseLogs(logs_path_prefix, column_families)
        # matches, has 2 col_fams
        condition1 = LogCondition.create(Condition('cond-A'))
        condition1.set_parameter('regex', 'random log message')
        # matches, multiple lines message
        condition2 = LogCondition.create(Condition('cond-B'))
        condition2.set_parameter('regex', 'continuing on next line')
        # does not match
        condition3 = LogCondition.create(Condition('cond-C'))
        condition3.set_parameter('regex', 'this should match no log')
        db_logs.check_and_trigger_conditions(
            [condition1, condition2, condition3]
        )
        cond1_trigger = condition1.get_trigger()
        self.assertEqual(2, len(cond1_trigger.keys()))
        self.assertSetEqual(
            {'col-fam-A', NO_COL_FAMILY}, set(cond1_trigger.keys())
        )
        self.assertEqual(2, len(cond1_trigger['col-fam-A']))
        messages = [
            "[db/db_impl.cc:563] [col-fam-A] random log message for testing",
            "[db/db_impl.cc:653] [col-fam-A] another random log message"
        ]
        self.assertIn(cond1_trigger['col-fam-A'][0].get_message(), messages)
        self.assertIn(cond1_trigger['col-fam-A'][1].get_message(), messages)
        self.assertEqual(1, len(cond1_trigger[NO_COL_FAMILY]))
        self.assertEqual(
            cond1_trigger[NO_COL_FAMILY][0].get_message(),
            "[db/db_impl.cc:331] [unknown] random log message no column family"
        )
        cond2_trigger = condition2.get_trigger()
        self.assertEqual(['col-fam-B'], list(cond2_trigger.keys()))
        self.assertEqual(1, len(cond2_trigger['col-fam-B']))
        self.assertEqual(
            cond2_trigger['col-fam-B'][0].get_message(),
            "[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n" +
            "remaining part of the log"
        )
        self.assertIsNone(condition3.get_trigger())