diff --git a/buckifier/buckify_rocksdb.py b/buckifier/buckify_rocksdb.py index 46514146d..ac09c0519 100755 --- a/buckifier/buckify_rocksdb.py +++ b/buckifier/buckify_rocksdb.py @@ -1,19 +1,18 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals + try: from builtins import str except ImportError: from __builtin__ import str -from targets_builder import TARGETSBuilder +import fnmatch import json import os -import fnmatch import sys +from targets_builder import TARGETSBuilder + from util import ColorString # This script generates TARGETS file for Buck. @@ -44,13 +43,13 @@ def parse_src_mk(repo_path): src_files = {} for line in open(src_mk): line = line.strip() - if len(line) == 0 or line[0] == '#': + if len(line) == 0 or line[0] == "#": continue - if '=' in line: - current_src = line.split('=')[0].strip() + if "=" in line: + current_src = line.split("=")[0].strip() src_files[current_src] = [] - elif '.c' in line: - src_path = line.split('\\')[0].strip() + elif ".c" in line: + src_path = line.split("\\")[0].strip() src_files[current_src].append(src_path) return src_files @@ -58,14 +57,16 @@ def parse_src_mk(repo_path): # get all .cc / .c files def get_cc_files(repo_path): cc_files = [] - for root, dirnames, filenames in os.walk(repo_path): # noqa: B007 T25377293 Grandfathered in - root = root[(len(repo_path) + 1):] + for root, _dirnames, filenames in os.walk( + repo_path + ): # noqa: B007 T25377293 Grandfathered in + root = root[(len(repo_path) + 1) :] if "java" in root: # Skip java continue - for filename in fnmatch.filter(filenames, '*.cc'): + for filename in fnmatch.filter(filenames, "*.cc"): cc_files.append(os.path.join(root, filename)) - for filename in fnmatch.filter(filenames, '*.c'): + for filename in fnmatch.filter(filenames, "*.c"): cc_files.append(os.path.join(root, filename)) return cc_files @@ -93,14 +94,10 @@ def get_non_parallel_tests(repo_path): return s + # Parse extra dependencies passed by user from command line def get_dependencies(): - deps_map = { - '': { - 'extra_deps': [], - 'extra_compiler_flags': [] - } - } + deps_map = {"": {"extra_deps": [], "extra_compiler_flags": []}} if len(sys.argv) < 2: return deps_map @@ -111,6 +108,7 @@ def get_dependencies(): v = encode_dict(v) rv[k] = v return rv + extra_deps = json.loads(sys.argv[1], object_hook=encode_dict) for target_alias, deps in extra_deps.items(): deps_map[target_alias] = deps @@ -143,73 +141,73 @@ def generate_targets(repo_path, deps_map): "rocksdb_lib", src_mk["LIB_SOURCES"] + # always add range_tree, it's only excluded on ppc64, which we don't use internally - src_mk["RANGE_TREE_SOURCES"] + - src_mk["TOOL_LIB_SOURCES"], + src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"], deps=[ "//folly/container:f14_hash", "//folly/experimental/coro:blocking_wait", "//folly/experimental/coro:collect", "//folly/experimental/coro:coroutine", "//folly/experimental/coro:task", - "//folly/synchronization:distributed_mutex"]) + "//folly/synchronization:distributed_mutex", + ], + ) # rocksdb_whole_archive_lib TARGETS.add_library( "rocksdb_whole_archive_lib", src_mk["LIB_SOURCES"] + # always add range_tree, it's only excluded on ppc64, which we don't use internally - src_mk["RANGE_TREE_SOURCES"] + - src_mk["TOOL_LIB_SOURCES"], + 
src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"], deps=[ "//folly/container:f14_hash", "//folly/experimental/coro:blocking_wait", "//folly/experimental/coro:collect", "//folly/experimental/coro:coroutine", "//folly/experimental/coro:task", - "//folly/synchronization:distributed_mutex"], + "//folly/synchronization:distributed_mutex", + ], headers=None, extra_external_deps="", - link_whole=True) + link_whole=True, + ) # rocksdb_test_lib TARGETS.add_library( "rocksdb_test_lib", - src_mk.get("MOCK_LIB_SOURCES", []) + - src_mk.get("TEST_LIB_SOURCES", []) + - src_mk.get("EXP_LIB_SOURCES", []) + - src_mk.get("ANALYZER_LIB_SOURCES", []), + src_mk.get("MOCK_LIB_SOURCES", []) + + src_mk.get("TEST_LIB_SOURCES", []) + + src_mk.get("EXP_LIB_SOURCES", []) + + src_mk.get("ANALYZER_LIB_SOURCES", []), [":rocksdb_lib"], - extra_test_libs=True - ) + extra_test_libs=True, + ) # rocksdb_tools_lib TARGETS.add_library( "rocksdb_tools_lib", - src_mk.get("BENCH_LIB_SOURCES", []) + - src_mk.get("ANALYZER_LIB_SOURCES", []) + - ["test_util/testutil.cc"], - [":rocksdb_lib"]) + src_mk.get("BENCH_LIB_SOURCES", []) + + src_mk.get("ANALYZER_LIB_SOURCES", []) + + ["test_util/testutil.cc"], + [":rocksdb_lib"], + ) # rocksdb_cache_bench_tools_lib TARGETS.add_library( "rocksdb_cache_bench_tools_lib", src_mk.get("CACHE_BENCH_LIB_SOURCES", []), - [":rocksdb_lib"]) + [":rocksdb_lib"], + ) # rocksdb_stress_lib TARGETS.add_rocksdb_library( "rocksdb_stress_lib", src_mk.get("ANALYZER_LIB_SOURCES", []) - + src_mk.get('STRESS_LIB_SOURCES', []) - + ["test_util/testutil.cc"]) + + src_mk.get("STRESS_LIB_SOURCES", []) + + ["test_util/testutil.cc"], + ) # db_stress binary - TARGETS.add_binary("db_stress", - ["db_stress_tool/db_stress.cc"], - [":rocksdb_stress_lib"]) + TARGETS.add_binary( + "db_stress", ["db_stress_tool/db_stress.cc"], [":rocksdb_stress_lib"] + ) # bench binaries for src in src_mk.get("MICROBENCH_SOURCES", []): - name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0] - TARGETS.add_binary( - name, - [src], - [], - extra_bench_libs=True - ) + name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0] + TARGETS.add_binary(name, [src], [], extra_bench_libs=True) print("Extra dependencies:\n{0}".format(json.dumps(deps_map))) # Dictionary test executable name -> relative source file path @@ -219,7 +217,7 @@ def generate_targets(repo_path, deps_map): # are more than one .c test file, we need to extend # TARGETS.add_c_test() to include other C tests too. 
for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []): - if test_src != 'db/c_test.c': + if test_src != "db/c_test.c": print("Don't know how to deal with " + test_src) return False TARGETS.add_c_test() @@ -229,7 +227,7 @@ def generate_targets(repo_path, deps_map): fast_fancy_bench_config_list = json.load(json_file) for config_dict in fast_fancy_bench_config_list: clean_benchmarks = {} - benchmarks = config_dict['benchmarks'] + benchmarks = config_dict["benchmarks"] for binary, benchmark_dict in benchmarks.items(): clean_benchmarks[binary] = {} for benchmark, overloaded_metric_list in benchmark_dict.items(): @@ -237,13 +235,20 @@ def generate_targets(repo_path, deps_map): for metric in overloaded_metric_list: if not isinstance(metric, dict): clean_benchmarks[binary][benchmark].append(metric) - TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold']) + TARGETS.add_fancy_bench_config( + config_dict["name"], + clean_benchmarks, + False, + config_dict["expected_runtime_one_iter"], + config_dict["sl_iterations"], + config_dict["regression_threshold"], + ) with open(f"{repo_path}/buckifier/bench-slow.json") as json_file: slow_fancy_bench_config_list = json.load(json_file) for config_dict in slow_fancy_bench_config_list: clean_benchmarks = {} - benchmarks = config_dict['benchmarks'] + benchmarks = config_dict["benchmarks"] for binary, benchmark_dict in benchmarks.items(): clean_benchmarks[binary] = {} for benchmark, overloaded_metric_list in benchmark_dict.items(): @@ -252,7 +257,14 @@ def generate_targets(repo_path, deps_map): if not isinstance(metric, dict): clean_benchmarks[binary][benchmark].append(metric) for config_dict in slow_fancy_bench_config_list: - TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold']) + TARGETS.add_fancy_bench_config( + config_dict["name"] + "_slow", + clean_benchmarks, + True, + config_dict["expected_runtime_one_iter"], + config_dict["sl_iterations"], + config_dict["regression_threshold"], + ) # it is better servicelab experiments break # than rocksdb github ci except Exception: @@ -261,7 +273,7 @@ def generate_targets(repo_path, deps_map): TARGETS.add_test_header() for test_src in src_mk.get("TEST_MAIN_SOURCES", []): - test = test_src.split('.c')[0].strip().split('/')[-1].strip() + test = test_src.split(".c")[0].strip().split("/")[-1].strip() test_source_map[test] = test_src print("" + test + " " + test_src) @@ -271,23 +283,29 @@ def generate_targets(repo_path, deps_map): print(ColorString.warning("Failed to get test name for %s" % test_src)) continue - test_target_name = \ - test if not target_alias else test + "_" + target_alias + test_target_name = test if not target_alias else test + "_" + target_alias if test in _EXPORTED_TEST_LIBS: test_library = "%s_lib" % test_target_name - TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True) + TARGETS.add_library( + test_library, + [test_src], + deps=[":rocksdb_test_lib"], + extra_test_libs=True, + ) TARGETS.register_test( test_target_name, test_src, - deps = json.dumps(deps['extra_deps'] + [':'+test_library]), - extra_compiler_flags = json.dumps(deps['extra_compiler_flags'])) + deps=json.dumps(deps["extra_deps"] + [":" + test_library]), + extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]), + ) else: 
TARGETS.register_test( test_target_name, test_src, - deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ), - extra_compiler_flags = json.dumps(deps['extra_compiler_flags'])) + deps=json.dumps(deps["extra_deps"] + [":rocksdb_test_lib"]), + extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]), + ) print(ColorString.info("Generated TARGETS Summary:")) print(ColorString.info("- %d libs" % TARGETS.total_lib)) @@ -300,8 +318,7 @@ def get_rocksdb_path(): # rocksdb = {script_dir}/.. script_dir = os.path.dirname(sys.argv[0]) script_dir = os.path.abspath(script_dir) - rocksdb_path = os.path.abspath( - os.path.join(script_dir, "../")) + rocksdb_path = os.path.abspath(os.path.join(script_dir, "../")) return rocksdb_path @@ -318,5 +335,6 @@ def main(): if not ok: exit_with_error("Failed to generate TARGETS files") + if __name__ == "__main__": main() diff --git a/buckifier/targets_builder.py b/buckifier/targets_builder.py index fbda36ea4..343b2207d 100644 --- a/buckifier/targets_builder.py +++ b/buckifier/targets_builder.py @@ -1,113 +1,150 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals + try: - from builtins import object - from builtins import str + from builtins import object, str except ImportError: - from __builtin__ import object - from __builtin__ import str -import targets_cfg + from __builtin__ import object, str import pprint +import targets_cfg + + def pretty_list(lst, indent=8): if lst is None or len(lst) == 0: return "" if len(lst) == 1: - return "\"%s\"" % lst[0] + return '"%s"' % lst[0] - separator = "\",\n%s\"" % (" " * indent) + separator = '",\n%s"' % (" " * indent) res = separator.join(sorted(lst)) - res = "\n" + (" " * indent) + "\"" + res + "\",\n" + (" " * (indent - 4)) + res = "\n" + (" " * indent) + '"' + res + '",\n' + (" " * (indent - 4)) return res class TARGETSBuilder(object): def __init__(self, path, extra_argv): self.path = path - self.targets_file = open(path, 'wb') header = targets_cfg.rocksdb_target_header_template.format( - extra_argv=extra_argv) - self.targets_file.write(header.encode("utf-8")) + extra_argv=extra_argv + ) + with open(path, "wb") as targets_file: + targets_file.write(header.encode("utf-8")) self.total_lib = 0 self.total_bin = 0 self.total_test = 0 self.tests_cfg = "" - def __del__(self): - self.targets_file.close() - - def add_library(self, name, srcs, deps=None, headers=None, - extra_external_deps="", link_whole=False, - external_dependencies=None, extra_test_libs=False): + def add_library( + self, + name, + srcs, + deps=None, + headers=None, + extra_external_deps="", + link_whole=False, + external_dependencies=None, + extra_test_libs=False, + ): if headers is not None: headers = "[" + pretty_list(headers) + "]" - self.targets_file.write(targets_cfg.library_template.format( - name=name, - srcs=pretty_list(srcs), - headers=headers, - deps=pretty_list(deps), - extra_external_deps=extra_external_deps, - link_whole=link_whole, - external_dependencies=pretty_list(external_dependencies), - extra_test_libs=extra_test_libs - ).encode("utf-8")) + with open(self.path, "ab") as targets_file: + targets_file.write( + targets_cfg.library_template.format( + name=name, + srcs=pretty_list(srcs), + headers=headers, + deps=pretty_list(deps), + extra_external_deps=extra_external_deps, + 
link_whole=link_whole, + external_dependencies=pretty_list(external_dependencies), + extra_test_libs=extra_test_libs, + ).encode("utf-8") + ) self.total_lib = self.total_lib + 1 - def add_rocksdb_library(self, name, srcs, headers=None, - external_dependencies=None): + def add_rocksdb_library(self, name, srcs, headers=None, external_dependencies=None): if headers is not None: headers = "[" + pretty_list(headers) + "]" - self.targets_file.write(targets_cfg.rocksdb_library_template.format( - name=name, - srcs=pretty_list(srcs), - headers=headers, - external_dependencies=pretty_list(external_dependencies) - ).encode("utf-8") + with open(self.path, "ab") as targets_file: + targets_file.write( + targets_cfg.rocksdb_library_template.format( + name=name, + srcs=pretty_list(srcs), + headers=headers, + external_dependencies=pretty_list(external_dependencies), + ).encode("utf-8") ) self.total_lib = self.total_lib + 1 - def add_binary(self, name, srcs, deps=None, extra_preprocessor_flags=None,extra_bench_libs=False): - self.targets_file.write(targets_cfg.binary_template.format( - name=name, - srcs=pretty_list(srcs), - deps=pretty_list(deps), - extra_preprocessor_flags=pretty_list(extra_preprocessor_flags), - extra_bench_libs=extra_bench_libs, - ).encode("utf-8")) + def add_binary( + self, + name, + srcs, + deps=None, + extra_preprocessor_flags=None, + extra_bench_libs=False, + ): + with open(self.path, "ab") as targets_file: + targets_file.write( + targets_cfg.binary_template.format( + name=name, + srcs=pretty_list(srcs), + deps=pretty_list(deps), + extra_preprocessor_flags=pretty_list(extra_preprocessor_flags), + extra_bench_libs=extra_bench_libs, + ).encode("utf-8") + ) self.total_bin = self.total_bin + 1 def add_c_test(self): - self.targets_file.write(b""" + with open(self.path, "ab") as targets_file: + targets_file.write( + b""" add_c_test_wrapper() -""") +""" + ) def add_test_header(self): - self.targets_file.write(b""" + with open(self.path, "ab") as targets_file: + targets_file.write( + b""" # Generate a test rule for each entry in ROCKS_TESTS # Do not build the tests in opt mode, since SyncPoint and other test code # will not be included. 
-""") +""" + ) - def add_fancy_bench_config(self, name, bench_config, slow, expected_runtime, sl_iterations, regression_threshold): - self.targets_file.write(targets_cfg.fancy_bench_template.format( + def add_fancy_bench_config( + self, + name, + bench_config, + slow, + expected_runtime, + sl_iterations, + regression_threshold, + ): + with open(self.path, "ab") as targets_file: + targets_file.write( + targets_cfg.fancy_bench_template.format( name=name, bench_config=pprint.pformat(bench_config), slow=slow, expected_runtime=expected_runtime, sl_iterations=sl_iterations, - regression_threshold=regression_threshold - ).encode("utf-8")) - - def register_test(self, - test_name, - src, - deps, - extra_compiler_flags): + regression_threshold=regression_threshold, + ).encode("utf-8") + ) - self.targets_file.write(targets_cfg.unittests_template.format(test_name=test_name,test_cc=str(src),deps=deps, - extra_compiler_flags=extra_compiler_flags).encode("utf-8")) + def register_test(self, test_name, src, deps, extra_compiler_flags): + with open(self.path, "ab") as targets_file: + targets_file.write( + targets_cfg.unittests_template.format( + test_name=test_name, + test_cc=str(src), + deps=deps, + extra_compiler_flags=extra_compiler_flags, + ).encode("utf-8") + ) self.total_test = self.total_test + 1 diff --git a/buckifier/targets_cfg.py b/buckifier/targets_cfg.py index dcb64d3b6..491c34d6e 100644 --- a/buckifier/targets_cfg.py +++ b/buckifier/targets_cfg.py @@ -1,11 +1,7 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals -rocksdb_target_header_template = \ - """# This file \100generated by: +rocksdb_target_header_template = """# This file \100generated by: #$ python3 buckifier/buckify_rocksdb.py{extra_argv} # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can @@ -27,7 +23,6 @@ rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers}) """ - binary_template = """ cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs}) """ diff --git a/buckifier/util.py b/buckifier/util.py index f04929a27..8943fed2b 100644 --- a/buckifier/util.py +++ b/buckifier/util.py @@ -2,37 +2,35 @@ """ This module keeps commonly used components. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals + try: from builtins import object except ImportError: from __builtin__ import object +import os import subprocess import sys -import os import time + class ColorString(object): - """ Generate colorful strings on terminal """ - HEADER = '\033[95m' - BLUE = '\033[94m' - GREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' + """Generate colorful strings on terminal""" + + HEADER = "\033[95m" + BLUE = "\033[94m" + GREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" @staticmethod def _make_color_str(text, color): # In Python2, default encoding for unicode string is ASCII if sys.version_info.major <= 2: - return "".join( - [color, text.encode('utf-8'), ColorString.ENDC]) + return "".join([color, text.encode("utf-8"), ColorString.ENDC]) # From Python3, default encoding for unicode string is UTF-8 - return "".join( - [color, text, ColorString.ENDC]) + return "".join([color, text, ColorString.ENDC]) @staticmethod def ok(text): @@ -68,37 +66,38 @@ class ColorString(object): def run_shell_command(shell_cmd, cmd_dir=None): - """ Run a single shell command. - @returns a tuple of shell command return code, stdout, stderr """ + """Run a single shell command. + @returns a tuple of shell command return code, stdout, stderr""" if cmd_dir is not None and not os.path.exists(cmd_dir): run_shell_command("mkdir -p %s" % cmd_dir) start = time.time() print("\t>>> Running: " + shell_cmd) - p = subprocess.Popen(shell_cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=cmd_dir) + p = subprocess.Popen( # noqa + shell_cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=cmd_dir, + ) stdout, stderr = p.communicate() end = time.time() # Report time if we spent more than 5 minutes executing a command execution_time = end - start if execution_time > (60 * 5): - mins = (execution_time / 60) - secs = (execution_time % 60) + mins = execution_time / 60 + secs = execution_time % 60 print("\t>time spent: %d minutes %d seconds" % (mins, secs)) - return p.returncode, stdout, stderr def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False): - """ Execute a sequence of shell commands, which is equivalent to - running `cmd1 && cmd2 && cmd3` - @returns boolean indication if all commands succeeds. + """Execute a sequence of shell commands, which is equivalent to + running `cmd1 && cmd2 && cmd3` + @returns boolean indication if all commands succeeds. 
""" if cmd_dir: diff --git a/build_tools/amalgamate.py b/build_tools/amalgamate.py index c5cbb3f0f..f79e9075e 100755 --- a/build_tools/amalgamate.py +++ b/build_tools/amalgamate.py @@ -28,14 +28,15 @@ from __future__ import print_function import argparse -from os import path import re import sys +from os import path include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$') included = set() excluded = set() + def find_header(name, abs_path, include_paths): samedir = path.join(path.dirname(abs_path), name) if path.exists(samedir): @@ -46,17 +47,31 @@ def find_header(name, abs_path, include_paths): return include_path return None -def expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths): + +def expand_include( + include_path, + f, + abs_path, + source_out, + header_out, + include_paths, + public_include_paths, +): if include_path in included: return False included.add(include_path) with open(include_path) as f: print('#line 1 "{}"'.format(include_path), file=source_out) - process_file(f, include_path, source_out, header_out, include_paths, public_include_paths) + process_file( + f, include_path, source_out, header_out, include_paths, public_include_paths + ) return True -def process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths): + +def process_file( + f, abs_path, source_out, header_out, include_paths, public_include_paths +): for (line, text) in enumerate(f): m = include_re.match(text) if m: @@ -68,7 +83,15 @@ def process_file(f, abs_path, source_out, header_out, include_paths, public_incl source_out.write(text) expanded = False else: - expanded = expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths) + expanded = expand_include( + include_path, + f, + abs_path, + source_out, + header_out, + include_paths, + public_include_paths, + ) else: # now try public headers include_path = find_header(filename, abs_path, public_include_paths) @@ -78,23 +101,52 @@ def process_file(f, abs_path, source_out, header_out, include_paths, public_incl if include_path in excluded: source_out.write(text) else: - expand_include(include_path, f, abs_path, header_out, None, public_include_paths, []) + expand_include( + include_path, + f, + abs_path, + header_out, + None, + public_include_paths, + [], + ) else: - sys.exit("unable to find {}, included in {} on line {}".format(filename, abs_path, line)) + sys.exit( + "unable to find {}, included in {} on line {}".format( + filename, abs_path, line + ) + ) if expanded: - print('#line {} "{}"'.format(line+1, abs_path), file=source_out) + print('#line {} "{}"'.format(line + 1, abs_path), file=source_out) elif text != "#pragma once\n": source_out.write(text) + def main(): - parser = argparse.ArgumentParser(description="Transform a unity build into an amalgamation") + parser = argparse.ArgumentParser( + description="Transform a unity build into an amalgamation" + ) parser.add_argument("source", help="source file") - parser.add_argument("-I", action="append", dest="include_paths", help="include paths for private headers") - parser.add_argument("-i", action="append", dest="public_include_paths", help="include paths for public headers") - parser.add_argument("-x", action="append", dest="excluded", help="excluded header files") + parser.add_argument( + "-I", + action="append", + dest="include_paths", + help="include paths for private headers", + ) + parser.add_argument( + "-i", + action="append", + dest="public_include_paths", + help="include 
paths for public headers", + ) + parser.add_argument( + "-x", action="append", dest="excluded", help="excluded header files" + ) parser.add_argument("-o", dest="source_out", help="output C++ file", required=True) - parser.add_argument("-H", dest="header_out", help="output C++ header file", required=True) + parser.add_argument( + "-H", dest="header_out", help="output C++ header file", required=True + ) args = parser.parse_args() include_paths = list(map(path.abspath, args.include_paths or [])) @@ -102,10 +154,15 @@ def main(): excluded.update(map(path.abspath, args.excluded or [])) filename = args.source abs_path = path.abspath(filename) - with open(filename) as f, open(args.source_out, 'w') as source_out, open(args.header_out, 'w') as header_out: + with open(filename) as f, open(args.source_out, "w") as source_out, open( + args.header_out, "w" + ) as header_out: print('#line 1 "{}"'.format(filename), file=source_out) print('#include "{}"'.format(header_out.name), file=source_out) - process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths) + process_file( + f, abs_path, source_out, header_out, include_paths, public_include_paths + ) + if __name__ == "__main__": main() diff --git a/build_tools/benchmark_log_tool.py b/build_tools/benchmark_log_tool.py index 2d5f962e1..cd666d078 100755 --- a/build_tools/benchmark_log_tool.py +++ b/build_tools/benchmark_log_tool.py @@ -4,23 +4,27 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). -'''Access the results of benchmark runs +"""Access the results of benchmark runs Send these results on to OpenSearch graphing service -''' +""" import argparse import itertools +import logging import os import re import sys + import requests from dateutil import parser -import logging logging.basicConfig(level=logging.DEBUG) + + class Configuration: - opensearch_user = os.environ['ES_USER'] - opensearch_pass = os.environ['ES_PASS'] + opensearch_user = os.environ["ES_USER"] + opensearch_pass = os.environ["ES_PASS"] + class BenchmarkResultException(Exception): def __init__(self, message, content): @@ -30,45 +34,71 @@ class BenchmarkResultException(Exception): class BenchmarkUtils: - expected_keys = ['ops_sec', 'mb_sec', 'lsm_sz', 'blob_sz', 'c_wgb', 'w_amp', - 'c_mbps', 'c_wsecs', 'c_csecs', 'b_rgb', 'b_wgb', 'usec_op', - 'p50', 'p99', 'p99.9', 'p99.99', 'pmax', - 'uptime', 'stall%', 'Nstall', 'u_cpu', 's_cpu', 'rss', 'test', 'date', 'version', 'job_id'] + expected_keys = [ + "ops_sec", + "mb_sec", + "lsm_sz", + "blob_sz", + "c_wgb", + "w_amp", + "c_mbps", + "c_wsecs", + "c_csecs", + "b_rgb", + "b_wgb", + "usec_op", + "p50", + "p99", + "p99.9", + "p99.99", + "pmax", + "uptime", + "stall%", + "Nstall", + "u_cpu", + "s_cpu", + "rss", + "test", + "date", + "version", + "job_id", + ] def sanity_check(row): - if not 'test' in row: + if "test" not in row: logging.debug(f"not 'test' in row: {row}") return False - if row['test'] == '': + if row["test"] == "": logging.debug(f"row['test'] == '': {row}") return False - if not 'date' in row: + if "date" not in row: logging.debug(f"not 'date' in row: {row}") return False - if not 'ops_sec' in row: + if "ops_sec" not in row: logging.debug(f"not 'ops_sec' in row: {row}") return False try: - v = int(row['ops_sec']) + _ = int(row["ops_sec"]) except (ValueError, TypeError): logging.debug(f"int(row['ops_sec']): {row}") return False try: - (_, _) = parser.parse(row['date'], fuzzy_with_tokens=True) + (_, _) = parser.parse(row["date"], 
fuzzy_with_tokens=True) except (parser.ParserError): - logging.error(f"parser.parse((row['date']): not a valid format for date in row: {row}") + logging.error( + f"parser.parse((row['date']): not a valid format for date in row: {row}" + ) return False return True def conform_opensearch(row): - (dt, _) = parser.parse(row['date'], fuzzy_with_tokens=True) + (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True) # create a test_date field, which was previously what was expected # repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month) # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55 - row['test_date'] = dt.isoformat() - row['date'] = dt.isoformat() - return dict((key.replace('.', '_'), value) - for (key, value) in row.items()) + row["test_date"] = dt.isoformat() + row["date"] = dt.isoformat() + return {key.replace(".", "_"): value for key, value in row.items()} class ResultParser: @@ -80,24 +110,24 @@ class ResultParser: def ignore(self, l_in: str): if len(l_in) == 0: return True - if l_in[0:1] == '#': + if l_in[0:1] == "#": return True return False - def line(self, l_in: str): - '''Parse a line into items + def line(self, line_in: str): + """Parse a line into items Being clever about separators - ''' - l = l_in + """ + line = line_in row = [] - while l != '': - match_item = self.field.match(l) + while line != "": + match_item = self.field.match(line) if match_item: item = match_item.group(0) row.append(item) - l = l[len(item):] + line = line[len(item) :] else: - match_intra = self.intra.match(l) + match_intra = self.intra.match(line) if match_intra: intra = match_intra.group(0) # Count the separators @@ -107,26 +137,27 @@ class ResultParser: sep_count = len(tabbed) - 1 if sep_count == 0: sep_count = 1 - for i in range(sep_count-1): - row.append('') - l = l[len(intra):] + for _ in range(sep_count - 1): + row.append("") + line = line[len(intra) :] else: - raise BenchmarkResultException( - 'Invalid TSV line', f"{l_in} at {l}") + raise BenchmarkResultException( + "Invalid TSV line", f"{line_in} at {line}" + ) return row def parse(self, lines): - '''Parse something that iterates lines''' + """Parse something that iterates lines""" rows = [self.line(line) for line in lines if not self.ignore(line)] header = rows[0] width = len(header) - records = [{k: v for (k, v) in itertools.zip_longest( - header, row[:width])} for row in rows[1:]] + records = [ + {k: v for (k, v) in itertools.zip_longest(header, row[:width])} + for row in rows[1:] + ] return records def load_report_from_tsv(filename: str): - file = open(filename, 'r') + file = open(filename, "r") contents = file.readlines() file.close() parser = ResultParser() @@ -136,52 +167,70 @@ def load_report_from_tsv(filename: str): def push_report_to_opensearch(report, esdocument): - sanitized = [BenchmarkUtils.conform_opensearch(row) - for row in report if BenchmarkUtils.sanity_check(row)] - logging.debug(f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch") + sanitized = [ + BenchmarkUtils.conform_opensearch(row) + for row in report + if BenchmarkUtils.sanity_check(row) + ] + logging.debug( + f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch" + ) for single_benchmark in sanitized: logging.debug(f"upload benchmark: {single_benchmark}") response = requests.post( esdocument, - json=single_benchmark, auth=(os.environ['ES_USER'], os.environ['ES_PASS'])) + json=single_benchmark, + auth=(os.environ["ES_USER"], os.environ["ES_PASS"]), + ) logging.debug( - 
f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}") + f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}" + ) response.raise_for_status() + def push_report_to_null(report): - + for row in report: if BenchmarkUtils.sanity_check(row): logging.debug(f"row {row}") conformed = BenchmarkUtils.conform_opensearch(row) logging.debug(f"conformed row {conformed}") + def main(): - '''Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch + """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch This tool will (1) Open a local tsv benchmark report file (2) Upload to OpenSearch document, via https/JSON - ''' + """ - parser = argparse.ArgumentParser( - description='CircleCI benchmark scraper.') + parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.") # --tsvfile is the name of the file to read results from # --esdocument is the ElasticSearch document to push these results into # - parser.add_argument('--tsvfile', default='build_tools/circle_api_scraper_input.txt', - help='File from which to read tsv report') - parser.add_argument('--esdocument', help='ElasticSearch/OpenSearch document URL to upload report into') - parser.add_argument('--upload', choices=['opensearch', 'none'], default='opensearch') + parser.add_argument( + "--tsvfile", + default="build_tools/circle_api_scraper_input.txt", + help="File from which to read tsv report", + ) + parser.add_argument( + "--esdocument", + help="ElasticSearch/OpenSearch document URL to upload report into", + ) + parser.add_argument( + "--upload", choices=["opensearch", "none"], default="opensearch" + ) args = parser.parse_args() logging.debug(f"Arguments: {args}") reports = load_report_from_tsv(args.tsvfile) - if (args.upload == 'opensearch'): + if args.upload == "opensearch": push_report_to_opensearch(reports, args.esdocument) else: push_report_to_null(reports) -if __name__ == '__main__': + +if __name__ == "__main__": sys.exit(main()) diff --git a/build_tools/error_filter.py b/build_tools/error_filter.py index 5d840b2f2..c42df1f91 100644 --- a/build_tools/error_filter.py +++ b/build_tools/error_filter.py @@ -3,16 +3,13 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). -'''Filter for error messages in test output: +"""Filter for error messages in test output: - Receives merged stdout/stderr from test on stdin - Finds patterns of known error messages for test name (first argument) - Prints those error messages to stdout -''' +""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals import re import sys @@ -20,23 +17,24 @@ import sys class ErrorParserBase(object): def parse_error(self, line): - '''Parses a line of test output. If it contains an error, returns a + """Parses a line of test output. If it contains an error, returns a formatted message describing the error; otherwise, returns None. Subclasses must override this method. - ''' + """ raise NotImplementedError class GTestErrorParser(ErrorParserBase): - '''A parser that remembers the last test that began running so it can print + """A parser that remembers the last test that began running so it can print that test's name upon detecting failure. 
- ''' - _GTEST_NAME_PATTERN = re.compile(r'\[ RUN \] (\S+)$') + """ + + _GTEST_NAME_PATTERN = re.compile(r"\[ RUN \] (\S+)$") # format: ':: Failure' - _GTEST_FAIL_PATTERN = re.compile(r'(unknown file|\S+:\d+): Failure$') + _GTEST_FAIL_PATTERN = re.compile(r"(unknown file|\S+:\d+): Failure$") def __init__(self): - self._last_gtest_name = 'Unknown test' + self._last_gtest_name = "Unknown test" def parse_error(self, line): gtest_name_match = self._GTEST_NAME_PATTERN.match(line) @@ -45,14 +43,13 @@ class GTestErrorParser(ErrorParserBase): return None gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line) if gtest_fail_match: - return '%s failed: %s' % ( - self._last_gtest_name, gtest_fail_match.group(1)) + return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1)) return None class MatchErrorParser(ErrorParserBase): - '''A simple parser that returns the whole line if it matches the pattern. - ''' + """A simple parser that returns the whole line if it matches the pattern.""" + def __init__(self, pattern): self._pattern = re.compile(pattern) @@ -69,97 +66,104 @@ class CompilerErrorParser(MatchErrorParser): # format (link error): # ':: error: ' # The below regex catches both - super(CompilerErrorParser, self).__init__(r'\S+:\d+: error:') + super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:") class ScanBuildErrorParser(MatchErrorParser): def __init__(self): - super(ScanBuildErrorParser, self).__init__( - r'scan-build: \d+ bugs found.$') + super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$") class DbCrashErrorParser(MatchErrorParser): def __init__(self): - super(DbCrashErrorParser, self).__init__(r'\*\*\*.*\^$|TEST FAILED.') + super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.") class WriteStressErrorParser(MatchErrorParser): def __init__(self): super(WriteStressErrorParser, self).__init__( - r'ERROR: write_stress died with exitcode=\d+') + r"ERROR: write_stress died with exitcode=\d+" + ) class AsanErrorParser(MatchErrorParser): def __init__(self): - super(AsanErrorParser, self).__init__( - r'==\d+==ERROR: AddressSanitizer:') + super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:") class UbsanErrorParser(MatchErrorParser): def __init__(self): # format: '::: runtime error: ' - super(UbsanErrorParser, self).__init__(r'\S+:\d+:\d+: runtime error:') + super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:") class ValgrindErrorParser(MatchErrorParser): def __init__(self): # just grab the summary, valgrind doesn't clearly distinguish errors # from other log messages. 
- super(ValgrindErrorParser, self).__init__(r'==\d+== ERROR SUMMARY:') + super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:") class CompatErrorParser(MatchErrorParser): def __init__(self): - super(CompatErrorParser, self).__init__(r'==== .*[Ee]rror.* ====$') + super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$") class TsanErrorParser(MatchErrorParser): def __init__(self): - super(TsanErrorParser, self).__init__(r'WARNING: ThreadSanitizer:') + super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:") _TEST_NAME_TO_PARSERS = { - 'punit': [CompilerErrorParser, GTestErrorParser], - 'unit': [CompilerErrorParser, GTestErrorParser], - 'release': [CompilerErrorParser, GTestErrorParser], - 'unit_481': [CompilerErrorParser, GTestErrorParser], - 'release_481': [CompilerErrorParser, GTestErrorParser], - 'clang_unit': [CompilerErrorParser, GTestErrorParser], - 'clang_release': [CompilerErrorParser, GTestErrorParser], - 'clang_analyze': [CompilerErrorParser, ScanBuildErrorParser], - 'code_cov': [CompilerErrorParser, GTestErrorParser], - 'unity': [CompilerErrorParser, GTestErrorParser], - 'lite': [CompilerErrorParser], - 'lite_test': [CompilerErrorParser, GTestErrorParser], - 'stress_crash': [CompilerErrorParser, DbCrashErrorParser], - 'stress_crash_with_atomic_flush': [CompilerErrorParser, DbCrashErrorParser], - 'stress_crash_with_txn': [CompilerErrorParser, DbCrashErrorParser], - 'write_stress': [CompilerErrorParser, WriteStressErrorParser], - 'asan': [CompilerErrorParser, GTestErrorParser, AsanErrorParser], - 'asan_crash': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], - 'asan_crash_with_atomic_flush': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], - 'asan_crash_with_txn': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], - 'ubsan': [CompilerErrorParser, GTestErrorParser, UbsanErrorParser], - 'ubsan_crash': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], - 'ubsan_crash_with_atomic_flush': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], - 'ubsan_crash_with_txn': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], - 'valgrind': [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser], - 'tsan': [CompilerErrorParser, GTestErrorParser, TsanErrorParser], - 'format_compatible': [CompilerErrorParser, CompatErrorParser], - 'run_format_compatible': [CompilerErrorParser, CompatErrorParser], - 'no_compression': [CompilerErrorParser, GTestErrorParser], - 'run_no_compression': [CompilerErrorParser, GTestErrorParser], - 'regression': [CompilerErrorParser], - 'run_regression': [CompilerErrorParser], + "punit": [CompilerErrorParser, GTestErrorParser], + "unit": [CompilerErrorParser, GTestErrorParser], + "release": [CompilerErrorParser, GTestErrorParser], + "unit_481": [CompilerErrorParser, GTestErrorParser], + "release_481": [CompilerErrorParser, GTestErrorParser], + "clang_unit": [CompilerErrorParser, GTestErrorParser], + "clang_release": [CompilerErrorParser, GTestErrorParser], + "clang_analyze": [CompilerErrorParser, ScanBuildErrorParser], + "code_cov": [CompilerErrorParser, GTestErrorParser], + "unity": [CompilerErrorParser, GTestErrorParser], + "lite": [CompilerErrorParser], + "lite_test": [CompilerErrorParser, GTestErrorParser], + "stress_crash": [CompilerErrorParser, DbCrashErrorParser], + "stress_crash_with_atomic_flush": [CompilerErrorParser, DbCrashErrorParser], + "stress_crash_with_txn": [CompilerErrorParser, DbCrashErrorParser], + "write_stress": [CompilerErrorParser, 
WriteStressErrorParser], + "asan": [CompilerErrorParser, GTestErrorParser, AsanErrorParser], + "asan_crash": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], + "asan_crash_with_atomic_flush": [ + CompilerErrorParser, + AsanErrorParser, + DbCrashErrorParser, + ], + "asan_crash_with_txn": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], + "ubsan": [CompilerErrorParser, GTestErrorParser, UbsanErrorParser], + "ubsan_crash": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], + "ubsan_crash_with_atomic_flush": [ + CompilerErrorParser, + UbsanErrorParser, + DbCrashErrorParser, + ], + "ubsan_crash_with_txn": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], + "valgrind": [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser], + "tsan": [CompilerErrorParser, GTestErrorParser, TsanErrorParser], + "format_compatible": [CompilerErrorParser, CompatErrorParser], + "run_format_compatible": [CompilerErrorParser, CompatErrorParser], + "no_compression": [CompilerErrorParser, GTestErrorParser], + "run_no_compression": [CompilerErrorParser, GTestErrorParser], + "regression": [CompilerErrorParser], + "run_regression": [CompilerErrorParser], } def main(): if len(sys.argv) != 2: - return 'Usage: %s ' % sys.argv[0] + return "Usage: %s " % sys.argv[0] test_name = sys.argv[1] if test_name not in _TEST_NAME_TO_PARSERS: - return 'Unknown test name: %s' % test_name + return "Unknown test name: %s" % test_name error_parsers = [] for parser_cls in _TEST_NAME_TO_PARSERS[test_name]: @@ -173,5 +177,5 @@ def main(): print(error_msg) -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/coverage/parse_gcov_output.py b/coverage/parse_gcov_output.py index 02edc2a59..b9788ec81 100644 --- a/coverage/parse_gcov_output.py +++ b/coverage/parse_gcov_output.py @@ -47,35 +47,39 @@ def parse_gcov_report(gcov_input): return per_file_coverage, total_coverage + def get_option_parser(): - usage = "Parse the gcov output and generate more human-readable code " +\ - "coverage report." + usage = ( + "Parse the gcov output and generate more human-readable code " + + "coverage report." + ) parser = optparse.OptionParser(usage) parser.add_option( - "--interested-files", "-i", + "--interested-files", + "-i", dest="filenames", - help="Comma separated files names. if specified, we will display " + - "the coverage report only for interested source files. " + - "Otherwise we will display the coverage report for all " + - "source files." + help="Comma separated files names. if specified, we will display " + + "the coverage report only for interested source files. " + + "Otherwise we will display the coverage report for all " + + "source files.", ) return parser + def display_file_coverage(per_file_coverage, total_coverage): # To print out auto-adjustable column, we need to know the longest # length of file names. - max_file_name_length = max( - len(fname) for fname in per_file_coverage.keys() - ) + max_file_name_length = max(len(fname) for fname in per_file_coverage.keys()) # -- Print header # size of separator is determined by 3 column sizes: # file name, coverage percentage and lines. 
- header_template = \ - "%" + str(max_file_name_length) + "s\t%s\t%s" + header_template = "%" + str(max_file_name_length) + "s\t%s\t%s" separator = "-" * (max_file_name_length + 10 + 20) - print(header_template % ("Filename", "Coverage", "Lines")) # noqa: E999 T25377293 Grandfathered in + print( + header_template % ("Filename", "Coverage", "Lines") + ) # noqa: E999 T25377293 Grandfathered in print(separator) # -- Print body @@ -91,13 +95,14 @@ def display_file_coverage(per_file_coverage, total_coverage): print(separator) print(record_template % ("Total", total_coverage[0], total_coverage[1])) + def report_coverage(): parser = get_option_parser() (options, args) = parser.parse_args() interested_files = set() if options.filenames is not None: - interested_files = set(f.strip() for f in options.filenames.split(',')) + interested_files = {f.strip() for f in options.filenames.split(",")} # To make things simple, right now we only read gcov report from the input per_file_coverage, total_coverage = parse_gcov_report(sys.stdin) @@ -105,7 +110,8 @@ def report_coverage(): # Check if we need to display coverage info for interested files. if len(interested_files): per_file_coverage = dict( - (fname, per_file_coverage[fname]) for fname in interested_files + (fname, per_file_coverage[fname]) + for fname in interested_files if fname in per_file_coverage ) # If we only interested in several files, it makes no sense to report @@ -117,5 +123,6 @@ def report_coverage(): return display_file_coverage(per_file_coverage, total_coverage) + if __name__ == "__main__": report_coverage() diff --git a/tools/advisor/advisor/bench_runner.py b/tools/advisor/advisor/bench_runner.py index 7c7ee7882..ba8c64919 100644 --- a/tools/advisor/advisor/bench_runner.py +++ b/tools/advisor/advisor/bench_runner.py @@ -3,8 +3,8 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). -from abc import ABC, abstractmethod import re +from abc import ABC, abstractmethod class BenchmarkRunner(ABC): @@ -25,15 +25,15 @@ class BenchmarkRunner(ABC): # 'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is # not specified in the OPTIONS file, then the location of the log file # will be /dev/shm and the name of the file will be 'LOG' - file_name = '' + file_name = "" if log_dir: # refer GetInfoLogPrefix() in rocksdb/util/filename.cc # example db_path: /dev/shm/dbbench file_name = db_path[1:] # to ignore the leading '/' character - to_be_replaced = re.compile('[^0-9a-zA-Z\-_\.]') + to_be_replaced = re.compile("[^0-9a-zA-Z\-_\.]") # noqa for character in to_be_replaced.findall(db_path): - file_name = file_name.replace(character, '_') - if not file_name.endswith('_'): - file_name += '_' - file_name += 'LOG' + file_name = file_name.replace(character, "_") + if not file_name.endswith("_"): + file_name += "_" + file_name += "LOG" return file_name diff --git a/tools/advisor/advisor/config_optimizer_example.py b/tools/advisor/advisor/config_optimizer_example.py index e3736387e..40e2bb953 100644 --- a/tools/advisor/advisor/config_optimizer_example.py +++ b/tools/advisor/advisor/config_optimizer_example.py @@ -4,6 +4,7 @@ # (found in the LICENSE.Apache file in the root directory). 
import argparse + from advisor.db_config_optimizer import ConfigOptimizer from advisor.db_log_parser import NO_COL_FAMILY from advisor.db_options_parser import DatabaseOptions @@ -23,44 +24,35 @@ def main(args): bench_runner_class = getattr(bench_runner_module, args.benchrunner_class) ods_args = {} if args.ods_client and args.ods_entity: - ods_args['client_script'] = args.ods_client - ods_args['entity'] = args.ods_entity + ods_args["client_script"] = args.ods_client + ods_args["entity"] = args.ods_entity if args.ods_key_prefix: - ods_args['key_prefix'] = args.ods_key_prefix + ods_args["key_prefix"] = args.ods_key_prefix db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args) # initialise the database configuration db_options = DatabaseOptions(args.rocksdb_options, args.misc_options) # set the frequency at which stats are dumped in the LOG file and the # location of the LOG file. db_log_dump_settings = { - "DBOptions.stats_dump_period_sec": { - NO_COL_FAMILY: args.stats_dump_period_sec - } + "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: args.stats_dump_period_sec} } db_options.update_options(db_log_dump_settings) # initialise the configuration optimizer config_optimizer = ConfigOptimizer( - db_bench_runner, - db_options, - rule_spec_parser, - args.base_db_path + db_bench_runner, db_options, rule_spec_parser, args.base_db_path ) # run the optimiser to improve the database configuration for given # benchmarks, with the help of expert-specified rules final_db_options = config_optimizer.run() # generate the final rocksdb options file print( - 'Final configuration in: ' + - final_db_options.generate_options_config('final') - ) - print( - 'Final miscellaneous options: ' + - repr(final_db_options.get_misc_options()) + "Final configuration in: " + final_db_options.generate_options_config("final") ) + print("Final miscellaneous options: " + repr(final_db_options.get_misc_options())) -if __name__ == '__main__': - ''' +if __name__ == "__main__": + """ An example run of this tool from the command-line would look like: python3 -m advisor.config_optimizer_example --base_db_path=/tmp/rocksdbtest-155919/dbbench @@ -69,66 +61,80 @@ if __name__ == '__main__': --benchrunner_module=advisor.db_bench_runner --benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench readwhilewriting use_existing_db=true duration=90 - ''' - parser = argparse.ArgumentParser(description='This script is used for\ - searching for a better database configuration') + """ + parser = argparse.ArgumentParser( + description="This script is used for\ + searching for a better database configuration" + ) parser.add_argument( - '--rocksdb_options', required=True, type=str, - help='path of the starting Rocksdb OPTIONS file' + "--rocksdb_options", + required=True, + type=str, + help="path of the starting Rocksdb OPTIONS file", ) # these are options that are column-family agnostic and are not yet # supported by the Rocksdb Options file: eg. bloom_bits=2 parser.add_argument( - '--misc_options', nargs='*', - help='whitespace-separated list of options that are not supported ' + - 'by the Rocksdb OPTIONS file, given in the ' + - '= format eg. "bloom_bits=2 ' + - 'rate_limiter_bytes_per_sec=128000000"') - parser.add_argument( - '--base_db_path', required=True, type=str, - help='path for the Rocksdb database' + "--misc_options", + nargs="*", + help="whitespace-separated list of options that are not supported " + + "by the Rocksdb OPTIONS file, given in the " + + '= format eg. 
"bloom_bits=2 ' + + 'rate_limiter_bytes_per_sec=128000000"', ) parser.add_argument( - '--rules_spec', required=True, type=str, - help='path of the file containing the expert-specified Rules' + "--base_db_path", required=True, type=str, help="path for the Rocksdb database" ) parser.add_argument( - '--stats_dump_period_sec', required=True, type=int, - help='the frequency (in seconds) at which STATISTICS are printed to ' + - 'the Rocksdb LOG file' + "--rules_spec", + required=True, + type=str, + help="path of the file containing the expert-specified Rules", ) - # ODS arguments parser.add_argument( - '--ods_client', type=str, help='the ODS client binary' + "--stats_dump_period_sec", + required=True, + type=int, + help="the frequency (in seconds) at which STATISTICS are printed to " + + "the Rocksdb LOG file", ) + # ODS arguments + parser.add_argument("--ods_client", type=str, help="the ODS client binary") parser.add_argument( - '--ods_entity', type=str, - help='the servers for which the ODS stats need to be fetched' + "--ods_entity", + type=str, + help="the servers for which the ODS stats need to be fetched", ) parser.add_argument( - '--ods_key_prefix', type=str, - help='the prefix that needs to be attached to the keys of time ' + - 'series to be fetched from ODS' + "--ods_key_prefix", + type=str, + help="the prefix that needs to be attached to the keys of time " + + "series to be fetched from ODS", ) # benchrunner_module example: advisor.db_benchmark_client parser.add_argument( - '--benchrunner_module', required=True, type=str, - help='the module containing the BenchmarkRunner class to be used by ' + - 'the Optimizer, example: advisor.db_bench_runner' + "--benchrunner_module", + required=True, + type=str, + help="the module containing the BenchmarkRunner class to be used by " + + "the Optimizer, example: advisor.db_bench_runner", ) # benchrunner_class example: DBBenchRunner parser.add_argument( - '--benchrunner_class', required=True, type=str, - help='the name of the BenchmarkRunner class to be used by the ' + - 'Optimizer, should be present in the module provided in the ' + - 'benchrunner_module argument, example: DBBenchRunner' + "--benchrunner_class", + required=True, + type=str, + help="the name of the BenchmarkRunner class to be used by the " + + "Optimizer, should be present in the module provided in the " + + "benchrunner_module argument, example: DBBenchRunner", ) parser.add_argument( - '--benchrunner_pos_args', nargs='*', - help='whitespace-separated positional arguments that are passed on ' + - 'to the constructor of the BenchmarkRunner class provided in the ' + - 'benchrunner_class argument, example: "use_existing_db=true ' + - 'duration=900"' + "--benchrunner_pos_args", + nargs="*", + help="whitespace-separated positional arguments that are passed on " + + "to the constructor of the BenchmarkRunner class provided in the " + + 'benchrunner_class argument, example: "use_existing_db=true ' + + 'duration=900"', ) args = parser.parse_args() main(args) diff --git a/tools/advisor/advisor/db_bench_runner.py b/tools/advisor/advisor/db_bench_runner.py index 54424440b..f5802ed15 100644 --- a/tools/advisor/advisor/db_bench_runner.py +++ b/tools/advisor/advisor/db_bench_runner.py @@ -3,19 +3,22 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). 
-from advisor.bench_runner import BenchmarkRunner -from advisor.db_log_parser import DataSource, DatabaseLogs, NO_COL_FAMILY -from advisor.db_stats_fetcher import ( - LogStatsParser, OdsStatsFetcher, DatabasePerfContext -) import shutil import subprocess import time +from advisor.bench_runner import BenchmarkRunner +from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY +from advisor.db_stats_fetcher import ( + DatabasePerfContext, + LogStatsParser, + OdsStatsFetcher, +) + -''' +""" NOTE: This is not thread-safe, because the output file is simply overwritten. -''' +""" class DBBenchRunner(BenchmarkRunner): @@ -37,9 +40,7 @@ class DBBenchRunner(BenchmarkRunner): optional_args_str = "" for option_name, option_value in misc_options_dict.items(): if option_value: - optional_args_str += ( - " --" + option_name + "=" + str(option_value) - ) + optional_args_str += " --" + option_name + "=" + str(option_value) return optional_args_str def __init__(self, positional_args, ods_args=None): @@ -54,19 +55,17 @@ class DBBenchRunner(BenchmarkRunner): self.ods_args = ods_args def _parse_output(self, get_perf_context=False): - ''' + """ Sample db_bench output after running 'readwhilewriting' benchmark: DB path: [/tmp/rocksdbtest-155919/dbbench]\n readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\ of 5427999 found)\n PERF_CONTEXT:\n user_key_comparison_count = 500466712, block_cache_hit_count = ...\n - ''' - output = { - self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None - } + """ + output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None} perf_context_begins = False - with open(self.OUTPUT_FILE, 'r') as fp: + with open(self.OUTPUT_FILE, "r") as fp: for line in fp: if line.startswith(self.benchmark): # line from sample output: @@ -77,9 +76,7 @@ class DBBenchRunner(BenchmarkRunner): for ix, token in enumerate(token_list): if token.startswith(self.THROUGHPUT): # in above example, throughput = 60305 ops/sec - output[self.THROUGHPUT] = ( - float(token_list[ix - 1]) - ) + output[self.THROUGHPUT] = float(token_list[ix - 1]) break elif get_perf_context and line.startswith(self.PERF_CON): # the following lines in the output contain perf context @@ -89,11 +86,11 @@ class DBBenchRunner(BenchmarkRunner): # Sample perf_context output: # user_key_comparison_count = 500, block_cache_hit_count =\ # 468, block_read_count = 580, block_read_byte = 445, ... - token_list = line.strip().split(',') + token_list = line.strip().split(",") # token_list = ['user_key_comparison_count = 500', # 'block_cache_hit_count = 468','block_read_count = 580'... 
perf_context = { - tk.split('=')[0].strip(): tk.split('=')[1].strip() + tk.split("=")[0].strip(): tk.split("=")[1].strip() for tk in token_list if tk } @@ -103,17 +100,13 @@ class DBBenchRunner(BenchmarkRunner): timestamp = int(time.time()) perf_context_ts = {} for stat in perf_context.keys(): - perf_context_ts[stat] = { - timestamp: int(perf_context[stat]) - } + perf_context_ts[stat] = {timestamp: int(perf_context[stat])} output[self.PERF_CON] = perf_context_ts perf_context_begins = False elif line.startswith(self.DB_PATH): # line from sample output: # DB path: [/tmp/rocksdbtest-155919/dbbench]\n - output[self.DB_PATH] = ( - line.split('[')[1].split(']')[0] - ) + output[self.DB_PATH] = line.split("[")[1].split("]")[0] return output def get_log_options(self, db_options, db_path): @@ -124,40 +117,38 @@ class DBBenchRunner(BenchmarkRunner): logs_file_prefix = None # fetch frequency at which the stats are dumped in the Rocksdb logs - dump_period = 'DBOptions.stats_dump_period_sec' + dump_period = "DBOptions.stats_dump_period_sec" # fetch the directory, if specified, in which the Rocksdb logs are # dumped, by default logs are dumped in same location as database - log_dir = 'DBOptions.db_log_dir' + log_dir = "DBOptions.db_log_dir" log_options = db_options.get_options([dump_period, log_dir]) if dump_period in log_options: stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY]) if log_dir in log_options: log_dir_path = log_options[log_dir][NO_COL_FAMILY] - log_file_name = DBBenchRunner.get_info_log_file_name( - log_dir_path, db_path - ) + log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path) if not log_dir_path: log_dir_path = db_path - if not log_dir_path.endswith('/'): - log_dir_path += '/' + if not log_dir_path.endswith("/"): + log_dir_path += "/" logs_file_prefix = log_dir_path + log_file_name return (logs_file_prefix, stats_freq_sec) def _get_options_command_line_args_str(self, curr_options): - ''' + """ This method uses the provided Rocksdb OPTIONS to create a string of command-line arguments for db_bench. The --options_file argument is always given and the options that are not supported by the OPTIONS file are given as separate arguments. 
- ''' + """ optional_args_str = DBBenchRunner.get_opt_args_str( curr_options.get_misc_options() ) # generate an options configuration file - options_file = curr_options.generate_options_config(nonce='12345') + options_file = curr_options.generate_options_config(nonce="12345") optional_args_str += " --options_file=" + options_file return optional_args_str @@ -166,10 +157,11 @@ class DBBenchRunner(BenchmarkRunner): try: shutil.rmtree(db_path, ignore_errors=True) except OSError as e: - print('Error: rmdir ' + e.filename + ' ' + e.strerror) + print("Error: rmdir " + e.filename + " " + e.strerror) # setup database with a million keys using the fillrandom benchmark command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % ( - self.db_bench_binary, db_path + self.db_bench_binary, + db_path, ) args_str = self._get_options_command_line_args_str(curr_options) command += args_str @@ -177,21 +169,23 @@ class DBBenchRunner(BenchmarkRunner): def _build_experiment_command(self, curr_options, db_path): command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % ( - self.db_bench_binary, self.benchmark, db_path + self.db_bench_binary, + self.benchmark, + db_path, ) # fetch the command-line arguments string for providing Rocksdb options args_str = self._get_options_command_line_args_str(curr_options) # handle the command-line args passed in the constructor, these # arguments are specific to db_bench for cmd_line_arg in self.db_bench_args: - args_str += (" --" + cmd_line_arg) + args_str += " --" + cmd_line_arg command += args_str return command def _run_command(self, command): out_file = open(self.OUTPUT_FILE, "w+") err_file = open(self.ERROR_FILE, "w+") - print('executing... - ' + command) + print("executing... - " + command) subprocess.call(command, shell=True, stdout=out_file, stderr=err_file) out_file.close() err_file.close() @@ -214,32 +208,30 @@ class DBBenchRunner(BenchmarkRunner): db_options, parsed_output[self.DB_PATH] ) # create the Rocksbd LOGS object - db_logs = DatabaseLogs( - logs_file_prefix, db_options.get_column_families() - ) + db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families()) # Create the Log STATS object db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec) # Create the PerfContext STATS object - db_perf_context = DatabasePerfContext( - parsed_output[self.PERF_CON], 0, False - ) + db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False) # create the data-sources dictionary data_sources = { DataSource.Type.DB_OPTIONS: [db_options], DataSource.Type.LOG: [db_logs], - DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context] + DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context], } # Create the ODS STATS object if self.ods_args: - key_prefix = '' - if 'key_prefix' in self.ods_args: - key_prefix = self.ods_args['key_prefix'] - data_sources[DataSource.Type.TIME_SERIES].append(OdsStatsFetcher( - self.ods_args['client_script'], - self.ods_args['entity'], - experiment_start_time, - experiment_end_time, - key_prefix - )) + key_prefix = "" + if "key_prefix" in self.ods_args: + key_prefix = self.ods_args["key_prefix"] + data_sources[DataSource.Type.TIME_SERIES].append( + OdsStatsFetcher( + self.ods_args["client_script"], + self.ods_args["entity"], + experiment_start_time, + experiment_end_time, + key_prefix, + ) + ) # return the experiment's data-sources and throughput return data_sources, parsed_output[self.THROUGHPUT] diff --git a/tools/advisor/advisor/db_config_optimizer.py 
b/tools/advisor/advisor/db_config_optimizer.py index 508c0f8fe..413778478 100644 --- a/tools/advisor/advisor/db_config_optimizer.py +++ b/tools/advisor/advisor/db_config_optimizer.py @@ -3,16 +3,17 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). +import copy +import random + from advisor.db_log_parser import NO_COL_FAMILY from advisor.db_options_parser import DatabaseOptions from advisor.rule_parser import Suggestion -import copy -import random class ConfigOptimizer: - SCOPE = 'scope' - SUGG_VAL = 'suggested values' + SCOPE = "scope" + SUGG_VAL = "suggested values" @staticmethod def apply_action_on_value(old_value, action, suggested_values): @@ -21,7 +22,7 @@ class ConfigOptimizer: chosen_sugg_val = random.choice(list(suggested_values)) new_value = None if action is Suggestion.Action.set or not old_value: - assert(chosen_sugg_val) + assert chosen_sugg_val new_value = chosen_sugg_val else: # For increase/decrease actions, currently the code tries to make @@ -61,8 +62,8 @@ class ConfigOptimizer: # A Suggestion in the rules spec must have the 'option' and # 'action' fields defined, always call perform_checks() method # after parsing the rules file using RulesSpec - assert(option) - assert(action) + assert option + assert action required_options.append(option) rule_suggestions.append(suggestions_dict[sugg_name]) current_config = options.get_options(required_options) @@ -87,8 +88,9 @@ class ConfigOptimizer: updated_config[sugg.option][col_fam] = new_value except AssertionError: print( - 'WARNING(ConfigOptimizer): provide suggested_values ' + - 'for ' + sugg.option + "WARNING(ConfigOptimizer): provide suggested_values " + + "for " + + sugg.option ) continue # case: when the option is present in the current configuration @@ -103,8 +105,9 @@ class ConfigOptimizer: updated_config[sugg.option][NO_COL_FAMILY] = new_value except AssertionError: print( - 'WARNING(ConfigOptimizer): provide suggested_values ' + - 'for ' + sugg.option + "WARNING(ConfigOptimizer): provide suggested_values " + + "for " + + sugg.option ) else: for col_fam in rule.get_trigger_column_families(): @@ -120,15 +123,16 @@ class ConfigOptimizer: updated_config[sugg.option][col_fam] = new_value except AssertionError: print( - 'WARNING(ConfigOptimizer): provide ' + - 'suggested_values for ' + sugg.option + "WARNING(ConfigOptimizer): provide " + + "suggested_values for " + + sugg.option ) return current_config, updated_config @staticmethod def pick_rule_to_apply(rules, last_rule_name, rules_tried, backtrack): if not rules: - print('\nNo more rules triggered!') + print("\nNo more rules triggered!") return None # if the last rule provided an improvement in the database performance, # and it was triggered again (i.e. 
it is present in 'rules'), then pick @@ -143,7 +147,7 @@ class ConfigOptimizer: for rule in rules: if rule.name not in rules_tried: return rule - print('\nAll rules have been exhausted') + print("\nAll rules have been exhausted") return None @staticmethod @@ -153,13 +157,13 @@ class ConfigOptimizer: rules_tried, backtrack, curr_options, - suggestions_dict + suggestions_dict, ): curr_rule = ConfigOptimizer.pick_rule_to_apply( triggered_rules, current_rule_name, rules_tried, backtrack ) if not curr_rule: - return tuple([None]*4) + return tuple([None] * 4) # if a rule has been picked for improving db_config, update rules_tried rules_tried.add(curr_rule.name) # get updated config based on the picked rule @@ -168,17 +172,20 @@ class ConfigOptimizer: ) conf_diff = DatabaseOptions.get_options_diff(curr_conf, updated_conf) if not conf_diff: # the current and updated configs are the same - curr_rule, rules_tried, curr_conf, updated_conf = ( - ConfigOptimizer.apply_suggestions( - triggered_rules, - None, - rules_tried, - backtrack, - curr_options, - suggestions_dict - ) + ( + curr_rule, + rules_tried, + curr_conf, + updated_conf, + ) = ConfigOptimizer.apply_suggestions( + triggered_rules, + None, + rules_tried, + backtrack, + curr_options, + suggestions_dict, ) - print('returning from apply_suggestions') + print("returning from apply_suggestions") return (curr_rule, rules_tried, curr_conf, updated_conf) # TODO(poojam23): check if this method is required or can we directly set @@ -205,52 +212,53 @@ class ConfigOptimizer: # RULE from all the triggered rules and apply all its suggestions to # the appropriate options. # bootstrapping the optimizer - print('Bootstrapping optimizer:') + print("Bootstrapping optimizer:") options = copy.deepcopy(self.db_options) - old_data_sources, old_metric = ( - self.bench_runner.run_experiment(options, self.base_db_path) + old_data_sources, old_metric = self.bench_runner.run_experiment( + options, self.base_db_path ) - print('Initial metric: ' + str(old_metric)) + print("Initial metric: " + str(old_metric)) self.rule_parser.load_rules_from_spec() self.rule_parser.perform_section_checks() triggered_rules = self.rule_parser.get_triggered_rules( old_data_sources, options.get_column_families() ) - print('\nTriggered:') + print("\nTriggered:") self.rule_parser.print_rules(triggered_rules) backtrack = False rules_tried = set() - curr_rule, rules_tried, curr_conf, updated_conf = ( - ConfigOptimizer.apply_suggestions( - triggered_rules, - None, - rules_tried, - backtrack, - options, - self.rule_parser.get_suggestions_dict() - ) + ( + curr_rule, + rules_tried, + curr_conf, + updated_conf, + ) = ConfigOptimizer.apply_suggestions( + triggered_rules, + None, + rules_tried, + backtrack, + options, + self.rule_parser.get_suggestions_dict(), ) # the optimizer loop while curr_rule: - print('\nRule picked for next iteration:') + print("\nRule picked for next iteration:") print(curr_rule.name) - print('\ncurrent config:') + print("\ncurrent config:") print(curr_conf) - print('updated config:') + print("updated config:") print(updated_conf) options.update_options(updated_conf) # run bench_runner with updated config - new_data_sources, new_metric = ( - self.bench_runner.run_experiment(options, self.base_db_path) - ) - print('\nnew metric: ' + str(new_metric)) - backtrack = not self.bench_runner.is_metric_better( - new_metric, old_metric + new_data_sources, new_metric = self.bench_runner.run_experiment( + options, self.base_db_path ) + print("\nnew metric: " + str(new_metric)) + backtrack = 
not self.bench_runner.is_metric_better(new_metric, old_metric) # update triggered_rules, metric, data_sources, if required if backtrack: # revert changes to options config - print('\nBacktracking to previous configuration') + print("\nBacktracking to previous configuration") backtrack_conf = ConfigOptimizer.get_backtrack_config( curr_conf, updated_conf ) @@ -262,21 +270,24 @@ class ConfigOptimizer: triggered_rules = self.rule_parser.get_triggered_rules( new_data_sources, options.get_column_families() ) - print('\nTriggered:') + print("\nTriggered:") self.rule_parser.print_rules(triggered_rules) old_metric = new_metric old_data_sources = new_data_sources rules_tried = set() # pick rule to work on and set curr_rule to that - curr_rule, rules_tried, curr_conf, updated_conf = ( - ConfigOptimizer.apply_suggestions( - triggered_rules, - curr_rule.name, - rules_tried, - backtrack, - options, - self.rule_parser.get_suggestions_dict() - ) + ( + curr_rule, + rules_tried, + curr_conf, + updated_conf, + ) = ConfigOptimizer.apply_suggestions( + triggered_rules, + curr_rule.name, + rules_tried, + backtrack, + options, + self.rule_parser.get_suggestions_dict(), ) # return the final database options configuration return options diff --git a/tools/advisor/advisor/db_log_parser.py b/tools/advisor/advisor/db_log_parser.py index efd41a81a..2ce8a74cb 100644 --- a/tools/advisor/advisor/db_log_parser.py +++ b/tools/advisor/advisor/db_log_parser.py @@ -3,15 +3,15 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). -from abc import ABC, abstractmethod -from calendar import timegm -from enum import Enum import glob import re import time +from abc import ABC, abstractmethod +from calendar import timegm +from enum import Enum -NO_COL_FAMILY = 'DB_WIDE' +NO_COL_FAMILY = "DB_WIDE" class DataSource(ABC): @@ -33,7 +33,7 @@ class Log: def is_new_log(log_line): # The assumption is that a new log will start with a date printed in # the below regex format. 
- date_regex = '\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}' + date_regex = "\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}" # noqa return re.match(date_regex, log_line) def __init__(self, log_line, column_families): @@ -46,7 +46,7 @@ class Log: # "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634] # [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n" for col_fam in column_families: - search_for_str = '\[' + col_fam + '\]' + search_for_str = "\[" + col_fam + "\]" # noqa if re.search(search_for_str, self.message): self.column_family = col_fam break @@ -67,21 +67,26 @@ class Log: return self.message def append_message(self, remaining_log): - self.message = self.message + '\n' + remaining_log.strip() + self.message = self.message + "\n" + remaining_log.strip() def get_timestamp(self): # example: '2018/07/25-11:25:45.782710' will be converted to the GMT # Unix timestamp 1532517945 (note: this method assumes that self.time # is in GMT) - hr_time = self.time + 'GMT' + hr_time = self.time + "GMT" timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z")) return timestamp def __repr__(self): return ( - 'time: ' + self.time + '; context: ' + self.context + - '; col_fam: ' + self.column_family + - '; message: ' + self.message + "time: " + + self.time + + "; context: " + + self.context + + "; col_fam: " + + self.column_family + + "; message: " + + self.message ) @@ -106,22 +111,20 @@ class DatabaseLogs(DataSource): cond.set_trigger(trigger) def check_and_trigger_conditions(self, conditions): - for file_name in glob.glob(self.logs_path_prefix + '*'): + for file_name in glob.glob(self.logs_path_prefix + "*"): # TODO(poojam23): find a way to distinguish between log files # - generated in the current experiment but are labeled 'old' # because they LOGs exceeded the file size limit AND # - generated in some previous experiment that are also labeled # 'old' and were not deleted for some reason - if re.search('old', file_name, re.IGNORECASE): + if re.search("old", file_name, re.IGNORECASE): continue - with open(file_name, 'r') as db_logs: + with open(file_name, "r") as db_logs: new_log = None for line in db_logs: if Log.is_new_log(line): if new_log: - self.trigger_conditions_for_log( - conditions, new_log - ) + self.trigger_conditions_for_log(conditions, new_log) new_log = Log(line, self.column_families) else: # To account for logs split into multiple lines diff --git a/tools/advisor/advisor/db_options_parser.py b/tools/advisor/advisor/db_options_parser.py index e689d892a..062aeeec4 100644 --- a/tools/advisor/advisor/db_options_parser.py +++ b/tools/advisor/advisor/db_options_parser.py @@ -4,25 +4,26 @@ # (found in the LICENSE.Apache file in the root directory). 
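A minimal standalone sketch (not part of this patch) of the Log.get_timestamp() conversion shown in the db_log_parser.py hunks above, using the sample timestamp from its own comment:

    import time
    from calendar import timegm

    # Log.get_timestamp() assumes the LOG time is GMT, so a "GMT" suffix is
    # appended before parsing; %f consumes the microseconds, %Z the zone.
    hr_time = "2018/07/25-11:25:45.782710" + "GMT"
    print(timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z")))  # 1532517945
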
 import copy
+import os
+
 from advisor.db_log_parser import DataSource, NO_COL_FAMILY
 from advisor.ini_parser import IniParser
-import os


 class OptionsSpecParser(IniParser):
     @staticmethod
     def is_new_option(line):
-        return '=' in line
+        return "=" in line

     @staticmethod
     def get_section_type(line):
-        '''
+        """
         Example section header: [TableOptions/BlockBasedTable "default"]
         Here the section_type returned would be
         'TableOptions.BlockBasedTable'
-        '''
+        """
         section_path = line.strip()[1:-1].split()[0]
-        section_type = '.'.join(section_path.split('/'))
+        section_type = ".".join(section_path.split("/"))
         return section_type

     @staticmethod
@@ -39,20 +40,20 @@ class OptionsSpecParser(IniParser):
         # Example:
         # Case 1: get_section_str('DBOptions', NO_COL_FAMILY)
         # Case 2: get_section_str('TableOptions.BlockBasedTable', 'default')
-        section_type = '/'.join(section_type.strip().split('.'))
+        section_type = "/".join(section_type.strip().split("."))
         # Case 1: section_type = 'DBOptions'
         # Case 2: section_type = 'TableOptions/BlockBasedTable'
-        section_str = '[' + section_type
+        section_str = "[" + section_type
         if section_name == NO_COL_FAMILY:
             # Case 1: '[DBOptions]'
-            return (section_str + ']')
+            return section_str + "]"
         else:
             # Case 2: '[TableOptions/BlockBasedTable "default"]'
             return section_str + ' "' + section_name + '"]'

     @staticmethod
     def get_option_str(key, values):
-        option_str = key + '='
+        option_str = key + "="
         # get_option_str('db_log_dir', None), returns 'db_log_dir='
         if values:
             # example:
@@ -61,7 +62,7 @@
             # 'max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1'
             if isinstance(values, list):
                 for value in values:
-                    option_str += (str(value) + ':')
+                    option_str += str(value) + ":"
                 option_str = option_str[:-1]
             else:
                 # example: get_option_str('write_buffer_size', 1048576)
@@ -71,13 +72,12 @@


 class DatabaseOptions(DataSource):
-
     @staticmethod
     def is_misc_option(option_name):
         # these are miscellaneous options that are not yet supported by the
         # Rocksdb options file, hence they are not prefixed with any section
         # name
-        return '.' not in option_name
+        return "."
not in option_name @staticmethod def get_options_diff(opt_old, opt_new): @@ -102,7 +102,7 @@ class DatabaseOptions(DataSource): if opt_old[opt][col_fam] != opt_new[opt][col_fam]: diff[opt][col_fam] = ( opt_old[opt][col_fam], - opt_new[opt][col_fam] + opt_new[opt][col_fam], ) else: diff[opt][col_fam] = (opt_old[opt][col_fam], None) @@ -111,7 +111,7 @@ class DatabaseOptions(DataSource): if opt_old[opt][col_fam] != opt_new[opt][col_fam]: diff[opt][col_fam] = ( opt_old[opt][col_fam], - opt_new[opt][col_fam] + opt_new[opt][col_fam], ) else: diff[opt][col_fam] = (None, opt_new[opt][col_fam]) @@ -137,21 +137,19 @@ class DatabaseOptions(DataSource): self.misc_options = {} if misc_options: for option_pair_str in misc_options: - option_name = option_pair_str.split('=')[0].strip() - option_value = option_pair_str.split('=')[1].strip() + option_name = option_pair_str.split("=")[0].strip() + option_value = option_pair_str.split("=")[1].strip() self.misc_options[option_name] = option_value def load_from_source(self, options_path): self.options_dict = {} - with open(options_path, 'r') as db_options: + with open(options_path, "r") as db_options: for line in db_options: line = OptionsSpecParser.remove_trailing_comment(line) if not line: continue if OptionsSpecParser.is_section_header(line): - curr_sec_type = ( - OptionsSpecParser.get_section_type(line) - ) + curr_sec_type = OptionsSpecParser.get_section_type(line) curr_sec_name = OptionsSpecParser.get_section_name(line) if curr_sec_type not in self.options_dict: self.options_dict[curr_sec_type] = {} @@ -163,17 +161,15 @@ class DatabaseOptions(DataSource): # CFOptions and 'default' is the name of a column family # that for this database, so it's added to the list of # column families stored in this object - if curr_sec_type == 'CFOptions': + if curr_sec_type == "CFOptions": if not self.column_families: self.column_families = [] self.column_families.append(curr_sec_name) elif OptionsSpecParser.is_new_option(line): key, value = OptionsSpecParser.get_key_value_pair(line) - self.options_dict[curr_sec_type][curr_sec_name][key] = ( - value - ) + self.options_dict[curr_sec_type][curr_sec_name][key] = value else: - error = 'Not able to parse line in Options file.' + error = "Not able to parse line in Options file." OptionsSpecParser.exit_with_parse_error(line, error) def get_misc_options(self): @@ -193,7 +189,7 @@ class DatabaseOptions(DataSource): for sec_type in self.options_dict: for col_fam in self.options_dict[sec_type]: for opt_name in self.options_dict[sec_type][col_fam]: - option = sec_type + '.' + opt_name + option = sec_type + "." 
+ opt_name all_options.append(option) all_options.extend(list(self.misc_options.keys())) return self.get_options(all_options) @@ -211,24 +207,22 @@ class DatabaseOptions(DataSource): continue if option not in reqd_options_dict: reqd_options_dict[option] = {} - reqd_options_dict[option][NO_COL_FAMILY] = ( - self.misc_options[option] - ) + reqd_options_dict[option][NO_COL_FAMILY] = self.misc_options[option] else: # Example: option = 'TableOptions.BlockBasedTable.block_align' # then, sec_type = 'TableOptions.BlockBasedTable' - sec_type = '.'.join(option.split('.')[:-1]) + sec_type = ".".join(option.split(".")[:-1]) # opt_name = 'block_align' - opt_name = option.split('.')[-1] + opt_name = option.split(".")[-1] if sec_type not in self.options_dict: continue for col_fam in self.options_dict[sec_type]: if opt_name in self.options_dict[sec_type][col_fam]: if option not in reqd_options_dict: reqd_options_dict[option] = {} - reqd_options_dict[option][col_fam] = ( - self.options_dict[sec_type][col_fam][opt_name] - ) + reqd_options_dict[option][col_fam] = self.options_dict[ + sec_type + ][col_fam][opt_name] return reqd_options_dict def update_options(self, options): @@ -244,16 +238,19 @@ class DatabaseOptions(DataSource): # misc_options dictionary if NO_COL_FAMILY not in options[option]: print( - 'WARNING(DatabaseOptions.update_options): not ' + - 'updating option ' + option + ' because it is in ' + - 'misc_option format but its scope is not ' + - NO_COL_FAMILY + '. Check format of option.' + "WARNING(DatabaseOptions.update_options): not " + + "updating option " + + option + + " because it is in " + + "misc_option format but its scope is not " + + NO_COL_FAMILY + + ". Check format of option." ) continue self.misc_options[option] = options[option][NO_COL_FAMILY] else: - sec_name = '.'.join(option.split('.')[:-1]) - opt_name = option.split('.')[-1] + sec_name = ".".join(option.split(".")[:-1]) + opt_name = option.split(".")[-1] if sec_name not in self.options_dict: self.options_dict[sec_name] = {} for col_fam in options[option]: @@ -262,30 +259,26 @@ class DatabaseOptions(DataSource): # value if col_fam not in self.options_dict[sec_name]: self.options_dict[sec_name][col_fam] = {} - self.options_dict[sec_name][col_fam][opt_name] = ( - copy.deepcopy(options[option][col_fam]) + self.options_dict[sec_name][col_fam][opt_name] = copy.deepcopy( + options[option][col_fam] ) def generate_options_config(self, nonce): # this method generates a Rocksdb OPTIONS file in the INI format from # the options stored in self.options_dict this_path = os.path.abspath(os.path.dirname(__file__)) - file_name = '../temp/OPTIONS_' + str(nonce) + '.tmp' + file_name = "../temp/OPTIONS_" + str(nonce) + ".tmp" file_path = os.path.join(this_path, file_name) - with open(file_path, 'w') as fp: + with open(file_path, "w") as fp: for section in self.options_dict: for col_fam in self.options_dict[section]: - fp.write( - OptionsSpecParser.get_section_str(section, col_fam) + - '\n' - ) + fp.write(OptionsSpecParser.get_section_str(section, col_fam) + "\n") for option in self.options_dict[section][col_fam]: values = self.options_dict[section][col_fam][option] fp.write( - OptionsSpecParser.get_option_str(option, values) + - '\n' + OptionsSpecParser.get_option_str(option, values) + "\n" ) - fp.write('\n') + fp.write("\n") return file_path def check_and_trigger_conditions(self, conditions): @@ -299,10 +292,14 @@ class DatabaseOptions(DataSource): for ix, option in enumerate(cond.options): if option not in reqd_options_dict: print( - 
'WARNING(DatabaseOptions.check_and_trigger): ' + - 'skipping condition ' + cond.name + ' because it ' - 'requires option ' + option + ' but this option is' + - ' not available' + "WARNING(DatabaseOptions.check_and_trigger): " + + "skipping condition " + + cond.name + + " because it " + "requires option " + + option + + " but this option is" + + " not available" ) missing_reqd_option = True break # required option is absent @@ -321,9 +318,7 @@ class DatabaseOptions(DataSource): if eval(cond.eval_expr): cond.set_trigger({NO_COL_FAMILY: options}) except Exception as e: - print( - 'WARNING(DatabaseOptions) check_and_trigger:' + str(e) - ) + print("WARNING(DatabaseOptions) check_and_trigger:" + str(e)) continue # for all the options that are not database-wide, we look for their @@ -340,14 +335,9 @@ class DatabaseOptions(DataSource): if present: try: if eval(cond.eval_expr): - col_fam_options_dict[col_fam] = ( - copy.deepcopy(options) - ) + col_fam_options_dict[col_fam] = copy.deepcopy(options) except Exception as e: - print( - 'WARNING(DatabaseOptions) check_and_trigger: ' + - str(e) - ) + print("WARNING(DatabaseOptions) check_and_trigger: " + str(e)) # Trigger for an OptionCondition object is of the form: # Dict[col_fam_name: List[option_value]] # where col_fam_name is the name of a column family for which diff --git a/tools/advisor/advisor/db_stats_fetcher.py b/tools/advisor/advisor/db_stats_fetcher.py index cf497cf1f..32e003368 100755 --- a/tools/advisor/advisor/db_stats_fetcher.py +++ b/tools/advisor/advisor/db_stats_fetcher.py @@ -3,17 +3,19 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). -from advisor.db_log_parser import Log -from advisor.db_timeseries_parser import TimeSeriesData, NO_ENTITY import copy import glob import re import subprocess import time +from typing import List + +from advisor.db_log_parser import Log +from advisor.db_timeseries_parser import NO_ENTITY, TimeSeriesData class LogStatsParser(TimeSeriesData): - STATS = 'STATISTICS:' + STATS = "STATISTICS:" @staticmethod def parse_log_line_for_stats(log_line): @@ -22,12 +24,8 @@ class LogStatsParser(TimeSeriesData): token_list = log_line.strip().split() # token_list = ['rocksdb.db.get.micros', 'P50', ':', '8.4', 'P95', ':', # '21.8', 'P99', ':', '33.9', 'P100', ':', '92.0'] - stat_prefix = token_list[0] + '.' # 'rocksdb.db.get.micros.' - stat_values = [ - token - for token in token_list[1:] - if token != ':' - ] + stat_prefix = token_list[0] + "." # 'rocksdb.db.get.micros.' + stat_values = [token for token in token_list[1:] if token != ":"] # stat_values = ['P50', '8.4', 'P95', '21.8', 'P99', '33.9', 'P100', # '92.0'] stat_dict = {} @@ -58,7 +56,7 @@ class LogStatsParser(TimeSeriesData): # replace this with the appropriate key_prefix, remove these # characters here since the LogStatsParser does not need # a prefix - if key.startswith('[]'): + if key.startswith("[]"): reqd_stats.append(key[2:]) else: reqd_stats.append(key) @@ -77,7 +75,7 @@ class LogStatsParser(TimeSeriesData): # ... # rocksdb.db.get.micros P50 : 15.6 P95 : 39.7 P99 : 62.6 P100 : 148.0\n # ..." 
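To make the parse_log_line_for_stats() tokenization above concrete, a small sketch (not part of the patch; the final pairing step is elided in the hunk, so the dict comprehension below is an assumption about its shape):

    log_line = "rocksdb.db.get.micros P50 : 8.4 P95 : 21.8 P99 : 33.9 P100 : 92.0"
    token_list = log_line.strip().split()
    stat_prefix = token_list[0] + "."  # 'rocksdb.db.get.micros.'
    stat_values = [token for token in token_list[1:] if token != ":"]
    # pair up ['P50', '8.4', 'P95', '21.8', ...] into {stat_name: value}
    stat_dict = {
        stat_prefix + stat_values[ix]: float(stat_values[ix + 1])
        for ix in range(0, len(stat_values), 2)
    }
    print(stat_dict)  # {'rocksdb.db.get.micros.P50': 8.4, ...}
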
- new_lines = log.get_message().split('\n') + new_lines = log.get_message().split("\n") # let log_ts = 1532518219 log_ts = log.get_timestamp() # example updates to keys_ts: @@ -95,20 +93,17 @@ class LogStatsParser(TimeSeriesData): # this method parses the Rocksdb LOG file and generates timeseries for # each of the statistic in the list reqd_stats self.keys_ts = {NO_ENTITY: {}} - for file_name in glob.glob(self.logs_file_prefix + '*'): + for file_name in glob.glob(self.logs_file_prefix + "*"): # TODO(poojam23): find a way to distinguish between 'old' log files # from current and previous experiments, present in the same # directory - if re.search('old', file_name, re.IGNORECASE): + if re.search("old", file_name, re.IGNORECASE): continue - with open(file_name, 'r') as db_logs: + with open(file_name, "r") as db_logs: new_log = None for line in db_logs: if Log.is_new_log(line): - if ( - new_log and - re.search(self.STATS, new_log.get_message()) - ): + if new_log and re.search(self.STATS, new_log.get_message()): self.add_to_timeseries(new_log, reqd_stats) new_log = Log(line, column_families=[]) else: @@ -123,13 +118,13 @@ class DatabasePerfContext(TimeSeriesData): # TODO(poojam23): check if any benchrunner provides PerfContext sampled at # regular intervals def __init__(self, perf_context_ts, stats_freq_sec, cumulative): - ''' + """ perf_context_ts is expected to be in the following format: Dict[metric, Dict[timestamp, value]], where for each (metric, timestamp) pair, the value is database-wide (i.e. summed over all the threads involved) if stats_freq_sec == 0, per-metric only one value is reported - ''' + """ super().__init__() self.stats_freq_sec = stats_freq_sec self.keys_ts = {NO_ENTITY: perf_context_ts} @@ -148,11 +143,11 @@ class DatabasePerfContext(TimeSeriesData): continue for ix, ts in enumerate(timeseries[:-1]): epoch_ts[NO_ENTITY][stat][ts] = ( - epoch_ts[NO_ENTITY][stat][ts] - - epoch_ts[NO_ENTITY][stat][timeseries[ix+1]] + epoch_ts[NO_ENTITY][stat][ts] + - epoch_ts[NO_ENTITY][stat][timeseries[ix + 1]] ) if epoch_ts[NO_ENTITY][stat][ts] < 0: - raise ValueError('DBPerfContext: really cumulative?') + raise ValueError("DBPerfContext: really cumulative?") # drop the smallest timestamp in the timeseries for this metric epoch_ts[NO_ENTITY][stat].pop(timeseries[-1]) self.keys_ts = epoch_ts @@ -171,8 +166,8 @@ class DatabasePerfContext(TimeSeriesData): class OdsStatsFetcher(TimeSeriesData): # class constants - OUTPUT_FILE = 'temp/stats_out.tmp' - ERROR_FILE = 'temp/stats_err.tmp' + OUTPUT_FILE = "temp/stats_out.tmp" + ERROR_FILE = "temp/stats_err.tmp" RAPIDO_COMMAND = "%s --entity=%s --key=%s --tstart=%s --tend=%s --showtime" # static methods @@ -183,9 +178,9 @@ class OdsStatsFetcher(TimeSeriesData): @staticmethod def _get_time_value_pair(pair_string): # example pair_string: '[1532544591, 97.3653601828]' - pair_string = pair_string.replace('[', '') - pair_string = pair_string.replace(']', '') - pair = pair_string.split(',') + pair_string = pair_string.replace("[", "") + pair_string = pair_string.replace("]", "") + pair = pair_string.split(",") first = int(pair[0].strip()) second = float(pair[1].strip()) return [first, second] @@ -193,12 +188,10 @@ class OdsStatsFetcher(TimeSeriesData): @staticmethod def _get_ods_cli_stime(start_time): diff = int(time.time() - int(start_time)) - stime = str(diff) + '_s' + stime = str(diff) + "_s" return stime - def __init__( - self, client, entities, start_time, end_time, key_prefix=None - ): + def __init__(self, client, entities, start_time, end_time, 
key_prefix=None):
         super().__init__()
         self.client = client
         self.entities = entities
@@ -209,7 +202,7 @@ class OdsStatsFetcher(TimeSeriesData):
         self.duration_sec = 60

     def execute_script(self, command):
-        print('executing...')
+        print("executing...")
         print(command)
         out_file = open(self.OUTPUT_FILE, "w+")
         err_file = open(self.ERROR_FILE, "w+")
@@ -222,9 +215,9 @@
         # <entity_name>\t<key_name>\t[[ts, value], [ts, value], ...]
         # ts = timestamp; value = value of key_name in entity_name at time ts
         self.keys_ts = {}
-        with open(self.OUTPUT_FILE, 'r') as fp:
+        with open(self.OUTPUT_FILE, "r") as fp:
             for line in fp:
-                token_list = line.strip().split('\t')
+                token_list = line.strip().split("\t")
                 entity = token_list[0]
                 key = token_list[1]
                 if entity not in self.keys_ts:
@@ -233,7 +226,7 @@
                     self.keys_ts[entity][key] = {}
                 list_of_lists = [
                     self._get_time_value_pair(pair_string)
-                    for pair_string in token_list[2].split('],')
+                    for pair_string in token_list[2].split("],")
                 ]
                 value = {pair[0]: pair[1] for pair in list_of_lists}
                 self.keys_ts[entity][key] = value
@@ -243,7 +236,7 @@
         # <entity_name>\t<key_name>\t<timestamp>\t<value>
         # there is one line per (entity_name, key_name, timestamp)
         self.keys_ts = {}
-        with open(self.OUTPUT_FILE, 'r') as fp:
+        with open(self.OUTPUT_FILE, "r") as fp:
             for line in fp:
                 token_list = line.split()
                 entity = token_list[0]
@@ -257,25 +250,29 @@
     def fetch_timeseries(self, statistics):
         # this method fetches the timeseries of required stats from the ODS
         # service and populates the 'keys_ts' object appropriately
-        print('OdsStatsFetcher: fetching ' + str(statistics))
-        if re.search('rapido', self.client, re.IGNORECASE):
+        print("OdsStatsFetcher: fetching " + str(statistics))
+        if re.search("rapido", self.client, re.IGNORECASE):
             command = self.RAPIDO_COMMAND % (
                 self.client,
                 self._get_string_in_quotes(self.entities),
-                self._get_string_in_quotes(','.join(statistics)),
+                self._get_string_in_quotes(",".join(statistics)),
                 self._get_string_in_quotes(self.start_time),
-                self._get_string_in_quotes(self.end_time)
+                self._get_string_in_quotes(self.end_time),
             )
             # Run the tool and fetch the time-series data
             self.execute_script(command)
             # Parse output and populate the 'keys_ts' map
             self.parse_rapido_output()
-        elif re.search('ods', self.client, re.IGNORECASE):
+        elif re.search("ods", self.client, re.IGNORECASE):
             command = (
-                self.client + ' ' +
-                '--stime=' + self._get_ods_cli_stime(self.start_time) + ' ' +
-                self._get_string_in_quotes(self.entities) + ' ' +
-                self._get_string_in_quotes(','.join(statistics))
+                self.client
+                + " "
+                + "--stime="
+                + self._get_ods_cli_stime(self.start_time)
+                + " "
+                + self._get_string_in_quotes(self.entities)
+                + " "
+                + self._get_string_in_quotes(",".join(statistics))
             )
             # Run the tool and fetch the time-series data
             self.execute_script(command)
@@ -287,7 +284,7 @@
         for cond in conditions:
             for key in cond.keys:
                 use_prefix = False
-                if key.startswith('[]'):
+                if key.startswith("[]"):
                     use_prefix = True
                     key = key[2:]
                 # TODO(poojam23): this is very hacky and needs to be improved
                 key += ".60"
                 if use_prefix:
                     if not self.key_prefix:
-                        print('Warning: OdsStatsFetcher might need key prefix')
-                        print('for the key: ' + key)
+                        print("Warning: OdsStatsFetcher might need key prefix")
+                        print("for the key: " + key)
                     else:
                         key = self.key_prefix + "." + key
                 reqd_stats.append(key)
         return reqd_stats

-    def fetch_rate_url(self, entities, keys, window_len, percent, display):
-        # type: (List[str], List[str], str, str, bool) -> str
+    def fetch_rate_url(
+        self,
+        entities: List[str],
+        keys: List[str],
+        window_len: str,
+        percent: str,
+        display: bool,
+    ) -> str:
         transform_desc = (
             "rate(" + str(window_len) + ",duration=" + str(self.duration_sec)
         )
@@ -311,28 +308,33 @@
             transform_desc = transform_desc + ",%)"
         else:
             transform_desc = transform_desc + ")"
-        if re.search('rapido', self.client, re.IGNORECASE):
+        if re.search("rapido", self.client, re.IGNORECASE):
             command = self.RAPIDO_COMMAND + " --transform=%s --url=%s"
             command = command % (
                 self.client,
-                self._get_string_in_quotes(','.join(entities)),
-                self._get_string_in_quotes(','.join(keys)),
+                self._get_string_in_quotes(",".join(entities)),
+                self._get_string_in_quotes(",".join(keys)),
                 self._get_string_in_quotes(self.start_time),
                 self._get_string_in_quotes(self.end_time),
                 self._get_string_in_quotes(transform_desc),
-                self._get_string_in_quotes(display)
+                self._get_string_in_quotes(display),
             )
-        elif re.search('ods', self.client, re.IGNORECASE):
+        elif re.search("ods", self.client, re.IGNORECASE):
             command = (
-                self.client + ' ' +
-                '--stime=' + self._get_ods_cli_stime(self.start_time) + ' ' +
-                '--fburlonly ' +
-                self._get_string_in_quotes(entities) + ' ' +
-                self._get_string_in_quotes(','.join(keys)) + ' ' +
-                self._get_string_in_quotes(transform_desc)
+                self.client
+                + " "
+                + "--stime="
+                + self._get_ods_cli_stime(self.start_time)
+                + " "
+                + "--fburlonly "
+                + self._get_string_in_quotes(entities)
+                + " "
+                + self._get_string_in_quotes(",".join(keys))
+                + " "
+                + self._get_string_in_quotes(transform_desc)
             )
         self.execute_script(command)
         url = ""
-        with open(self.OUTPUT_FILE, 'r') as fp:
+        with open(self.OUTPUT_FILE, "r") as fp:
             url = fp.readline()
         return url
diff --git a/tools/advisor/advisor/db_timeseries_parser.py b/tools/advisor/advisor/db_timeseries_parser.py
index 308eb139a..9e30008b4 100644
--- a/tools/advisor/advisor/db_timeseries_parser.py
+++ b/tools/advisor/advisor/db_timeseries_parser.py
@@ -3,13 +3,15 @@
 # COPYING file in the root directory) and Apache 2.0 License
 # (found in the LICENSE.Apache file in the root directory).
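As a worked example of the transform string assembled by fetch_rate_url() in the db_stats_fetcher.py diff above (the input values here are hypothetical; duration_sec defaults to 60 in the constructor):

    window_len, duration_sec, percent = "3600", 60, True  # hypothetical inputs
    transform_desc = "rate(" + str(window_len) + ",duration=" + str(duration_sec)
    transform_desc += ",%)" if percent else ")"
    print(transform_desc)  # rate(3600,duration=60,%)
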
+import math
 from abc import abstractmethod
-from advisor.db_log_parser import DataSource
 from enum import Enum
-import math
+from typing import Dict, List
+
+from advisor.db_log_parser import DataSource


-NO_ENTITY = 'ENTITY_PLACEHOLDER'
+NO_ENTITY = "ENTITY_PLACEHOLDER"


 class TimeSeriesData(DataSource):
@@ -42,10 +44,8 @@ class TimeSeriesData(DataSource):
         # for each of them and populates the 'keys_ts' dictionary
         pass

-    def fetch_burst_epochs(
-        self, entities, statistic, window_sec, threshold, percent
-    ):
-        # type: (str, int, float, bool) -> Dict[str, Dict[int, float]]
+    def fetch_burst_epochs(
+        self,
+        entities: List[str],
+        statistic: str,
+        window_sec: int,
+        threshold: float,
+        percent: bool,
+    ) -> Dict[str, Dict[int, float]]:
         # this method calculates the (percent) rate change in the 'statistic'
         # for each entity (over 'window_sec' seconds) and returns the epochs
         # where this rate change is greater than or equal to the 'threshold'
@@ -90,7 +90,6 @@
         return burst_epochs

     def fetch_aggregated_values(self, entity, statistics, aggregation_op):
-        # type: (str, AggregationOperator) -> Dict[str, float]
         # this method performs the aggregation specified by 'aggregation_op'
         # on the timeseries of 'statistics' for 'entity' and returns:
         # Dict[statistic, aggregated_value]
@@ -145,7 +144,7 @@
                     complete_keys[0],  # there should be only one key
                     cond.window_sec,
                     cond.rate_threshold,
-                    True
+                    True,
                 )
                 # Trigger in this case is:
                 # Dict[entity_name, Dict[timestamp, rate_change]]
@@ -156,32 +155,28 @@
                 cond.set_trigger(result)
             elif cond.behavior is self.Behavior.evaluate_expression:
                 self.handle_evaluate_expression(
-                    cond,
-                    complete_keys,
-                    entities_with_stats
+                    cond, complete_keys, entities_with_stats
                 )

     def handle_evaluate_expression(self, condition, statistics, entities):
         trigger = {}
         # check 'condition' for each of these entities
         for entity in entities:
-            if hasattr(condition, 'aggregation_op'):
+            if hasattr(condition, "aggregation_op"):
                 # in this case, the aggregation operation is performed on each
                 # of the condition's 'keys' and then with aggregated values
                 # condition's 'expression' is evaluated; if it evaluates to
                 # True, then list of the keys values is added to the
                 # condition's trigger: Dict[entity_name, List[stats]]
                 result = self.fetch_aggregated_values(
-                        entity, statistics, condition.aggregation_op
+                    entity, statistics, condition.aggregation_op
                 )
                 keys = [result[key] for key in statistics]
                 try:
                     if eval(condition.expression):
                         trigger[entity] = keys
                 except Exception as e:
-                    print(
-                        'WARNING(TimeSeriesData) check_and_trigger: ' + str(e)
-                    )
+                    print("WARNING(TimeSeriesData) check_and_trigger: " + str(e))
             else:
                 # assumption: all stats have same series of timestamps
                 # this is similar to the above but 'expression' is evaluated at
@@ -190,19 +185,13 @@
                 # 'expression' evaluated to true; so trigger is:
                 # Dict[entity, Dict[timestamp, List[stats]]]
                 for epoch in self.keys_ts[entity][statistics[0]].keys():
-                    keys = [
-                        self.keys_ts[entity][key][epoch]
-                        for key in statistics
-                    ]
+                    keys = [self.keys_ts[entity][key][epoch] for key in statistics]
                     try:
                         if eval(condition.expression):
                             if entity not in trigger:
                                 trigger[entity] = {}
                             trigger[entity][epoch] = keys
                     except Exception as e:
-                        print(
-                            'WARNING(TimeSeriesData) check_and_trigger: ' +
-                            str(e)
-                        )
+                        print("WARNING(TimeSeriesData) check_and_trigger: " + str(e))
         if trigger:
             condition.set_trigger(trigger)
diff --git a/tools/advisor/advisor/ini_parser.py
b/tools/advisor/advisor/ini_parser.py index 4776ef209..3379ea3cd 100644
--- a/tools/advisor/advisor/ini_parser.py
+++ b/tools/advisor/advisor/ini_parser.py
@@ -17,7 +17,7 @@ class IniParser:
     @staticmethod
     def remove_trailing_comment(line):
         line = line.strip()
-        comment_start = line.find('#')
+        comment_start = line.find("#")
         if comment_start > -1:
             return line[:comment_start]
         return line
@@ -27,7 +27,7 @@
         # A section header looks like: [Rule "my-new-rule"]. Essentially,
         # a line that is in square-brackets.
         line = line.strip()
-        if line.startswith('[') and line.endswith(']'):
+        if line.startswith("[") and line.endswith("]"):
             return True
         return False
@@ -38,7 +38,7 @@
         token_list = line.strip()[1:-1].split('"')
         if len(token_list) < 3:
             error = 'needed section header: [<section_type> "<section_name>"]'
-            raise ValueError('Parsing error: ' + error + '\n' + line)
+            raise ValueError("Parsing error: " + error + "\n" + line)
         return token_list[1]
@@ -47,22 +47,22 @@
         if not line:
             return IniParser.Element.comment
         if IniParser.is_section_header(line):
-            if line.strip()[1:-1].startswith('Suggestion'):
+            if line.strip()[1:-1].startswith("Suggestion"):
                 return IniParser.Element.sugg
-            if line.strip()[1:-1].startswith('Rule'):
+            if line.strip()[1:-1].startswith("Rule"):
                 return IniParser.Element.rule
-            if line.strip()[1:-1].startswith('Condition'):
+            if line.strip()[1:-1].startswith("Condition"):
                 return IniParser.Element.cond
-        if '=' in line:
+        if "=" in line:
             return IniParser.Element.key_val
-        error = 'not a recognizable RulesSpec element'
-        raise ValueError('Parsing error: ' + error + '\n' + line)
+        error = "not a recognizable RulesSpec element"
+        raise ValueError("Parsing error: " + error + "\n" + line)

     @staticmethod
     def get_key_value_pair(line):
         line = line.strip()
-        key = line.split('=')[0].strip()
-        value = "=".join(line.split('=')[1:])
+        key = line.split("=")[0].strip()
+        value = "=".join(line.split("=")[1:])
         if value == "":  # if the option has no value
             return (key, None)
         values = IniParser.get_list_from_value(value)
@@ -72,5 +72,5 @@

     @staticmethod
     def get_list_from_value(value):
-        values = value.strip().split(':')
+        values = value.strip().split(":")
         return values
diff --git a/tools/advisor/advisor/rule_parser.py b/tools/advisor/advisor/rule_parser.py
index 592218f4a..8b484db48 100644
--- a/tools/advisor/advisor/rule_parser.py
+++ b/tools/advisor/advisor/rule_parser.py
@@ -3,12 +3,13 @@
 # COPYING file in the root directory) and Apache 2.0 License
 # (found in the LICENSE.Apache file in the root directory).

+import re
 from abc import ABC, abstractmethod
+from enum import Enum
+
 from advisor.db_log_parser import DataSource, NO_COL_FAMILY
 from advisor.db_timeseries_parser import TimeSeriesData
-from enum import Enum
 from advisor.ini_parser import IniParser
-import re


 class Section(ABC):
@@ -38,17 +39,17 @@
         # value will be a string and not a list. Hence, convert it to a single
         # element list before storing it in self.suggestions or
         # self.conditions.
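The string-versus-list distinction described in the comment above is produced by IniParser.get_key_value_pair() and get_list_from_value() from the previous hunks; a small standalone sketch with invented values (the single-element collapse is inferred from that comment, since the tail of get_key_value_pair is elided here):

    line = "conditions=cond-a:cond-b"
    key = line.split("=")[0].strip()       # 'conditions'
    value = "=".join(line.split("=")[1:])  # 'cond-a:cond-b'
    values = value.strip().split(":")      # ['cond-a', 'cond-b']
    # a single-valued line such as 'regex=Stalling writes' would instead
    # yield the plain string 'Stalling writes'
    print(key, values)
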
-        if key == 'conditions':
+        if key == "conditions":
             if isinstance(value, str):
                 self.conditions = [value]
             else:
                 self.conditions = value
-        elif key == 'suggestions':
+        elif key == "suggestions":
             if isinstance(value, str):
                 self.suggestions = [value]
             else:
                 self.suggestions = value
-        elif key == 'overlap_time_period':
+        elif key == "overlap_time_period":
             self.overlap_time_seconds = value

     def get_suggestions(self):
@@ -56,35 +57,29 @@
     def perform_checks(self):
         if not self.conditions or len(self.conditions) < 1:
-            raise ValueError(
-                self.name + ': rule must have at least one condition'
-            )
+            raise ValueError(self.name + ": rule must have at least one condition")
         if not self.suggestions or len(self.suggestions) < 1:
-            raise ValueError(
-                self.name + ': rule must have at least one suggestion'
-            )
+            raise ValueError(self.name + ": rule must have at least one suggestion")
         if self.overlap_time_seconds:
             if len(self.conditions) != 2:
                 raise ValueError(
-                    self.name + ": rule must be associated with 2 conditions\
+                    self.name
+                    + ": rule must be associated with 2 conditions\
 in order to check for a time dependency between them"
                 )
-            time_format = '^\d+[s|m|h|d]$'
-            if (
-                not
-                re.match(time_format, self.overlap_time_seconds, re.IGNORECASE)
-            ):
+            # use a plain character class: [s|m|h|d] would also match a
+            # literal '|'
+            time_format = "^\d+[smhd]$"  # noqa
+            if not re.match(time_format, self.overlap_time_seconds, re.IGNORECASE):
                 raise ValueError(
                     self.name + ": overlap_time_seconds format: \d+[smhd]"
                 )
             else:
                 # convert to seconds
                 in_seconds = int(self.overlap_time_seconds[:-1])
-                if self.overlap_time_seconds[-1] == 'm':
+                if self.overlap_time_seconds[-1] == "m":
                     in_seconds *= 60
-                elif self.overlap_time_seconds[-1] == 'h':
-                    in_seconds *= (60 * 60)
-                elif self.overlap_time_seconds[-1] == 'd':
-                    in_seconds *= (24 * 60 * 60)
+                elif self.overlap_time_seconds[-1] == "h":
+                    in_seconds *= 60 * 60
+                elif self.overlap_time_seconds[-1] == "d":
+                    in_seconds *= 24 * 60 * 60
                 self.overlap_time_seconds = in_seconds

     def get_overlap_timestamps(self, key1_trigger_epochs, key2_trigger_epochs):
@@ -93,28 +88,25 @@
         # (if present) the first pair of timestamps at which the 2 conditions
         # were triggered within 'overlap_time_seconds' of each other
         key1_lower_bounds = [
-            epoch - self.overlap_time_seconds
-            for epoch in key1_trigger_epochs
+            epoch - self.overlap_time_seconds for epoch in key1_trigger_epochs
         ]
         key1_lower_bounds.sort()
         key2_trigger_epochs.sort()
         trigger_ix = 0
         overlap_pair = None
         for key1_lb in key1_lower_bounds:
-            while (
-                key2_trigger_epochs[trigger_ix] < key1_lb and
-                trigger_ix < len(key2_trigger_epochs)
+            # test the index bound before subscripting so the loop cannot
+            # raise an IndexError after the last element
+            while trigger_ix < len(key2_trigger_epochs) and (
+                key2_trigger_epochs[trigger_ix] < key1_lb
             ):
                 trigger_ix += 1
             if trigger_ix >= len(key2_trigger_epochs):
                 break
-            if (
-                key2_trigger_epochs[trigger_ix] <=
-                key1_lb + (2 * self.overlap_time_seconds)
+            if key2_trigger_epochs[trigger_ix] <= key1_lb + (
+                2 * self.overlap_time_seconds
             ):
                 overlap_pair = (
                     key2_trigger_epochs[trigger_ix],
-                    key1_lb + self.overlap_time_seconds
+                    key1_lb + self.overlap_time_seconds,
                 )
                 break
         return overlap_pair
@@ -130,10 +122,10 @@
         condition1 = conditions_dict[self.conditions[0]]
         condition2 = conditions_dict[self.conditions[1]]
         if not (
-            condition1.get_data_source() is DataSource.Type.TIME_SERIES and
-            condition2.get_data_source() is DataSource.Type.TIME_SERIES
+            condition1.get_data_source() is DataSource.Type.TIME_SERIES
+            and condition2.get_data_source() is DataSource.Type.TIME_SERIES
         ):
-            raise ValueError(self.name + ': need 2 timeseries
conditions') + raise ValueError(self.name + ": need 2 timeseries conditions") map1 = condition1.get_trigger() map2 = condition2.get_trigger() @@ -142,14 +134,10 @@ class Rule(Section): self.trigger_entities = {} is_triggered = False - entity_intersection = ( - set(map1.keys()).intersection(set(map2.keys())) - ) + entity_intersection = set(map1.keys()).intersection(set(map2.keys())) for entity in entity_intersection: - overlap_timestamps_pair = ( - self.get_overlap_timestamps( - list(map1[entity].keys()), list(map2[entity].keys()) - ) + overlap_timestamps_pair = self.get_overlap_timestamps( + list(map1[entity].keys()), list(map2[entity].keys()) ) if overlap_timestamps_pair: self.trigger_entities[entity] = overlap_timestamps_pair @@ -166,8 +154,8 @@ class Rule(Section): all_conditions_triggered = False break if ( - cond.get_data_source() is DataSource.Type.LOG or - cond.get_data_source() is DataSource.Type.DB_OPTIONS + cond.get_data_source() is DataSource.Type.LOG + or cond.get_data_source() is DataSource.Type.DB_OPTIONS ): cond_col_fam = set(cond.get_trigger().keys()) if NO_COL_FAMILY in cond_col_fam: @@ -180,8 +168,8 @@ class Rule(Section): if self.trigger_entities is None: self.trigger_entities = cond_entities else: - self.trigger_entities = ( - self.trigger_entities.intersection(cond_entities) + self.trigger_entities = self.trigger_entities.intersection( + cond_entities ) if not (self.trigger_entities or self.trigger_column_families): all_conditions_triggered = False @@ -200,7 +188,7 @@ class Rule(Section): rule_string += cond is_first = False else: - rule_string += (" AND " + cond) + rule_string += " AND " + cond # Append suggestions rule_string += "\nsuggestions:: " is_first = True @@ -209,11 +197,11 @@ class Rule(Section): rule_string += sugg is_first = False else: - rule_string += (", " + sugg) + rule_string += ", " + sugg if self.trigger_entities: - rule_string += (', entities:: ' + str(self.trigger_entities)) + rule_string += ", entities:: " + str(self.trigger_entities) if self.trigger_column_families: - rule_string += (', col_fam:: ' + str(self.trigger_column_families)) + rule_string += ", col_fam:: " + str(self.trigger_column_families) # Return constructed string return rule_string @@ -232,7 +220,7 @@ class Suggestion(Section): self.description = None def set_parameter(self, key, value): - if key == 'option': + if key == "option": # Note: # case 1: 'option' is supported by Rocksdb OPTIONS file; in this # case the option belongs to one of the sections in the config @@ -240,41 +228,35 @@ class Suggestion(Section): # case 2: 'option' is not supported by Rocksdb OPTIONS file; the # option is not expected to have the character '.' 
in its name self.option = value - elif key == 'action': + elif key == "action": if self.option and not value: - raise ValueError(self.name + ': provide action for option') + raise ValueError(self.name + ": provide action for option") self.action = self.Action[value] - elif key == 'suggested_values': + elif key == "suggested_values": if isinstance(value, str): self.suggested_values = [value] else: self.suggested_values = value - elif key == 'description': + elif key == "description": self.description = value def perform_checks(self): if not self.description: if not self.option: - raise ValueError(self.name + ': provide option or description') + raise ValueError(self.name + ": provide option or description") if not self.action: - raise ValueError(self.name + ': provide action for option') + raise ValueError(self.name + ": provide action for option") if self.action is self.Action.set and not self.suggested_values: - raise ValueError( - self.name + ': provide suggested value for option' - ) + raise ValueError(self.name + ": provide suggested value for option") def __repr__(self): sugg_string = "Suggestion: " + self.name if self.description: - sugg_string += (' description : ' + self.description) + sugg_string += " description : " + self.description else: - sugg_string += ( - ' option : ' + self.option + ' action : ' + self.action.name - ) + sugg_string += " option : " + self.option + " action : " + self.action.name if self.suggested_values: - sugg_string += ( - ' suggested_values : ' + str(self.suggested_values) - ) + sugg_string += " suggested_values : " + str(self.suggested_values) return sugg_string @@ -286,7 +268,7 @@ class Condition(Section): def perform_checks(self): if not self.data_source: - raise ValueError(self.name + ': condition not tied to data source') + raise ValueError(self.name + ": condition not tied to data source") def set_data_source(self, data_source): self.data_source = data_source @@ -310,28 +292,28 @@ class Condition(Section): def set_parameter(self, key, value): # must be defined by the subclass - raise NotImplementedError(self.name + ': provide source for condition') + raise NotImplementedError(self.name + ": provide source for condition") class LogCondition(Condition): @classmethod def create(cls, base_condition): - base_condition.set_data_source(DataSource.Type['LOG']) + base_condition.set_data_source(DataSource.Type["LOG"]) base_condition.__class__ = cls return base_condition def set_parameter(self, key, value): - if key == 'regex': + if key == "regex": self.regex = value def perform_checks(self): super().perform_checks() if not self.regex: - raise ValueError(self.name + ': provide regex for log condition') + raise ValueError(self.name + ": provide regex for log condition") def __repr__(self): log_cond_str = "LogCondition: " + self.name - log_cond_str += (" regex: " + self.regex) + log_cond_str += " regex: " + self.regex # if self.trigger: # log_cond_str += (" trigger: " + str(self.trigger)) return log_cond_str @@ -340,90 +322,90 @@ class LogCondition(Condition): class OptionCondition(Condition): @classmethod def create(cls, base_condition): - base_condition.set_data_source(DataSource.Type['DB_OPTIONS']) + base_condition.set_data_source(DataSource.Type["DB_OPTIONS"]) base_condition.__class__ = cls return base_condition def set_parameter(self, key, value): - if key == 'options': + if key == "options": if isinstance(value, str): self.options = [value] else: self.options = value - elif key == 'evaluate': + elif key == "evaluate": self.eval_expr = value def 
perform_checks(self): super().perform_checks() if not self.options: - raise ValueError(self.name + ': options missing in condition') + raise ValueError(self.name + ": options missing in condition") if not self.eval_expr: - raise ValueError(self.name + ': expression missing in condition') + raise ValueError(self.name + ": expression missing in condition") def __repr__(self): opt_cond_str = "OptionCondition: " + self.name - opt_cond_str += (" options: " + str(self.options)) - opt_cond_str += (" expression: " + self.eval_expr) + opt_cond_str += " options: " + str(self.options) + opt_cond_str += " expression: " + self.eval_expr if self.trigger: - opt_cond_str += (" trigger: " + str(self.trigger)) + opt_cond_str += " trigger: " + str(self.trigger) return opt_cond_str class TimeSeriesCondition(Condition): @classmethod def create(cls, base_condition): - base_condition.set_data_source(DataSource.Type['TIME_SERIES']) + base_condition.set_data_source(DataSource.Type["TIME_SERIES"]) base_condition.__class__ = cls return base_condition def set_parameter(self, key, value): - if key == 'keys': + if key == "keys": if isinstance(value, str): self.keys = [value] else: self.keys = value - elif key == 'behavior': + elif key == "behavior": self.behavior = TimeSeriesData.Behavior[value] - elif key == 'rate_threshold': + elif key == "rate_threshold": self.rate_threshold = float(value) - elif key == 'window_sec': + elif key == "window_sec": self.window_sec = int(value) - elif key == 'evaluate': + elif key == "evaluate": self.expression = value - elif key == 'aggregation_op': + elif key == "aggregation_op": self.aggregation_op = TimeSeriesData.AggregationOperator[value] def perform_checks(self): if not self.keys: - raise ValueError(self.name + ': specify timeseries key') + raise ValueError(self.name + ": specify timeseries key") if not self.behavior: - raise ValueError(self.name + ': specify triggering behavior') + raise ValueError(self.name + ": specify triggering behavior") if self.behavior is TimeSeriesData.Behavior.bursty: if not self.rate_threshold: - raise ValueError(self.name + ': specify rate burst threshold') + raise ValueError(self.name + ": specify rate burst threshold") if not self.window_sec: self.window_sec = 300 # default window length is 5 minutes if len(self.keys) > 1: - raise ValueError(self.name + ': specify only one key') + raise ValueError(self.name + ": specify only one key") elif self.behavior is TimeSeriesData.Behavior.evaluate_expression: if not (self.expression): - raise ValueError(self.name + ': specify evaluation expression') + raise ValueError(self.name + ": specify evaluation expression") else: - raise ValueError(self.name + ': trigger behavior not supported') + raise ValueError(self.name + ": trigger behavior not supported") def __repr__(self): ts_cond_str = "TimeSeriesCondition: " + self.name - ts_cond_str += (" statistics: " + str(self.keys)) - ts_cond_str += (" behavior: " + self.behavior.name) + ts_cond_str += " statistics: " + str(self.keys) + ts_cond_str += " behavior: " + self.behavior.name if self.behavior is TimeSeriesData.Behavior.bursty: - ts_cond_str += (" rate_threshold: " + str(self.rate_threshold)) - ts_cond_str += (" window_sec: " + str(self.window_sec)) + ts_cond_str += " rate_threshold: " + str(self.rate_threshold) + ts_cond_str += " window_sec: " + str(self.window_sec) if self.behavior is TimeSeriesData.Behavior.evaluate_expression: - ts_cond_str += (" expression: " + self.expression) - if hasattr(self, 'aggregation_op'): - ts_cond_str += (" aggregation_op: " + 
self.aggregation_op.name) + ts_cond_str += " expression: " + self.expression + if hasattr(self, "aggregation_op"): + ts_cond_str += " aggregation_op: " + self.aggregation_op.name if self.trigger: - ts_cond_str += (" trigger: " + str(self.trigger)) + ts_cond_str += " trigger: " + str(self.trigger) return ts_cond_str @@ -446,7 +428,7 @@ class RulesSpec: def load_rules_from_spec(self): self.initialise_fields() - with open(self.file_path, 'r') as db_rules: + with open(self.file_path, "r") as db_rules: curr_section = None for line in db_rules: line = IniParser.remove_trailing_comment(line) @@ -472,12 +454,12 @@ class RulesSpec: if curr_section is IniParser.Element.rule: new_rule.set_parameter(key, value) elif curr_section is IniParser.Element.cond: - if key == 'source': - if value == 'LOG': + if key == "source": + if value == "LOG": new_cond = LogCondition.create(new_cond) - elif value == 'OPTIONS': + elif value == "OPTIONS": new_cond = OptionCondition.create(new_cond) - elif value == 'TIME_SERIES': + elif value == "TIME_SERIES": new_cond = TimeSeriesCondition.create(new_cond) else: new_cond.set_parameter(key, value) @@ -515,14 +497,14 @@ class RulesSpec: def print_rules(self, rules): for rule in rules: - print('\nRule: ' + rule.name) + print("\nRule: " + rule.name) for cond_name in rule.conditions: print(repr(self.conditions_dict[cond_name])) for sugg_name in rule.suggestions: print(repr(self.suggestions_dict[sugg_name])) if rule.trigger_entities: - print('scope: entities:') + print("scope: entities:") print(rule.trigger_entities) if rule.trigger_column_families: - print('scope: col_fam:') + print("scope: col_fam:") print(rule.trigger_column_families) diff --git a/tools/advisor/advisor/rule_parser_example.py b/tools/advisor/advisor/rule_parser_example.py index d2348e5ae..6c04ff2bf 100644 --- a/tools/advisor/advisor/rule_parser_example.py +++ b/tools/advisor/advisor/rule_parser_example.py @@ -3,11 +3,12 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). 
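For orientation before the example driver below, here is a minimal, hypothetical rules spec in the INI dialect that RulesSpec and IniParser above parse. The section kinds (Rule/Condition/Suggestion), the source=LOG switch, and the conditions/suggestions/regex/option/action/suggested_values keys all appear in the parser code in this patch; the rule names, the regex, and the file path are invented:

    # a sketch, assuming advisor is importable and RulesSpec takes the spec path
    example_spec = '''
    [Rule "stall-rule"]
    conditions=stall-cond
    suggestions=inc-bg-flush

    [Condition "stall-cond"]
    source=LOG
    regex=Stalling writes

    [Suggestion "inc-bg-flush"]
    option=DBOptions.max_background_flushes
    action=increase
    suggested_values=2
    '''
    with open("/tmp/rules_example.ini", "w") as fp:
        fp.write(example_spec)

    from advisor.rule_parser import RulesSpec

    spec = RulesSpec("/tmp/rules_example.ini")
    spec.load_rules_from_spec()
    spec.perform_section_checks()
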
-from advisor.rule_parser import RulesSpec +import argparse + from advisor.db_log_parser import DatabaseLogs, DataSource from advisor.db_options_parser import DatabaseOptions from advisor.db_stats_fetcher import LogStatsParser, OdsStatsFetcher -import argparse +from advisor.rule_parser import RulesSpec def main(args): @@ -18,9 +19,7 @@ def main(args): # initialize the DatabaseOptions object db_options = DatabaseOptions(args.rocksdb_options) # Create DatabaseLogs object - db_logs = DatabaseLogs( - args.log_files_path_prefix, db_options.get_column_families() - ) + db_logs = DatabaseLogs(args.log_files_path_prefix, db_options.get_column_families()) # Create the Log STATS object db_log_stats = LogStatsParser( args.log_files_path_prefix, args.stats_dump_period_sec @@ -28,62 +27,72 @@ def main(args): data_sources = { DataSource.Type.DB_OPTIONS: [db_options], DataSource.Type.LOG: [db_logs], - DataSource.Type.TIME_SERIES: [db_log_stats] + DataSource.Type.TIME_SERIES: [db_log_stats], } if args.ods_client: - data_sources[DataSource.Type.TIME_SERIES].append(OdsStatsFetcher( - args.ods_client, - args.ods_entity, - args.ods_tstart, - args.ods_tend, - args.ods_key_prefix - )) + data_sources[DataSource.Type.TIME_SERIES].append( + OdsStatsFetcher( + args.ods_client, + args.ods_entity, + args.ods_tstart, + args.ods_tend, + args.ods_key_prefix, + ) + ) triggered_rules = rule_spec_parser.get_triggered_rules( data_sources, db_options.get_column_families() ) rule_spec_parser.print_rules(triggered_rules) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Use this script to get\ - suggestions for improving Rocksdb performance.') - parser.add_argument( - '--rules_spec', required=True, type=str, - help='path of the file containing the expert-specified Rules' +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Use this script to get\ + suggestions for improving Rocksdb performance." 
) parser.add_argument( - '--rocksdb_options', required=True, type=str, - help='path of the starting Rocksdb OPTIONS file' + "--rules_spec", + required=True, + type=str, + help="path of the file containing the expert-specified Rules", ) parser.add_argument( - '--log_files_path_prefix', required=True, type=str, - help='path prefix of the Rocksdb LOG files' + "--rocksdb_options", + required=True, + type=str, + help="path of the starting Rocksdb OPTIONS file", ) parser.add_argument( - '--stats_dump_period_sec', required=True, type=int, - help='the frequency (in seconds) at which STATISTICS are printed to ' + - 'the Rocksdb LOG file' + "--log_files_path_prefix", + required=True, + type=str, + help="path prefix of the Rocksdb LOG files", ) - # ODS arguments parser.add_argument( - '--ods_client', type=str, help='the ODS client binary' + "--stats_dump_period_sec", + required=True, + type=int, + help="the frequency (in seconds) at which STATISTICS are printed to " + + "the Rocksdb LOG file", ) + # ODS arguments + parser.add_argument("--ods_client", type=str, help="the ODS client binary") parser.add_argument( - '--ods_entity', type=str, - help='the servers for which the ODS stats need to be fetched' + "--ods_entity", + type=str, + help="the servers for which the ODS stats need to be fetched", ) parser.add_argument( - '--ods_key_prefix', type=str, - help='the prefix that needs to be attached to the keys of time ' + - 'series to be fetched from ODS' + "--ods_key_prefix", + type=str, + help="the prefix that needs to be attached to the keys of time " + + "series to be fetched from ODS", ) parser.add_argument( - '--ods_tstart', type=int, - help='start time of timeseries to be fetched from ODS' + "--ods_tstart", type=int, help="start time of timeseries to be fetched from ODS" ) parser.add_argument( - '--ods_tend', type=int, - help='end time of timeseries to be fetched from ODS' + "--ods_tend", type=int, help="end time of timeseries to be fetched from ODS" ) args = parser.parse_args() main(args) diff --git a/tools/advisor/test/test_db_bench_runner.py b/tools/advisor/test/test_db_bench_runner.py index 1c4f77d50..57306c942 100644 --- a/tools/advisor/test/test_db_bench_runner.py +++ b/tools/advisor/test/test_db_bench_runner.py @@ -3,24 +3,25 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). 
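# These tests assume a db_bench binary at ./../../db_bench relative to this
# module (see pos_args below). One plausible way to run the advisor test
# suite from tools/advisor:
#   python3 -m unittest discover -v -s test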
-from advisor.db_bench_runner import DBBenchRunner -from advisor.db_log_parser import NO_COL_FAMILY, DataSource -from advisor.db_options_parser import DatabaseOptions import os import unittest +from advisor.db_bench_runner import DBBenchRunner +from advisor.db_log_parser import DataSource, NO_COL_FAMILY +from advisor.db_options_parser import DatabaseOptions + class TestDBBenchRunnerMethods(unittest.TestCase): def setUp(self): self.pos_args = [ - './../../db_bench', - 'overwrite', - 'use_existing_db=true', - 'duration=10' + "./../../db_bench", + "overwrite", + "use_existing_db=true", + "duration=10", ] self.bench_runner = DBBenchRunner(self.pos_args) this_path = os.path.abspath(os.path.dirname(__file__)) - options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') + options_path = os.path.join(this_path, "input_files/OPTIONS-000005") self.db_options = DatabaseOptions(options_path) def test_setup(self): @@ -31,71 +32,70 @@ class TestDBBenchRunnerMethods(unittest.TestCase): ) def test_get_info_log_file_name(self): - log_file_name = DBBenchRunner.get_info_log_file_name( - None, 'random_path' - ) - self.assertEqual(log_file_name, 'LOG') + log_file_name = DBBenchRunner.get_info_log_file_name(None, "random_path") + self.assertEqual(log_file_name, "LOG") log_file_name = DBBenchRunner.get_info_log_file_name( - '/dev/shm/', '/tmp/rocksdbtest-155919/dbbench/' + "/dev/shm/", "/tmp/rocksdbtest-155919/dbbench/" ) - self.assertEqual(log_file_name, 'tmp_rocksdbtest-155919_dbbench_LOG') + self.assertEqual(log_file_name, "tmp_rocksdbtest-155919_dbbench_LOG") def test_get_opt_args_str(self): - misc_opt_dict = {'bloom_bits': 2, 'empty_opt': None, 'rate_limiter': 3} + misc_opt_dict = {"bloom_bits": 2, "empty_opt": None, "rate_limiter": 3} optional_args_str = DBBenchRunner.get_opt_args_str(misc_opt_dict) - self.assertEqual(optional_args_str, ' --bloom_bits=2 --rate_limiter=3') + self.assertEqual(optional_args_str, " --bloom_bits=2 --rate_limiter=3") def test_get_log_options(self): - db_path = '/tmp/rocksdb-155919/dbbench' + db_path = "/tmp/rocksdb-155919/dbbench" # when db_log_dir is present in the db_options update_dict = { - 'DBOptions.db_log_dir': {NO_COL_FAMILY: '/dev/shm'}, - 'DBOptions.stats_dump_period_sec': {NO_COL_FAMILY: '20'} + "DBOptions.db_log_dir": {NO_COL_FAMILY: "/dev/shm"}, + "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: "20"}, } self.db_options.update_options(update_dict) log_file_prefix, stats_freq = self.bench_runner.get_log_options( self.db_options, db_path ) - self.assertEqual( - log_file_prefix, '/dev/shm/tmp_rocksdb-155919_dbbench_LOG' - ) + self.assertEqual(log_file_prefix, "/dev/shm/tmp_rocksdb-155919_dbbench_LOG") self.assertEqual(stats_freq, 20) update_dict = { - 'DBOptions.db_log_dir': {NO_COL_FAMILY: None}, - 'DBOptions.stats_dump_period_sec': {NO_COL_FAMILY: '30'} + "DBOptions.db_log_dir": {NO_COL_FAMILY: None}, + "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: "30"}, } self.db_options.update_options(update_dict) log_file_prefix, stats_freq = self.bench_runner.get_log_options( self.db_options, db_path ) - self.assertEqual(log_file_prefix, '/tmp/rocksdb-155919/dbbench/LOG') + self.assertEqual(log_file_prefix, "/tmp/rocksdb-155919/dbbench/LOG") self.assertEqual(stats_freq, 30) def test_build_experiment_command(self): # add some misc_options to db_options update_dict = { - 'bloom_bits': {NO_COL_FAMILY: 2}, - 'rate_limiter_bytes_per_sec': {NO_COL_FAMILY: 128000000} + "bloom_bits": {NO_COL_FAMILY: 2}, + "rate_limiter_bytes_per_sec": {NO_COL_FAMILY: 128000000}, } 
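# (annotation, inferred from the assertions that follow: the expected command
#  has the shape <db_bench> --benchmarks=<type> --statistics --perf_level=3
#  --db=<path>, followed by the serialized misc options, an --options_file
#  flag, and the remaining positional arguments rendered as --key=value flags)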
self.db_options.update_options(update_dict) - db_path = '/dev/shm' + db_path = "/dev/shm" experiment_command = self.bench_runner._build_experiment_command( self.db_options, db_path ) opt_args_str = DBBenchRunner.get_opt_args_str( self.db_options.get_misc_options() ) - opt_args_str += ( - ' --options_file=' + - self.db_options.generate_options_config('12345') + opt_args_str += " --options_file=" + self.db_options.generate_options_config( + "12345" ) for arg in self.pos_args[2:]: - opt_args_str += (' --' + arg) + opt_args_str += " --" + arg expected_command = ( - self.pos_args[0] + ' --benchmarks=' + self.pos_args[1] + - ' --statistics --perf_level=3 --db=' + db_path + opt_args_str + self.pos_args[0] + + " --benchmarks=" + + self.pos_args[1] + + " --statistics --perf_level=3 --db=" + + db_path + + opt_args_str ) self.assertEqual(experiment_command, expected_command) @@ -104,44 +104,38 @@ class TestDBBenchRunner(unittest.TestCase): def setUp(self): # Note: the db_bench binary should be present in the rocksdb/ directory self.pos_args = [ - './../../db_bench', - 'overwrite', - 'use_existing_db=true', - 'duration=20' + "./../../db_bench", + "overwrite", + "use_existing_db=true", + "duration=20", ] self.bench_runner = DBBenchRunner(self.pos_args) this_path = os.path.abspath(os.path.dirname(__file__)) - options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') + options_path = os.path.join(this_path, "input_files/OPTIONS-000005") self.db_options = DatabaseOptions(options_path) def test_experiment_output(self): - update_dict = {'bloom_bits': {NO_COL_FAMILY: 2}} + update_dict = {"bloom_bits": {NO_COL_FAMILY: 2}} self.db_options.update_options(update_dict) - db_path = '/dev/shm' + db_path = "/dev/shm" data_sources, throughput = self.bench_runner.run_experiment( self.db_options, db_path ) self.assertEqual( - data_sources[DataSource.Type.DB_OPTIONS][0].type, - DataSource.Type.DB_OPTIONS - ) - self.assertEqual( - data_sources[DataSource.Type.LOG][0].type, - DataSource.Type.LOG + data_sources[DataSource.Type.DB_OPTIONS][0].type, DataSource.Type.DB_OPTIONS ) + self.assertEqual(data_sources[DataSource.Type.LOG][0].type, DataSource.Type.LOG) self.assertEqual(len(data_sources[DataSource.Type.TIME_SERIES]), 2) self.assertEqual( data_sources[DataSource.Type.TIME_SERIES][0].type, - DataSource.Type.TIME_SERIES + DataSource.Type.TIME_SERIES, ) self.assertEqual( data_sources[DataSource.Type.TIME_SERIES][1].type, - DataSource.Type.TIME_SERIES - ) - self.assertEqual( - data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec, 0 + DataSource.Type.TIME_SERIES, ) + self.assertEqual(data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec, 0) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tools/advisor/test/test_db_log_parser.py b/tools/advisor/test/test_db_log_parser.py index b70430433..6862691c1 100644 --- a/tools/advisor/test/test_db_log_parser.py +++ b/tools/advisor/test/test_db_log_parser.py @@ -3,52 +3,49 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). 
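# Format note, read off the fixtures below: a LOG line looks like
#   <yyyy/mm/dd-HH:MM:SS.ffffff> <thread-id> [source-file:line] [col-family] message
# and the [col-family] tag may be absent, in which case the entry maps to
# NO_COL_FAMILY.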
-from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY -from advisor.rule_parser import Condition, LogCondition import os import unittest +from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY +from advisor.rule_parser import Condition, LogCondition + class TestLog(unittest.TestCase): def setUp(self): - self.column_families = ['default', 'col_fam_A'] + self.column_families = ["default", "col_fam_A"] def test_get_column_family(self): test_log = ( - "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " + - "[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK" + "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " + + "[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK" ) db_log = Log(test_log, self.column_families) - self.assertEqual('col_fam_A', db_log.get_column_family()) + self.assertEqual("col_fam_A", db_log.get_column_family()) test_log = ( - "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " + - "[JOB 44] Level-0 flush table #84: 1890780 bytes OK" + "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " + + "[JOB 44] Level-0 flush table #84: 1890780 bytes OK" ) db_log = Log(test_log, self.column_families) - db_log.append_message('[default] some remaining part of log') + db_log.append_message("[default] some remaining part of log") self.assertEqual(NO_COL_FAMILY, db_log.get_column_family()) def test_get_methods(self): hr_time = "2018/05/25-14:30:25.491635" context = "7f82ba72e700" message = ( - "[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table " + - "#23: started" + "[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table " + + "#23: started" ) test_log = hr_time + " " + context + " " + message db_log = Log(test_log, self.column_families) self.assertEqual(db_log.get_message(), message) remaining_message = "[col_fam_A] some more logs" db_log.append_message(remaining_message) - self.assertEqual( - db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635" - ) + self.assertEqual(db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635") self.assertEqual(db_log.get_context(), "7f82ba72e700") self.assertEqual(db_log.get_timestamp(), 1527258625) - self.assertEqual( - db_log.get_message(), str(message + '\n' + remaining_message) - ) + self.assertEqual(db_log.get_message(), str(message + "\n" + remaining_message)) def test_is_new_log(self): new_log = "2018/05/25-14:34:21.047233 context random new log" @@ -60,44 +57,40 @@ class TestLog(unittest.TestCase): class TestDatabaseLogs(unittest.TestCase): def test_check_and_trigger_conditions(self): this_path = os.path.abspath(os.path.dirname(__file__)) - logs_path_prefix = os.path.join(this_path, 'input_files/LOG-0') - column_families = ['default', 'col-fam-A', 'col-fam-B'] + logs_path_prefix = os.path.join(this_path, "input_files/LOG-0") + column_families = ["default", "col-fam-A", "col-fam-B"] db_logs = DatabaseLogs(logs_path_prefix, column_families) # matches, has 2 col_fams - condition1 = LogCondition.create(Condition('cond-A')) - condition1.set_parameter('regex', 'random log message') + condition1 = LogCondition.create(Condition("cond-A")) + condition1.set_parameter("regex", "random log message") # matches, multiple lines message - condition2 = LogCondition.create(Condition('cond-B')) - condition2.set_parameter('regex', 'continuing on next line') + condition2 = LogCondition.create(Condition("cond-B")) + condition2.set_parameter("regex", "continuing on next line") # does not match - condition3 = 
LogCondition.create(Condition('cond-C')) - condition3.set_parameter('regex', 'this should match no log') - db_logs.check_and_trigger_conditions( - [condition1, condition2, condition3] - ) + condition3 = LogCondition.create(Condition("cond-C")) + condition3.set_parameter("regex", "this should match no log") + db_logs.check_and_trigger_conditions([condition1, condition2, condition3]) cond1_trigger = condition1.get_trigger() self.assertEqual(2, len(cond1_trigger.keys())) - self.assertSetEqual( - {'col-fam-A', NO_COL_FAMILY}, set(cond1_trigger.keys()) - ) - self.assertEqual(2, len(cond1_trigger['col-fam-A'])) + self.assertSetEqual({"col-fam-A", NO_COL_FAMILY}, set(cond1_trigger.keys())) + self.assertEqual(2, len(cond1_trigger["col-fam-A"])) messages = [ "[db/db_impl.cc:563] [col-fam-A] random log message for testing", - "[db/db_impl.cc:653] [col-fam-A] another random log message" + "[db/db_impl.cc:653] [col-fam-A] another random log message", ] - self.assertIn(cond1_trigger['col-fam-A'][0].get_message(), messages) - self.assertIn(cond1_trigger['col-fam-A'][1].get_message(), messages) + self.assertIn(cond1_trigger["col-fam-A"][0].get_message(), messages) + self.assertIn(cond1_trigger["col-fam-A"][1].get_message(), messages) self.assertEqual(1, len(cond1_trigger[NO_COL_FAMILY])) self.assertEqual( cond1_trigger[NO_COL_FAMILY][0].get_message(), - "[db/db_impl.cc:331] [unknown] random log message no column family" + "[db/db_impl.cc:331] [unknown] random log message no column family", ) cond2_trigger = condition2.get_trigger() - self.assertEqual(['col-fam-B'], list(cond2_trigger.keys())) - self.assertEqual(1, len(cond2_trigger['col-fam-B'])) + self.assertEqual(["col-fam-B"], list(cond2_trigger.keys())) + self.assertEqual(1, len(cond2_trigger["col-fam-B"])) self.assertEqual( - cond2_trigger['col-fam-B'][0].get_message(), - "[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n" + - "remaining part of the log" + cond2_trigger["col-fam-B"][0].get_message(), + "[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n" + + "remaining part of the log", ) self.assertIsNone(condition3.get_trigger()) diff --git a/tools/advisor/test/test_db_options_parser.py b/tools/advisor/test/test_db_options_parser.py index d53a9bdb5..cdeebaefa 100644 --- a/tools/advisor/test/test_db_options_parser.py +++ b/tools/advisor/test/test_db_options_parser.py @@ -3,105 +3,107 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). 
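# Shape note, taken from the expectations below: get_options_diff() returns
#   {option_name: {scope: (old_value, new_value)}}
# where scope is a column family name or NO_COL_FAMILY, and None marks a
# value that is missing on one side of the diff.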
+import os +import unittest + from advisor.db_log_parser import NO_COL_FAMILY from advisor.db_options_parser import DatabaseOptions from advisor.rule_parser import Condition, OptionCondition -import os -import unittest class TestDatabaseOptions(unittest.TestCase): def setUp(self): self.this_path = os.path.abspath(os.path.dirname(__file__)) - self.og_options = os.path.join( - self.this_path, 'input_files/OPTIONS-000005' - ) - misc_options = [ - 'bloom_bits = 4', 'rate_limiter_bytes_per_sec = 1024000' - ] + self.og_options = os.path.join(self.this_path, "input_files/OPTIONS-000005") + misc_options = ["bloom_bits = 4", "rate_limiter_bytes_per_sec = 1024000"] # create the options object self.db_options = DatabaseOptions(self.og_options, misc_options) # perform clean-up before running tests self.generated_options = os.path.join( - self.this_path, '../temp/OPTIONS_testing.tmp' + self.this_path, "../temp/OPTIONS_testing.tmp" ) if os.path.isfile(self.generated_options): os.remove(self.generated_options) def test_get_options_diff(self): old_opt = { - 'DBOptions.stats_dump_freq_sec': {NO_COL_FAMILY: '20'}, - 'CFOptions.write_buffer_size': { - 'default': '1024000', - 'col_fam_A': '128000', - 'col_fam_B': '128000000' + "DBOptions.stats_dump_freq_sec": {NO_COL_FAMILY: "20"}, + "CFOptions.write_buffer_size": { + "default": "1024000", + "col_fam_A": "128000", + "col_fam_B": "128000000", }, - 'DBOptions.use_fsync': {NO_COL_FAMILY: 'true'}, - 'DBOptions.max_log_file_size': {NO_COL_FAMILY: '128000000'} + "DBOptions.use_fsync": {NO_COL_FAMILY: "true"}, + "DBOptions.max_log_file_size": {NO_COL_FAMILY: "128000000"}, } new_opt = { - 'bloom_bits': {NO_COL_FAMILY: '4'}, - 'CFOptions.write_buffer_size': { - 'default': '128000000', - 'col_fam_A': '128000', - 'col_fam_C': '128000000' + "bloom_bits": {NO_COL_FAMILY: "4"}, + "CFOptions.write_buffer_size": { + "default": "128000000", + "col_fam_A": "128000", + "col_fam_C": "128000000", }, - 'DBOptions.use_fsync': {NO_COL_FAMILY: 'true'}, - 'DBOptions.max_log_file_size': {NO_COL_FAMILY: '0'} + "DBOptions.use_fsync": {NO_COL_FAMILY: "true"}, + "DBOptions.max_log_file_size": {NO_COL_FAMILY: "0"}, } diff = DatabaseOptions.get_options_diff(old_opt, new_opt) expected_diff = { - 'DBOptions.stats_dump_freq_sec': {NO_COL_FAMILY: ('20', None)}, - 'bloom_bits': {NO_COL_FAMILY: (None, '4')}, - 'CFOptions.write_buffer_size': { - 'default': ('1024000', '128000000'), - 'col_fam_B': ('128000000', None), - 'col_fam_C': (None, '128000000') + "DBOptions.stats_dump_freq_sec": {NO_COL_FAMILY: ("20", None)}, + "bloom_bits": {NO_COL_FAMILY: (None, "4")}, + "CFOptions.write_buffer_size": { + "default": ("1024000", "128000000"), + "col_fam_B": ("128000000", None), + "col_fam_C": (None, "128000000"), }, - 'DBOptions.max_log_file_size': {NO_COL_FAMILY: ('128000000', '0')} + "DBOptions.max_log_file_size": {NO_COL_FAMILY: ("128000000", "0")}, } self.assertDictEqual(diff, expected_diff) def test_is_misc_option(self): - self.assertTrue(DatabaseOptions.is_misc_option('bloom_bits')) + self.assertTrue(DatabaseOptions.is_misc_option("bloom_bits")) self.assertFalse( - DatabaseOptions.is_misc_option('DBOptions.stats_dump_freq_sec') + DatabaseOptions.is_misc_option("DBOptions.stats_dump_freq_sec") ) def test_set_up(self): options = self.db_options.get_all_options() self.assertEqual(22, len(options.keys())) expected_misc_options = { - 'bloom_bits': '4', 'rate_limiter_bytes_per_sec': '1024000' + "bloom_bits": "4", + "rate_limiter_bytes_per_sec": "1024000", } - self.assertDictEqual( - expected_misc_options, 
self.db_options.get_misc_options() - ) + self.assertDictEqual(expected_misc_options, self.db_options.get_misc_options()) self.assertListEqual( - ['default', 'col_fam_A'], self.db_options.get_column_families() + ["default", "col_fam_A"], self.db_options.get_column_families() ) def test_get_options(self): opt_to_get = [ - 'DBOptions.manual_wal_flush', 'DBOptions.db_write_buffer_size', - 'bloom_bits', 'CFOptions.compaction_filter_factory', - 'CFOptions.num_levels', 'rate_limiter_bytes_per_sec', - 'TableOptions.BlockBasedTable.block_align', 'random_option' + "DBOptions.manual_wal_flush", + "DBOptions.db_write_buffer_size", + "bloom_bits", + "CFOptions.compaction_filter_factory", + "CFOptions.num_levels", + "rate_limiter_bytes_per_sec", + "TableOptions.BlockBasedTable.block_align", + "random_option", ] options = self.db_options.get_options(opt_to_get) expected_options = { - 'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'false'}, - 'DBOptions.db_write_buffer_size': {NO_COL_FAMILY: '0'}, - 'bloom_bits': {NO_COL_FAMILY: '4'}, - 'CFOptions.compaction_filter_factory': { - 'default': 'nullptr', 'col_fam_A': 'nullptr' + "DBOptions.manual_wal_flush": {NO_COL_FAMILY: "false"}, + "DBOptions.db_write_buffer_size": {NO_COL_FAMILY: "0"}, + "bloom_bits": {NO_COL_FAMILY: "4"}, + "CFOptions.compaction_filter_factory": { + "default": "nullptr", + "col_fam_A": "nullptr", + }, + "CFOptions.num_levels": {"default": "7", "col_fam_A": "5"}, + "rate_limiter_bytes_per_sec": {NO_COL_FAMILY: "1024000"}, + "TableOptions.BlockBasedTable.block_align": { + "default": "false", + "col_fam_A": "true", }, - 'CFOptions.num_levels': {'default': '7', 'col_fam_A': '5'}, - 'rate_limiter_bytes_per_sec': {NO_COL_FAMILY: '1024000'}, - 'TableOptions.BlockBasedTable.block_align': { - 'default': 'false', 'col_fam_A': 'true' - } } self.assertDictEqual(expected_options, options) @@ -109,108 +111,104 @@ class TestDatabaseOptions(unittest.TestCase): # add new, update old, set old # before updating expected_old_opts = { - 'DBOptions.db_log_dir': {NO_COL_FAMILY: None}, - 'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'false'}, - 'bloom_bits': {NO_COL_FAMILY: '4'}, - 'CFOptions.num_levels': {'default': '7', 'col_fam_A': '5'}, - 'TableOptions.BlockBasedTable.block_restart_interval': { - 'col_fam_A': '16' - } + "DBOptions.db_log_dir": {NO_COL_FAMILY: None}, + "DBOptions.manual_wal_flush": {NO_COL_FAMILY: "false"}, + "bloom_bits": {NO_COL_FAMILY: "4"}, + "CFOptions.num_levels": {"default": "7", "col_fam_A": "5"}, + "TableOptions.BlockBasedTable.block_restart_interval": {"col_fam_A": "16"}, } get_opts = list(expected_old_opts.keys()) options = self.db_options.get_options(get_opts) self.assertEqual(expected_old_opts, options) # after updating options update_opts = { - 'DBOptions.db_log_dir': {NO_COL_FAMILY: '/dev/shm'}, - 'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'true'}, - 'bloom_bits': {NO_COL_FAMILY: '2'}, - 'CFOptions.num_levels': {'col_fam_A': '7'}, - 'TableOptions.BlockBasedTable.block_restart_interval': { - 'default': '32' - }, - 'random_misc_option': {NO_COL_FAMILY: 'something'} + "DBOptions.db_log_dir": {NO_COL_FAMILY: "/dev/shm"}, + "DBOptions.manual_wal_flush": {NO_COL_FAMILY: "true"}, + "bloom_bits": {NO_COL_FAMILY: "2"}, + "CFOptions.num_levels": {"col_fam_A": "7"}, + "TableOptions.BlockBasedTable.block_restart_interval": {"default": "32"}, + "random_misc_option": {NO_COL_FAMILY: "something"}, } self.db_options.update_options(update_opts) - update_opts['CFOptions.num_levels']['default'] = '7' - 
update_opts['TableOptions.BlockBasedTable.block_restart_interval'] = { - 'default': '32', 'col_fam_A': '16' + update_opts["CFOptions.num_levels"]["default"] = "7" + update_opts["TableOptions.BlockBasedTable.block_restart_interval"] = { + "default": "32", + "col_fam_A": "16", } - get_opts.append('random_misc_option') + get_opts.append("random_misc_option") options = self.db_options.get_options(get_opts) self.assertDictEqual(update_opts, options) expected_misc_options = { - 'bloom_bits': '2', - 'rate_limiter_bytes_per_sec': '1024000', - 'random_misc_option': 'something' + "bloom_bits": "2", + "rate_limiter_bytes_per_sec": "1024000", + "random_misc_option": "something", } - self.assertDictEqual( - expected_misc_options, self.db_options.get_misc_options() - ) + self.assertDictEqual(expected_misc_options, self.db_options.get_misc_options()) def test_generate_options_config(self): # make sure file does not exist from before self.assertFalse(os.path.isfile(self.generated_options)) - self.db_options.generate_options_config('testing') + self.db_options.generate_options_config("testing") self.assertTrue(os.path.isfile(self.generated_options)) def test_check_and_trigger_conditions(self): # options only from CFOptions # setup the OptionCondition objects to check and trigger update_dict = { - 'CFOptions.level0_file_num_compaction_trigger': {'col_fam_A': '4'}, - 'CFOptions.max_bytes_for_level_base': {'col_fam_A': '10'} + "CFOptions.level0_file_num_compaction_trigger": {"col_fam_A": "4"}, + "CFOptions.max_bytes_for_level_base": {"col_fam_A": "10"}, } self.db_options.update_options(update_dict) - cond1 = Condition('opt-cond-1') + cond1 = Condition("opt-cond-1") cond1 = OptionCondition.create(cond1) cond1.set_parameter( - 'options', [ - 'CFOptions.level0_file_num_compaction_trigger', - 'TableOptions.BlockBasedTable.block_restart_interval', - 'CFOptions.max_bytes_for_level_base' - ] + "options", + [ + "CFOptions.level0_file_num_compaction_trigger", + "TableOptions.BlockBasedTable.block_restart_interval", + "CFOptions.max_bytes_for_level_base", + ], ) cond1.set_parameter( - 'evaluate', - 'int(options[0])*int(options[1])-int(options[2])>=0' + "evaluate", "int(options[0])*int(options[1])-int(options[2])>=0" ) # only DBOptions - cond2 = Condition('opt-cond-2') + cond2 = Condition("opt-cond-2") cond2 = OptionCondition.create(cond2) cond2.set_parameter( - 'options', [ - 'DBOptions.db_write_buffer_size', - 'bloom_bits', - 'rate_limiter_bytes_per_sec' - ] + "options", + [ + "DBOptions.db_write_buffer_size", + "bloom_bits", + "rate_limiter_bytes_per_sec", + ], ) cond2.set_parameter( - 'evaluate', - '(int(options[2]) * int(options[1]) * int(options[0]))==0' + "evaluate", "(int(options[2]) * int(options[1]) * int(options[0]))==0" ) # mix of CFOptions and DBOptions - cond3 = Condition('opt-cond-3') + cond3 = Condition("opt-cond-3") cond3 = OptionCondition.create(cond3) cond3.set_parameter( - 'options', [ - 'DBOptions.db_write_buffer_size', # 0 - 'CFOptions.num_levels', # 5, 7 - 'bloom_bits' # 4 - ] + "options", + [ + "DBOptions.db_write_buffer_size", # 0 + "CFOptions.num_levels", # 5, 7 + "bloom_bits", # 4 + ], ) cond3.set_parameter( - 'evaluate', 'int(options[2])*int(options[0])+int(options[1])>6' + "evaluate", "int(options[2])*int(options[0])+int(options[1])>6" ) self.db_options.check_and_trigger_conditions([cond1, cond2, cond3]) - cond1_trigger = {'col_fam_A': ['4', '16', '10']} + cond1_trigger = {"col_fam_A": ["4", "16", "10"]} self.assertDictEqual(cond1_trigger, cond1.get_trigger()) - cond2_trigger = 
{NO_COL_FAMILY: ['0', '4', '1024000']} + cond2_trigger = {NO_COL_FAMILY: ["0", "4", "1024000"]} self.assertDictEqual(cond2_trigger, cond2.get_trigger()) - cond3_trigger = {'default': ['0', '7', '4']} + cond3_trigger = {"default": ["0", "7", "4"]} self.assertDictEqual(cond3_trigger, cond3.get_trigger()) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tools/advisor/test/test_db_stats_fetcher.py b/tools/advisor/test/test_db_stats_fetcher.py index afbbe8339..e2c29ab74 100644 --- a/tools/advisor/test/test_db_stats_fetcher.py +++ b/tools/advisor/test/test_db_stats_fetcher.py @@ -3,49 +3,46 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). -from advisor.db_stats_fetcher import LogStatsParser, DatabasePerfContext -from advisor.db_timeseries_parser import NO_ENTITY -from advisor.rule_parser import Condition, TimeSeriesCondition import os import time import unittest from unittest.mock import MagicMock +from advisor.db_stats_fetcher import DatabasePerfContext, LogStatsParser +from advisor.db_timeseries_parser import NO_ENTITY +from advisor.rule_parser import Condition, TimeSeriesCondition + class TestLogStatsParser(unittest.TestCase): def setUp(self): this_path = os.path.abspath(os.path.dirname(__file__)) - stats_file = os.path.join( - this_path, 'input_files/log_stats_parser_keys_ts' - ) + stats_file = os.path.join(this_path, "input_files/log_stats_parser_keys_ts") # populate the keys_ts dictionary of LogStatsParser self.stats_dict = {NO_ENTITY: {}} - with open(stats_file, 'r') as fp: + with open(stats_file, "r") as fp: for line in fp: - stat_name = line.split(':')[0].strip() + stat_name = line.split(":")[0].strip() self.stats_dict[NO_ENTITY][stat_name] = {} - token_list = line.split(':')[1].strip().split(',') + token_list = line.split(":")[1].strip().split(",") for token in token_list: timestamp = int(token.split()[0]) value = float(token.split()[1]) self.stats_dict[NO_ENTITY][stat_name][timestamp] = value - self.log_stats_parser = LogStatsParser('dummy_log_file', 20) + self.log_stats_parser = LogStatsParser("dummy_log_file", 20) self.log_stats_parser.keys_ts = self.stats_dict def test_check_and_trigger_conditions_bursty(self): # mock fetch_timeseries() because 'keys_ts' has been pre-populated self.log_stats_parser.fetch_timeseries = MagicMock() # condition: bursty - cond1 = Condition('cond-1') + cond1 = Condition("cond-1") cond1 = TimeSeriesCondition.create(cond1) - cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50') - cond1.set_parameter('behavior', 'bursty') - cond1.set_parameter('window_sec', 40) - cond1.set_parameter('rate_threshold', 0) + cond1.set_parameter("keys", "rocksdb.db.get.micros.p50") + cond1.set_parameter("behavior", "bursty") + cond1.set_parameter("window_sec", 40) + cond1.set_parameter("rate_threshold", 0) self.log_stats_parser.check_and_trigger_conditions([cond1]) - expected_cond_trigger = { - NO_ENTITY: {1530896440: 0.9767546362322214} - } + expected_cond_trigger = {NO_ENTITY: {1530896440: 0.9767546362322214}} self.assertDictEqual(expected_cond_trigger, cond1.get_trigger()) # ensure that fetch_timeseries() was called once self.log_stats_parser.fetch_timeseries.assert_called_once() @@ -54,23 +51,20 @@ class TestLogStatsParser(unittest.TestCase): # mock fetch_timeseries() because 'keys_ts' has been pre-populated self.log_stats_parser.fetch_timeseries = MagicMock() # condition: evaluate_expression - cond1 = Condition('cond-1') + cond1 = Condition("cond-1") cond1 = 
TimeSeriesCondition.create(cond1) - cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50') - cond1.set_parameter('behavior', 'evaluate_expression') - keys = [ - 'rocksdb.manifest.file.sync.micros.p99', - 'rocksdb.db.get.micros.p50' - ] - cond1.set_parameter('keys', keys) - cond1.set_parameter('aggregation_op', 'latest') + cond1.set_parameter("keys", "rocksdb.db.get.micros.p50") + cond1.set_parameter("behavior", "evaluate_expression") + keys = ["rocksdb.manifest.file.sync.micros.p99", "rocksdb.db.get.micros.p50"] + cond1.set_parameter("keys", keys) + cond1.set_parameter("aggregation_op", "latest") # condition evaluates to FALSE - cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)>200') + cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)>200") self.log_stats_parser.check_and_trigger_conditions([cond1]) expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]} self.assertIsNone(cond1.get_trigger()) # condition evaluates to TRUE - cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)<200') + cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)<200") self.log_stats_parser.check_and_trigger_conditions([cond1]) expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]} self.assertDictEqual(expected_cond_trigger, cond1.get_trigger()) @@ -81,23 +75,22 @@ class TestLogStatsParser(unittest.TestCase): # mock fetch_timeseries() because 'keys_ts' has been pre-populated self.log_stats_parser.fetch_timeseries = MagicMock() # condition: evaluate_expression - cond1 = Condition('cond-1') + cond1 = Condition("cond-1") cond1 = TimeSeriesCondition.create(cond1) - cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50') - cond1.set_parameter('behavior', 'evaluate_expression') - keys = [ - 'rocksdb.manifest.file.sync.micros.p99', - 'rocksdb.db.get.micros.p50' - ] - cond1.set_parameter('keys', keys) - cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)>500') + cond1.set_parameter("keys", "rocksdb.db.get.micros.p50") + cond1.set_parameter("behavior", "evaluate_expression") + keys = ["rocksdb.manifest.file.sync.micros.p99", "rocksdb.db.get.micros.p50"] + cond1.set_parameter("keys", keys) + cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)>500") self.log_stats_parser.check_and_trigger_conditions([cond1]) - expected_trigger = {NO_ENTITY: { - 1530896414: [9938.0, 16.31508], - 1530896440: [9938.0, 16.346602], - 1530896466: [9938.0, 16.284669], - 1530896492: [9938.0, 16.16005] - }} + expected_trigger = { + NO_ENTITY: { + 1530896414: [9938.0, 16.31508], + 1530896440: [9938.0, 16.346602], + 1530896466: [9938.0, 16.284669], + 1530896492: [9938.0, 16.16005], + } + } self.assertDictEqual(expected_trigger, cond1.get_trigger()) self.log_stats_parser.fetch_timeseries.assert_called_once() @@ -114,13 +107,15 @@ class TestDatabasePerfContext(unittest.TestCase): perf_ts[key] = {} start_val = perf_dict[key] for ix in range(5): - perf_ts[key][timestamp+(ix*10)] = start_val + (2 * ix * ix) + perf_ts[key][timestamp + (ix * 10)] = start_val + (2 * ix * ix) db_perf_context = DatabasePerfContext(perf_ts, 10, True) - timestamps = [timestamp+(ix*10) for ix in range(1, 5, 1)] + timestamps = [timestamp + (ix * 10) for ix in range(1, 5, 1)] values = [val for val in range(2, 15, 4)] inner_dict = {timestamps[ix]: values[ix] for ix in range(4)} - expected_keys_ts = {NO_ENTITY: { - 'user_key_comparison_count': inner_dict, - 'block_cache_hit_count': inner_dict - }} + expected_keys_ts = { + NO_ENTITY: { + "user_key_comparison_count": inner_dict, + "block_cache_hit_count": inner_dict, + } + } self.assertDictEqual(expected_keys_ts, 
db_perf_context.keys_ts) diff --git a/tools/advisor/test/test_rule_parser.py b/tools/advisor/test/test_rule_parser.py index 9f1d0bf5c..4ea4ca159 100644 --- a/tools/advisor/test/test_rule_parser.py +++ b/tools/advisor/test/test_rule_parser.py @@ -5,36 +5,32 @@ import os import unittest -from advisor.rule_parser import RulesSpec + from advisor.db_log_parser import DatabaseLogs, DataSource from advisor.db_options_parser import DatabaseOptions +from advisor.rule_parser import RulesSpec RuleToSuggestions = { - "stall-too-many-memtables": [ - 'inc-bg-flush', - 'inc-write-buffer' - ], + "stall-too-many-memtables": ["inc-bg-flush", "inc-write-buffer"], "stall-too-many-L0": [ - 'inc-max-subcompactions', - 'inc-max-bg-compactions', - 'inc-write-buffer-size', - 'dec-max-bytes-for-level-base', - 'inc-l0-slowdown-writes-trigger' + "inc-max-subcompactions", + "inc-max-bg-compactions", + "inc-write-buffer-size", + "dec-max-bytes-for-level-base", + "inc-l0-slowdown-writes-trigger", ], "stop-too-many-L0": [ - 'inc-max-bg-compactions', - 'inc-write-buffer-size', - 'inc-l0-stop-writes-trigger' + "inc-max-bg-compactions", + "inc-write-buffer-size", + "inc-l0-stop-writes-trigger", ], "stall-too-many-compaction-bytes": [ - 'inc-max-bg-compactions', - 'inc-write-buffer-size', - 'inc-hard-pending-compaction-bytes-limit', - 'inc-soft-pending-compaction-bytes-limit' + "inc-max-bg-compactions", + "inc-write-buffer-size", + "inc-hard-pending-compaction-bytes-limit", + "inc-soft-pending-compaction-bytes-limit", ], - "level0-level1-ratio": [ - 'l0-l1-ratio-health-check' - ] + "level0-level1-ratio": ["l0-l1-ratio-health-check"], } @@ -42,19 +38,19 @@ class TestAllRulesTriggered(unittest.TestCase): def setUp(self): # load the Rules this_path = os.path.abspath(os.path.dirname(__file__)) - ini_path = os.path.join(this_path, 'input_files/triggered_rules.ini') + ini_path = os.path.join(this_path, "input_files/triggered_rules.ini") self.db_rules = RulesSpec(ini_path) self.db_rules.load_rules_from_spec() self.db_rules.perform_section_checks() # load the data sources: LOG and OPTIONS - log_path = os.path.join(this_path, 'input_files/LOG-0') - options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') + log_path = os.path.join(this_path, "input_files/LOG-0") + options_path = os.path.join(this_path, "input_files/OPTIONS-000005") db_options_parser = DatabaseOptions(options_path) self.column_families = db_options_parser.get_column_families() db_logs_parser = DatabaseLogs(log_path, self.column_families) self.data_sources = { DataSource.Type.DB_OPTIONS: [db_options_parser], - DataSource.Type.LOG: [db_logs_parser] + DataSource.Type.LOG: [db_logs_parser], } def test_triggered_conditions(self): @@ -65,8 +61,7 @@ class TestAllRulesTriggered(unittest.TestCase): self.assertFalse(cond.is_triggered(), repr(cond)) for rule in rules_dict.values(): self.assertFalse( - rule.is_triggered(conditions_dict, self.column_families), - repr(rule) + rule.is_triggered(conditions_dict, self.column_families), repr(rule) ) # # Trigger the conditions as per the data sources. 
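# (recap, inferred from the calls above: trigger_conditions() scans every
#  data source and records matches on the Condition objects, after which
#  Rule.is_triggered() holds only if all of the rule's conditions fired)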
@@ -99,19 +94,19 @@ class TestConditionsConjunctions(unittest.TestCase): def setUp(self): # load the Rules this_path = os.path.abspath(os.path.dirname(__file__)) - ini_path = os.path.join(this_path, 'input_files/test_rules.ini') + ini_path = os.path.join(this_path, "input_files/test_rules.ini") self.db_rules = RulesSpec(ini_path) self.db_rules.load_rules_from_spec() self.db_rules.perform_section_checks() # load the data sources: LOG and OPTIONS - log_path = os.path.join(this_path, 'input_files/LOG-1') - options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') + log_path = os.path.join(this_path, "input_files/LOG-1") + options_path = os.path.join(this_path, "input_files/OPTIONS-000005") db_options_parser = DatabaseOptions(options_path) self.column_families = db_options_parser.get_column_families() db_logs_parser = DatabaseLogs(log_path, self.column_families) self.data_sources = { DataSource.Type.DB_OPTIONS: [db_options_parser], - DataSource.Type.LOG: [db_logs_parser] + DataSource.Type.LOG: [db_logs_parser], } def test_condition_conjunctions(self): @@ -122,46 +117,43 @@ class TestConditionsConjunctions(unittest.TestCase): self.assertFalse(cond.is_triggered(), repr(cond)) for rule in rules_dict.values(): self.assertFalse( - rule.is_triggered(conditions_dict, self.column_families), - repr(rule) + rule.is_triggered(conditions_dict, self.column_families), repr(rule) ) # Trigger the conditions as per the data sources. self.db_rules.trigger_conditions(self.data_sources) # Check for the conditions - conds_triggered = ['log-1-true', 'log-2-true', 'log-3-true'] - conds_not_triggered = ['log-4-false', 'options-1-false'] + conds_triggered = ["log-1-true", "log-2-true", "log-3-true"] + conds_not_triggered = ["log-4-false", "options-1-false"] for cond in conds_triggered: self.assertTrue(conditions_dict[cond].is_triggered(), repr(cond)) for cond in conds_not_triggered: self.assertFalse(conditions_dict[cond].is_triggered(), repr(cond)) # Check for the rules - rules_triggered = ['multiple-conds-true'] + rules_triggered = ["multiple-conds-true"] rules_not_triggered = [ - 'single-condition-false', - 'multiple-conds-one-false', - 'multiple-conds-all-false' + "single-condition-false", + "multiple-conds-one-false", + "multiple-conds-all-false", ] for rule_name in rules_triggered: rule = rules_dict[rule_name] self.assertTrue( - rule.is_triggered(conditions_dict, self.column_families), - repr(rule) + rule.is_triggered(conditions_dict, self.column_families), repr(rule) ) for rule_name in rules_not_triggered: rule = rules_dict[rule_name] self.assertFalse( - rule.is_triggered(conditions_dict, self.column_families), - repr(rule) + rule.is_triggered(conditions_dict, self.column_families), repr(rule) ) class TestSanityChecker(unittest.TestCase): def setUp(self): this_path = os.path.abspath(os.path.dirname(__file__)) - ini_path = os.path.join(this_path, 'input_files/rules_err1.ini') + ini_path = os.path.join(this_path, "input_files/rules_err1.ini") db_rules = RulesSpec(ini_path) db_rules.load_rules_from_spec() self.rules_dict = db_rules.get_rules_dict() @@ -169,39 +161,39 @@ class TestSanityChecker(unittest.TestCase): self.suggestions_dict = db_rules.get_suggestions_dict() def test_rule_missing_suggestions(self): - regex = '.*rule must have at least one suggestion.*' + regex = ".*rule must have at least one suggestion.*" with self.assertRaisesRegex(ValueError, regex): - self.rules_dict['missing-suggestions'].perform_checks() + self.rules_dict["missing-suggestions"].perform_checks() def 
test_rule_missing_conditions(self): - regex = '.*rule must have at least one condition.*' + regex = ".*rule must have at least one condition.*" with self.assertRaisesRegex(ValueError, regex): - self.rules_dict['missing-conditions'].perform_checks() + self.rules_dict["missing-conditions"].perform_checks() def test_condition_missing_regex(self): - regex = '.*provide regex for log condition.*' + regex = ".*provide regex for log condition.*" with self.assertRaisesRegex(ValueError, regex): - self.conditions_dict['missing-regex'].perform_checks() + self.conditions_dict["missing-regex"].perform_checks() def test_condition_missing_options(self): - regex = '.*options missing in condition.*' + regex = ".*options missing in condition.*" with self.assertRaisesRegex(ValueError, regex): - self.conditions_dict['missing-options'].perform_checks() + self.conditions_dict["missing-options"].perform_checks() def test_condition_missing_expression(self): - regex = '.*expression missing in condition.*' + regex = ".*expression missing in condition.*" with self.assertRaisesRegex(ValueError, regex): - self.conditions_dict['missing-expression'].perform_checks() + self.conditions_dict["missing-expression"].perform_checks() def test_suggestion_missing_option(self): - regex = '.*provide option or description.*' + regex = ".*provide option or description.*" with self.assertRaisesRegex(ValueError, regex): - self.suggestions_dict['missing-option'].perform_checks() + self.suggestions_dict["missing-option"].perform_checks() def test_suggestion_missing_description(self): - regex = '.*provide option or description.*' + regex = ".*provide option or description.*" with self.assertRaisesRegex(ValueError, regex): - self.suggestions_dict['missing-description'].perform_checks() + self.suggestions_dict["missing-description"].perform_checks() class TestParsingErrors(unittest.TestCase): @@ -209,26 +201,26 @@ class TestParsingErrors(unittest.TestCase): self.this_path = os.path.abspath(os.path.dirname(__file__)) def test_condition_missing_source(self): - ini_path = os.path.join(self.this_path, 'input_files/rules_err2.ini') + ini_path = os.path.join(self.this_path, "input_files/rules_err2.ini") db_rules = RulesSpec(ini_path) - regex = '.*provide source for condition.*' + regex = ".*provide source for condition.*" with self.assertRaisesRegex(NotImplementedError, regex): db_rules.load_rules_from_spec() def test_suggestion_missing_action(self): - ini_path = os.path.join(self.this_path, 'input_files/rules_err3.ini') + ini_path = os.path.join(self.this_path, "input_files/rules_err3.ini") db_rules = RulesSpec(ini_path) - regex = '.*provide action for option.*' + regex = ".*provide action for option.*" with self.assertRaisesRegex(ValueError, regex): db_rules.load_rules_from_spec() def test_section_no_name(self): - ini_path = os.path.join(self.this_path, 'input_files/rules_err4.ini') + ini_path = os.path.join(self.this_path, "input_files/rules_err4.ini") db_rules = RulesSpec(ini_path) - regex = 'Parsing error: needed section header:.*' + regex = "Parsing error: needed section header:.*" with self.assertRaisesRegex(ValueError, regex): db_rules.load_rules_from_spec() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tools/benchmark_ci.py b/tools/benchmark_ci.py index 0a82af55e..de9f69cf9 100755 --- a/tools/benchmark_ci.py +++ b/tools/benchmark_ci.py @@ -4,57 +4,59 @@ # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). 
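# Illustrative run, using the argparse defaults declared below:
#   python3 tools/benchmark_ci.py --db_dir ~/tmp/rocksdb-benchmark-datadir \
#     --output_dir ~/tmp/benchmark-results --num_keys 10000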
-'''Run benchmark_compare.sh on the most recent build, for CI -''' +"""Run benchmark_compare.sh on the most recent build, for CI +""" import argparse import glob +import logging import os import re import shutil import subprocess import sys -import logging logging.basicConfig(level=logging.INFO) class Config: def __init__(self, args): - self.version_file = './include/rocksdb/version.h' + self.version_file = "./include/rocksdb/version.h" self.data_dir = os.path.expanduser(f"{args.db_dir}") self.results_dir = os.path.expanduser(f"{args.output_dir}") self.benchmark_script = f"{os.getcwd()}/tools/benchmark_compare.sh" self.benchmark_cwd = f"{os.getcwd()}/tools" - benchmark_env_keys = ['LD_LIBRARY_PATH', - 'NUM_KEYS', - 'KEY_SIZE', - 'VALUE_SIZE', - 'CACHE_SIZE_MB', - 'DURATION_RW', - 'DURATION_RO', - 'MB_WRITE_PER_SEC', - 'NUM_THREADS', - 'COMPRESSION_TYPE', - 'MIN_LEVEL_TO_COMPRESS', - 'WRITE_BUFFER_SIZE_MB', - 'TARGET_FILE_SIZE_BASE_MB', - 'MAX_BYTES_FOR_LEVEL_BASE_MB', - 'MAX_BACKGROUND_JOBS', - 'CACHE_INDEX_AND_FILTER_BLOCKS', - 'USE_O_DIRECT', - 'STATS_INTERVAL_SECONDS', - 'SUBCOMPACTIONS', - 'COMPACTION_STYLE', - 'CI_TESTS_ONLY'] + benchmark_env_keys = [ + "LD_LIBRARY_PATH", + "NUM_KEYS", + "KEY_SIZE", + "VALUE_SIZE", + "CACHE_SIZE_MB", + "DURATION_RW", + "DURATION_RO", + "MB_WRITE_PER_SEC", + "NUM_THREADS", + "COMPRESSION_TYPE", + "MIN_LEVEL_TO_COMPRESS", + "WRITE_BUFFER_SIZE_MB", + "TARGET_FILE_SIZE_BASE_MB", + "MAX_BYTES_FOR_LEVEL_BASE_MB", + "MAX_BACKGROUND_JOBS", + "CACHE_INDEX_AND_FILTER_BLOCKS", + "USE_O_DIRECT", + "STATS_INTERVAL_SECONDS", + "SUBCOMPACTIONS", + "COMPACTION_STYLE", + "CI_TESTS_ONLY", + ] def read_version(config): - majorRegex = re.compile(r'#define ROCKSDB_MAJOR\s([0-9]+)') - minorRegex = re.compile(r'#define ROCKSDB_MINOR\s([0-9]+)') - patchRegex = re.compile(r'#define ROCKSDB_PATCH\s([0-9]+)') - with open(config.version_file, 'r') as reader: + majorRegex = re.compile(r"#define ROCKSDB_MAJOR\s([0-9]+)") + minorRegex = re.compile(r"#define ROCKSDB_MINOR\s([0-9]+)") + patchRegex = re.compile(r"#define ROCKSDB_PATCH\s([0-9]+)") + with open(config.version_file, "r") as reader: major = None minor = None patch = None @@ -77,8 +79,7 @@ def read_version(config): def prepare(version_str, config): - old_files = glob.glob(f"{config.results_dir}/{version_str}/**", - recursive=True) + old_files = glob.glob(f"{config.results_dir}/{version_str}/**", recursive=True) for f in old_files: if os.path.isfile(f): logging.debug(f"remove file {f}") @@ -96,8 +97,10 @@ def prepare(version_str, config): def results(version_str, config): # Copy the report TSV file back to the top level of results - shutil.copyfile(f"{config.results_dir}/{version_str}/report.tsv", - f"{config.results_dir}/report.tsv") + shutil.copyfile( + f"{config.results_dir}/{version_str}/report.tsv", + f"{config.results_dir}/report.tsv", + ) def cleanup(version_str, config): @@ -116,32 +119,41 @@ def get_benchmark_env(): def main(): - '''Tool for running benchmark_compare.sh on the most recent build, for CI + """Tool for running benchmark_compare.sh on the most recent build, for CI This tool will (1) Work out the current version of RocksDB (2) Run benchmark_compare with that version alone - ''' + """ parser = argparse.ArgumentParser( - description='benchmark_compare.sh Python wrapper for CI.') + description="benchmark_compare.sh Python wrapper for CI." 
+ ) # --tsvfile is the name of the file to read results from # --esdocument is the ElasticSearch document to push these results into # - parser.add_argument('--db_dir', default='~/tmp/rocksdb-benchmark-datadir', - help='Database directory hierarchy to use') - parser.add_argument('--output_dir', default='~/tmp/benchmark-results', - help='Benchmark output goes here') - parser.add_argument('--num_keys', default='10000', - help='Number of database keys to use in benchmark test(s) (determines size of test job)') + parser.add_argument( + "--db_dir", + default="~/tmp/rocksdb-benchmark-datadir", + help="Database directory hierarchy to use", + ) + parser.add_argument( + "--output_dir", + default="~/tmp/benchmark-results", + help="Benchmark output goes here", + ) + parser.add_argument( + "--num_keys", + default="10000", + help="Number of database keys to use in benchmark test(s) (determines size of test job)", + ) args = parser.parse_args() config = Config(args) version = read_version(config) if version is None: - raise Exception( - f"Could not read RocksDB version from {config.version_file}") + raise Exception(f"Could not read RocksDB version from {config.version_file}") version_str = f"{version[0]}.{version[1]}.{version[2]}" logging.info(f"Run benchmark_ci with RocksDB version {version_str}") @@ -149,9 +161,13 @@ def main(): try: env = get_benchmark_env() - env.append(('NUM_KEYS', args.num_keys)) - cmd = [config.benchmark_script, - config.data_dir, config.results_dir, version_str] + env.append(("NUM_KEYS", args.num_keys)) + cmd = [ + config.benchmark_script, + config.data_dir, + config.results_dir, + version_str, + ] logging.info(f"Run {cmd} env={env} cwd={config.benchmark_cwd}") subprocess.run(cmd, env=dict(env), cwd=config.benchmark_cwd) @@ -162,5 +178,5 @@ def main(): return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/tools/block_cache_analyzer/block_cache_pysim_test.py b/tools/block_cache_analyzer/block_cache_pysim_test.py index 4b2bdeba6..eed1b94af 100644 --- a/tools/block_cache_analyzer/block_cache_pysim_test.py +++ b/tools/block_cache_analyzer/block_cache_pysim_test.py @@ -8,9 +8,12 @@ import sys from block_cache_pysim import ( ARCCache, CacheEntry, + create_cache, GDSizeCache, HashTable, HyperbolicPolicy, + kMicrosInSecond, + kSampleSize, LFUPolicy, LinUCBCache, LRUCache, @@ -18,13 +21,10 @@ from block_cache_pysim import ( MRUPolicy, OPTCache, OPTCacheEntry, + run, ThompsonSamplingCache, TraceCache, TraceRecord, - create_cache, - kMicrosInSecond, - kSampleSize, - run, ) diff --git a/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py b/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py index aa6008ab9..37166bcb4 100644 --- a/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +++ b/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py @@ -13,6 +13,7 @@ import random import sys import matplotlib + matplotlib.use("Agg") import matplotlib.backends.backend_pdf import matplotlib.pyplot as plt diff --git a/tools/check_all_python.py b/tools/check_all_python.py index b4a41c10e..708339a67 100755 --- a/tools/check_all_python.py +++ b/tools/check_all_python.py @@ -15,8 +15,8 @@ for base in ["buckifier", "build_tools", "coverage", "tools"]: filenames += glob.glob(base + "/" + suff + ".py") for filename in filenames: - source = open(filename, 'r').read() + '\n' + source = open(filename, "r").read() + "\n" # Parses and syntax checks the file, throwing on error. (No pyc written.) 
- _ = compile(source, filename, 'exec') + _ = compile(source, filename, "exec") print("No syntax errors in {0} .py files".format(len(filenames))) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 8d0d7882c..934a9d61e 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -2,14 +2,15 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from __future__ import absolute_import, division, print_function, unicode_literals +import argparse + import os -import sys -import time import random -import tempfile -import subprocess import shutil -import argparse +import subprocess +import sys +import tempfile +import time # params overwrite priority: # for default: @@ -37,8 +38,9 @@ default_params = { "batch_protection_bytes_per_key": lambda: random.choice([0, 8]), "memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]), "block_size": 16384, - "bloom_bits": lambda: random.choice([random.randint(0,19), - random.lognormvariate(2.3, 1.3)]), + "bloom_bits": lambda: random.choice( + [random.randint(0, 19), random.lognormvariate(2.3, 1.3)] + ), "cache_index_and_filter_blocks": lambda: random.randint(0, 1), "cache_size": 8388608, "charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]), @@ -47,12 +49,14 @@ default_params = { "charge_file_metadata": lambda: random.choice([0, 1]), "checkpoint_one_in": 1000000, "compression_type": lambda: random.choice( - ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]), - "bottommost_compression_type": lambda: - "disable" if random.randint(0, 1) == 0 else - random.choice( - ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]), - "checksum_type" : lambda: random.choice(["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]), + ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"] + ), + "bottommost_compression_type": lambda: "disable" + if random.randint(0, 1) == 0 + else random.choice(["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]), + "checksum_type": lambda: random.choice( + ["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"] + ), "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1), "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1), # Disabled compression_parallel_threads as the feature is not stable @@ -93,12 +97,12 @@ default_params = { # the random seed, so the same keys are chosen by every run for disallowing # overwrites. "nooverwritepercent": 1, - "open_files": lambda : random.choice([-1, -1, 100, 500000]), + "open_files": lambda: random.choice([-1, -1, 100, 500000]), "optimize_filters_for_memory": lambda: random.randint(0, 1), "partition_filters": lambda: random.randint(0, 1), "partition_pinning": lambda: random.randint(0, 3), "pause_background_one_in": 1000000, - "prefix_size" : lambda: random.choice([-1, 1, 5, 7, 8]), + "prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]), "prefixpercent": 5, "progress_reports": 0, "readpercent": 45, @@ -117,7 +121,7 @@ default_params = { "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "mock_direct_io": False, "cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]), - # fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false. + # fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false. 
"use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1), # 999 -> use Bloom API @@ -128,34 +132,36 @@ default_params = { "writepercent": 35, "format_version": lambda: random.choice([2, 3, 4, 5, 5]), "index_block_restart_interval": lambda: random.choice(range(1, 16)), - "use_multiget" : lambda: random.randint(0, 1), - "periodic_compaction_seconds" : - lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]), - "compaction_ttl" : lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]), + "use_multiget": lambda: random.randint(0, 1), + "periodic_compaction_seconds": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]), + "compaction_ttl": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]), # Test small max_manifest_file_size in a smaller chance, as most of the # time we wnat manifest history to be preserved to help debug - "max_manifest_file_size" : lambda : random.choice( - [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]), + "max_manifest_file_size": lambda: random.choice( + [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)] + ), # Sync mode might make test runs slower so running it in a smaller chance - "sync" : lambda : random.choice( - [1 if t == 0 else 0 for t in range(0, 20)]), + "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]), "bytes_per_sync": lambda: random.choice([0, 262144]), "wal_bytes_per_sync": lambda: random.choice([0, 524288]), # Disable compaction_readahead_size because the test is not passing. - #"compaction_readahead_size" : lambda : random.choice( + # "compaction_readahead_size" : lambda : random.choice( # [0, 0, 1024 * 1024]), - "db_write_buffer_size" : lambda: random.choice( - [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]), - "avoid_unnecessary_blocking_io" : random.randint(0, 1), - "write_dbid_to_manifest" : random.randint(0, 1), - "avoid_flush_during_recovery" : lambda: random.choice( - [1 if t == 0 else 0 for t in range(0, 8)]), - "max_write_batch_group_size_bytes" : lambda: random.choice( - [16, 64, 1024 * 1024, 16 * 1024 * 1024]), - "level_compaction_dynamic_level_bytes" : True, + "db_write_buffer_size": lambda: random.choice( + [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024] + ), + "avoid_unnecessary_blocking_io": random.randint(0, 1), + "write_dbid_to_manifest": random.randint(0, 1), + "avoid_flush_during_recovery": lambda: random.choice( + [1 if t == 0 else 0 for t in range(0, 8)] + ), + "max_write_batch_group_size_bytes": lambda: random.choice( + [16, 64, 1024 * 1024, 16 * 1024 * 1024] + ), + "level_compaction_dynamic_level_bytes": True, "verify_checksum_one_in": 1000000, "verify_db_one_in": 100000, - "continuous_verification_interval" : 0, + "continuous_verification_interval": 0, "max_key_len": 3, "key_len_percent_dist": "1,30,69", "read_fault_one_in": lambda: random.choice([0, 32, 1000]), @@ -166,10 +172,11 @@ default_params = { "get_property_one_in": 1000000, "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), "max_write_buffer_size_to_maintain": lambda: random.choice( - [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]), + [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024] + ), "user_timestamp_size": 0, - "secondary_cache_fault_one_in" : lambda: random.choice([0, 0, 32]), - "prepopulate_block_cache" : lambda: random.choice([0, 1]), + "secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]), + "prepopulate_block_cache": lambda: random.choice([0, 1]), "memtable_prefix_bloom_size_ratio": lambda: 
random.choice([0.001, 0.01, 0.1, 0.5]), "memtable_whole_key_filtering": lambda: random.randint(0, 1), "detect_filter_construct_corruption": lambda: random.choice([0, 1]), @@ -177,9 +184,13 @@ default_params = { "async_io": lambda: random.choice([0, 1]), "wal_compression": lambda: random.choice(["none", "zstd"]), "verify_sst_unique_id_in_manifest": 1, # always do unique_id verification - "secondary_cache_uri": lambda: random.choice( - ["", "compressed_secondary_cache://capacity=8388608", - "compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true"]), + "secondary_cache_uri": lambda: random.choice( + [ + "", + "compressed_secondary_cache://capacity=8388608", + "compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true", + ] + ), "allow_data_in_errors": True, "readahead_size": lambda: random.choice([0, 16384, 524288]), "initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]), @@ -187,11 +198,12 @@ default_params = { "num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]), } -_TEST_DIR_ENV_VAR = 'TEST_TMPDIR' -_DEBUG_LEVEL_ENV_VAR = 'DEBUG_LEVEL' +_TEST_DIR_ENV_VAR = "TEST_TMPDIR" +_DEBUG_LEVEL_ENV_VAR = "DEBUG_LEVEL" stress_cmd = "./db_stress" + def is_release_mode(): return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0" @@ -207,7 +219,10 @@ def get_dbname(test_name): os.mkdir(dbname) return dbname + expected_values_dir = None + + def setup_expected_values_dir(): global expected_values_dir if expected_values_dir is not None: @@ -215,8 +230,7 @@ def setup_expected_values_dir(): expected_dir_prefix = "rocksdb_crashtest_expected_" test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR) if test_tmpdir is None or test_tmpdir == "": - expected_values_dir = tempfile.mkdtemp( - prefix=expected_dir_prefix) + expected_values_dir = tempfile.mkdtemp(prefix=expected_dir_prefix) else: # if tmpdir is specified, store the expected_values_dir under that dir expected_values_dir = test_tmpdir + "/rocksdb_crashtest_expected" @@ -225,7 +239,10 @@ def setup_expected_values_dir(): os.mkdir(expected_values_dir) return expected_values_dir + multiops_txn_key_spaces_file = None + + def setup_multiops_txn_key_spaces_file(): global multiops_txn_key_spaces_file if multiops_txn_key_spaces_file is not None: @@ -233,13 +250,15 @@ def setup_multiops_txn_key_spaces_file(): key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces" test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR) if test_tmpdir is None or test_tmpdir == "": - multiops_txn_key_spaces_file = tempfile.mkstemp( - prefix=key_spaces_file_prefix)[1] + multiops_txn_key_spaces_file = tempfile.mkstemp(prefix=key_spaces_file_prefix)[ + 1 + ] else: if not os.path.exists(test_tmpdir): os.mkdir(test_tmpdir) multiops_txn_key_spaces_file = tempfile.mkstemp( - prefix=key_spaces_file_prefix, dir=test_tmpdir)[1] + prefix=key_spaces_file_prefix, dir=test_tmpdir + )[1] return multiops_txn_key_spaces_file @@ -291,7 +310,7 @@ simple_default_params = { "write_buffer_size": 32 * 1024 * 1024, "level_compaction_dynamic_level_bytes": False, "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), - "verify_iterator_with_expected_state_one_in": 5 # this locks a range of keys + "verify_iterator_with_expected_state_one_in": 5, # this locks a range of keys } blackbox_simple_default_params = { @@ -317,7 +336,7 @@ cf_consistency_params = { } txn_params = { - "use_txn" : 1, + "use_txn": 1, # Avoid lambda to set it once for the entire test "txn_write_policy": random.randint(0, 2), "unordered_write": random.randint(0, 1), @@ 
-347,10 +366,14 @@ blob_params = {
     "blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]),
     "blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]),
     "enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3),
-    "blob_garbage_collection_age_cutoff": lambda: random.choice([0.0, 0.25, 0.5, 0.75, 1.0]),
+    "blob_garbage_collection_age_cutoff": lambda: random.choice(
+        [0.0, 0.25, 0.5, 0.75, 1.0]
+    ),
     "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
     "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
-    "blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]),
+    "blob_file_starting_level": lambda: random.choice(
+        [0] * 4 + [1] * 3 + [2] * 2 + [3]
+    ),
     "use_blob_cache": lambda: random.randint(0, 1),
     "use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
     "blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
@@ -418,7 +441,7 @@ multiops_txn_default_params = {
     # compactions.
     "flush_one_in": 1000,
     "key_spaces_path": setup_multiops_txn_key_spaces_file(),
-    "rollback_one_in": 4,
+    "rollback_one_in": 4,
     # Re-enable once we have a compaction for MultiOpsTxnStressTest
     "enable_compaction_filter": 0,
     "create_timestamped_snapshot_one_in": 50,
@@ -446,11 +469,11 @@ multiops_wp_txn_params = {
     "create_timestamped_snapshot_one_in": 0,
 }
+
 
 def finalize_and_sanitize(src_params):
-    dest_params = dict([(k, v() if callable(v) else v)
-                        for (k, v) in src_params.items()])
+    dest_params = {k: v() if callable(v) else v for (k, v) in src_params.items()}
     if is_release_mode():
-        dest_params['read_fault_one_in'] = 0
+        dest_params["read_fault_one_in"] = 0
     if dest_params.get("compression_max_dict_bytes") == 0:
         dest_params["compression_zstd_max_train_bytes"] = 0
         dest_params["compression_max_dict_buffer_bytes"] = 0
@@ -466,13 +489,15 @@ def finalize_and_sanitize(src_params):
         # used by `IngestExternalFile()`, causing it to fail with mmap
         # reads. Remove this once it is fixed.
         dest_params["ingest_external_file_one_in"] = 0
-    if (dest_params["use_direct_io_for_flush_and_compaction"] == 1
-            or dest_params["use_direct_reads"] == 1) and \
-            not is_direct_io_supported(dest_params["db"]):
+    if (
+        dest_params["use_direct_io_for_flush_and_compaction"] == 1
+        or dest_params["use_direct_reads"] == 1
+    ) and not is_direct_io_supported(dest_params["db"]):
         if is_release_mode():
-            print("{} does not support direct IO. Disabling use_direct_reads and "
-                  "use_direct_io_for_flush_and_compaction.\n".format(
-                      dest_params["db"]))
+            print(
+                "{} does not support direct IO. Disabling use_direct_reads and "
+                "use_direct_io_for_flush_and_compaction.\n".format(dest_params["db"])
+            )
             dest_params["use_direct_reads"] = 0
             dest_params["use_direct_io_for_flush_and_compaction"] = 0
         else:
@@ -480,18 +505,22 @@ def finalize_and_sanitize(src_params):
     # Multi-key operations are not currently compatible with transactions or
     # timestamp.
- if (dest_params.get("test_batches_snapshots") == 1 or - dest_params.get("use_txn") == 1 or - dest_params.get("user_timestamp_size") > 0): + if ( + dest_params.get("test_batches_snapshots") == 1 + or dest_params.get("use_txn") == 1 + or dest_params.get("user_timestamp_size") > 0 + ): dest_params["delpercent"] += dest_params["delrangepercent"] dest_params["delrangepercent"] = 0 dest_params["ingest_external_file_one_in"] = 0 # Correctness testing with unsync data loss is not currently compatible # with transactions - if (dest_params.get("use_txn") == 1): + if dest_params.get("use_txn") == 1: dest_params["sync_fault_injection"] = 0 - if (dest_params.get("disable_wal") == 1 or - dest_params.get("sync_fault_injection") == 1): + if ( + dest_params.get("disable_wal") == 1 + or dest_params.get("sync_fault_injection") == 1 + ): # File ingestion does not guarantee prefix-recoverability when unsynced # data can be lost. Ingesting a file syncs data immediately that is # newer than unsynced memtable data that can be lost on restart. @@ -544,8 +573,10 @@ def finalize_and_sanitize(src_params): dest_params["readpercent"] += dest_params.get("prefixpercent", 20) dest_params["prefixpercent"] = 0 dest_params["test_batches_snapshots"] = 0 - if (dest_params.get("prefix_size") == -1 and - dest_params.get("memtable_whole_key_filtering") == 0): + if ( + dest_params.get("prefix_size") == -1 + and dest_params.get("memtable_whole_key_filtering") == 0 + ): dest_params["memtable_prefix_bloom_size_ratio"] = 0 if dest_params.get("two_write_queues") == 1: dest_params["enable_pipelined_write"] = 0 @@ -566,19 +597,20 @@ def finalize_and_sanitize(src_params): return dest_params + def gen_cmd_params(args): params = {} params.update(default_params) - if args.test_type == 'blackbox': + if args.test_type == "blackbox": params.update(blackbox_default_params) - if args.test_type == 'whitebox': + if args.test_type == "whitebox": params.update(whitebox_default_params) if args.simple: params.update(simple_default_params) - if args.test_type == 'blackbox': + if args.test_type == "blackbox": params.update(blackbox_simple_default_params) - if args.test_type == 'whitebox': + if args.test_type == "whitebox": params.update(whitebox_simple_default_params) if args.cf_consistency: params.update(cf_consistency_params) @@ -590,9 +622,9 @@ def gen_cmd_params(args): params.update(ts_params) if args.test_multiops_txn: params.update(multiops_txn_default_params) - if args.write_policy == 'write_committed': + if args.write_policy == "write_committed": params.update(multiops_wc_txn_params) - elif args.write_policy == 'write_prepared': + elif args.write_policy == "write_prepared": params.update(multiops_wp_txn_params) if args.test_tiered_storage: params.update(tiered_params) @@ -600,9 +632,12 @@ def gen_cmd_params(args): # Best-effort recovery, user defined timestamp, tiered storage are currently # incompatible with BlobDB. Test BE recovery if specified on the command # line; otherwise, apply BlobDB related overrides with a 10% chance. 
- if (not args.test_best_efforts_recovery and - not args.enable_ts and not args.test_tiered_storage and - random.choice([0] * 9 + [1]) == 1): + if ( + not args.test_best_efforts_recovery + and not args.enable_ts + and not args.test_tiered_storage + and random.choice([0] * 9 + [1]) == 1 + ): params.update(blob_params) for k, v in vars(args).items(): @@ -613,68 +648,87 @@ def gen_cmd_params(args): def gen_cmd(params, unknown_params): finalzied_params = finalize_and_sanitize(params) - cmd = [stress_cmd] + [ - '--{0}={1}'.format(k, v) - for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)] - if k not in set(['test_type', 'simple', 'duration', 'interval', - 'random_kill_odd', 'cf_consistency', 'txn', - 'test_best_efforts_recovery', 'enable_ts', - 'test_multiops_txn', 'write_policy', 'stress_cmd', - 'test_tiered_storage']) - and v is not None] + unknown_params + cmd = ( + [stress_cmd] + + [ + "--{0}={1}".format(k, v) + for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)] + if k + not in { + "test_type", + "simple", + "duration", + "interval", + "random_kill_odd", + "cf_consistency", + "txn", + "test_best_efforts_recovery", + "enable_ts", + "test_multiops_txn", + "write_policy", + "stress_cmd", + "test_tiered_storage", + } + and v is not None + ] + + unknown_params + ) return cmd def execute_cmd(cmd, timeout): - child = subprocess.Popen(cmd, stderr=subprocess.PIPE, - stdout=subprocess.PIPE) - print("Running db_stress with pid=%d: %s\n\n" - % (child.pid, ' '.join(cmd))) + child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd))) try: outs, errs = child.communicate(timeout=timeout) hit_timeout = False - print("WARNING: db_stress ended before kill: exitcode=%d\n" - % child.returncode) + print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode) except subprocess.TimeoutExpired: hit_timeout = True child.kill() print("KILLED %d\n" % child.pid) outs, errs = child.communicate() - return hit_timeout, child.returncode, outs.decode('utf-8'), errs.decode('utf-8') + return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8") # This script runs and kills db_stress multiple times. It checks consistency # in case of unsafe crashes in RocksDB. 
 def blackbox_crash_main(args, unknown_args):
     cmd_params = gen_cmd_params(args)
-    dbname = get_dbname('blackbox')
-    exit_time = time.time() + cmd_params['duration']
-
-    print("Running blackbox-crash-test with \n"
-          + "interval_between_crash=" + str(cmd_params['interval']) + "\n"
-          + "total-duration=" + str(cmd_params['duration']) + "\n")
+    dbname = get_dbname("blackbox")
+    exit_time = time.time() + cmd_params["duration"]
+
+    print(
+        "Running blackbox-crash-test with \n"
+        + "interval_between_crash="
+        + str(cmd_params["interval"])
+        + "\n"
+        + "total-duration="
+        + str(cmd_params["duration"])
+        + "\n"
+    )
 
     while time.time() < exit_time:
-        cmd = gen_cmd(dict(
-            list(cmd_params.items())
-            + list({'db': dbname}.items())), unknown_args)
+        cmd = gen_cmd(
+            dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
+        )
 
-        hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params['interval'])
+        hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["interval"])
 
         if not hit_timeout:
-            print('Exit Before Killing')
-            print('stdout:')
+            print("Exit Before Killing")
+            print("stdout:")
             print(outs)
-            print('stderr:')
+            print("stderr:")
             print(errs)
             sys.exit(2)
 
-        for line in errs.split('\n'):
-            if line != '' and not line.startswith('WARNING'):
-                print('stderr has error message:')
-                print('***' + line + '***')
+        for line in errs.split("\n"):
+            if line != "" and not line.startswith("WARNING"):
+                print("stderr has error message:")
+                print("***" + line + "***")
 
         time.sleep(1)  # time to stabilize before the next run
@@ -688,89 +742,109 @@
 # kill_random_test that causes rocksdb to crash at various points in code.
 def whitebox_crash_main(args, unknown_args):
     cmd_params = gen_cmd_params(args)
-    dbname = get_dbname('whitebox')
+    dbname = get_dbname("whitebox")
 
     cur_time = time.time()
-    exit_time = cur_time + cmd_params['duration']
-    half_time = cur_time + cmd_params['duration'] // 2
+    exit_time = cur_time + cmd_params["duration"]
+    half_time = cur_time + cmd_params["duration"] // 2
 
-    print("Running whitebox-crash-test with \n"
-          + "total-duration=" + str(cmd_params['duration']) + "\n")
+    print(
+        "Running whitebox-crash-test with \n"
+        + "total-duration="
+        + str(cmd_params["duration"])
+        + "\n"
+    )
 
     total_check_mode = 4
     check_mode = 0
-    kill_random_test = cmd_params['random_kill_odd']
+    kill_random_test = cmd_params["random_kill_odd"]
     kill_mode = 0
 
     while time.time() < exit_time:
         if check_mode == 0:
             additional_opts = {
                 # use large ops per thread since we will kill it anyway
-                "ops_per_thread": 100 * cmd_params['ops_per_thread'],
+                "ops_per_thread": 100
+                * cmd_params["ops_per_thread"],
             }
             # run with kill_random_test, with three modes.
             # Mode 0 covers all kill points. Mode 1 covers fewer kill points but
             # increases the chance of triggering them. Mode 2 covers even less
             # frequent kill points and further increases the triggering chance.
if kill_mode == 0: - additional_opts.update({ - "kill_random_test": kill_random_test, - }) + additional_opts.update( + { + "kill_random_test": kill_random_test, + } + ) elif kill_mode == 1: - if cmd_params.get('disable_wal', 0) == 1: + if cmd_params.get("disable_wal", 0) == 1: my_kill_odd = kill_random_test // 50 + 1 else: my_kill_odd = kill_random_test // 10 + 1 - additional_opts.update({ - "kill_random_test": my_kill_odd, - "kill_exclude_prefixes": "WritableFileWriter::Append," - + "WritableFileWriter::WriteBuffered", - }) + additional_opts.update( + { + "kill_random_test": my_kill_odd, + "kill_exclude_prefixes": "WritableFileWriter::Append," + + "WritableFileWriter::WriteBuffered", + } + ) elif kill_mode == 2: # TODO: May need to adjust random odds if kill_random_test # is too small. - additional_opts.update({ - "kill_random_test": (kill_random_test // 5000 + 1), - "kill_exclude_prefixes": "WritableFileWriter::Append," - "WritableFileWriter::WriteBuffered," - "PosixMmapFile::Allocate,WritableFileWriter::Flush", - }) + additional_opts.update( + { + "kill_random_test": (kill_random_test // 5000 + 1), + "kill_exclude_prefixes": "WritableFileWriter::Append," + "WritableFileWriter::WriteBuffered," + "PosixMmapFile::Allocate,WritableFileWriter::Flush", + } + ) # Run kill mode 0, 1 and 2 by turn. kill_mode = (kill_mode + 1) % 3 elif check_mode == 1: # normal run with universal compaction mode additional_opts = { "kill_random_test": None, - "ops_per_thread": cmd_params['ops_per_thread'], + "ops_per_thread": cmd_params["ops_per_thread"], "compaction_style": 1, } # Single level universal has a lot of special logic. Ensure we cover # it sometimes. if random.randint(0, 1) == 1: - additional_opts.update({ - "num_levels": 1, - }) + additional_opts.update( + { + "num_levels": 1, + } + ) elif check_mode == 2: # normal run with FIFO compaction mode # ops_per_thread is divided by 5 because FIFO compaction # style is quite a bit slower on reads with lot of files additional_opts = { "kill_random_test": None, - "ops_per_thread": cmd_params['ops_per_thread'] // 5, + "ops_per_thread": cmd_params["ops_per_thread"] // 5, "compaction_style": 2, } else: # normal run additional_opts = { "kill_random_test": None, - "ops_per_thread": cmd_params['ops_per_thread'], + "ops_per_thread": cmd_params["ops_per_thread"], } - cmd = gen_cmd(dict(list(cmd_params.items()) - + list(additional_opts.items()) - + list({'db': dbname}.items())), unknown_args) + cmd = gen_cmd( + dict( + list(cmd_params.items()) + + list(additional_opts.items()) + + list({"db": dbname}.items()) + ), + unknown_args, + ) - print("Running:" + ' '.join(cmd) + "\n") # noqa: E999 T25377293 Grandfathered in + print( + "Running:" + " ".join(cmd) + "\n" + ) # noqa: E999 T25377293 Grandfathered in # If the running time is 15 minutes over the run time, explicit kill and # exit even if white box kill didn't hit. This is to guarantee run time @@ -779,9 +853,11 @@ def whitebox_crash_main(args, unknown_args): # TODO detect a hanging condition. The job might run too long as RocksDB # hits a hanging bug. 
hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd( - cmd, exit_time - time.time() + 900) - msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format( - check_mode, additional_opts['kill_random_test'], retncode)) + cmd, exit_time - time.time() + 900 + ) + msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format( + check_mode, additional_opts["kill_random_test"], retncode + ) print(msg) print(stdoutdata) @@ -792,10 +868,10 @@ def whitebox_crash_main(args, unknown_args): break expected = False - if additional_opts['kill_random_test'] is None and (retncode == 0): + if additional_opts["kill_random_test"] is None and (retncode == 0): # we expect zero retncode if no kill option expected = True - elif additional_opts['kill_random_test'] is not None and retncode <= 0: + elif additional_opts["kill_random_test"] is not None and retncode <= 0: # When kill option is given, the test MIGHT kill itself. # If it does, negative retncode is expected. Otherwise 0. expected = True @@ -805,15 +881,13 @@ def whitebox_crash_main(args, unknown_args): sys.exit(1) stderrdata = stderrdata.lower() - errorcount = (stderrdata.count('error') - - stderrdata.count('got errors 0 times')) - print("#times error occurred in output is " + str(errorcount) + - "\n") + errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") + print("#times error occurred in output is " + str(errorcount) + "\n") - if (errorcount > 0): + if errorcount > 0: print("TEST FAILED. Output has 'error'!!!\n") sys.exit(2) - if (stderrdata.find('fail') >= 0): + if stderrdata.find("fail") >= 0: print("TEST FAILED. Output has 'fail'!!!\n") sys.exit(2) @@ -824,7 +898,7 @@ def whitebox_crash_main(args, unknown_args): # success shutil.rmtree(dbname, True) os.mkdir(dbname) - cmd_params.pop('expected_values_dir', None) + cmd_params.pop("expected_values_dir", None) check_mode = (check_mode + 1) % total_check_mode time.sleep(1) # time to stabilize after a kill @@ -833,34 +907,38 @@ def whitebox_crash_main(args, unknown_args): def main(): global stress_cmd - parser = argparse.ArgumentParser(description="This script runs and kills \ - db_stress multiple times") + parser = argparse.ArgumentParser( + description="This script runs and kills \ + db_stress multiple times" + ) parser.add_argument("test_type", choices=["blackbox", "whitebox"]) parser.add_argument("--simple", action="store_true") - parser.add_argument("--cf_consistency", action='store_true') - parser.add_argument("--txn", action='store_true') - parser.add_argument("--test_best_efforts_recovery", action='store_true') - parser.add_argument("--enable_ts", action='store_true') - parser.add_argument("--test_multiops_txn", action='store_true') + parser.add_argument("--cf_consistency", action="store_true") + parser.add_argument("--txn", action="store_true") + parser.add_argument("--test_best_efforts_recovery", action="store_true") + parser.add_argument("--enable_ts", action="store_true") + parser.add_argument("--test_multiops_txn", action="store_true") parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"]) parser.add_argument("--stress_cmd") - parser.add_argument("--test_tiered_storage", action='store_true') - - all_params = dict(list(default_params.items()) - + list(blackbox_default_params.items()) - + list(whitebox_default_params.items()) - + list(simple_default_params.items()) - + list(blackbox_simple_default_params.items()) - + list(whitebox_simple_default_params.items()) - + list(blob_params.items()) - + list(ts_params.items()) - + 
list(multiops_txn_default_params.items()) - + list(multiops_wc_txn_params.items()) - + list(multiops_wp_txn_params.items()) - + list(best_efforts_recovery_params.items()) - + list(cf_consistency_params.items()) - + list(tiered_params.items()) - + list(txn_params.items())) + parser.add_argument("--test_tiered_storage", action="store_true") + + all_params = dict( + list(default_params.items()) + + list(blackbox_default_params.items()) + + list(whitebox_default_params.items()) + + list(simple_default_params.items()) + + list(blackbox_simple_default_params.items()) + + list(whitebox_simple_default_params.items()) + + list(blob_params.items()) + + list(ts_params.items()) + + list(multiops_txn_default_params.items()) + + list(multiops_wc_txn_params.items()) + + list(multiops_wp_txn_params.items()) + + list(best_efforts_recovery_params.items()) + + list(cf_consistency_params.items()) + + list(tiered_params.items()) + + list(txn_params.items()) + ) for k, v in all_params.items(): parser.add_argument("--" + k, type=type(v() if callable(v) else v)) @@ -869,15 +947,17 @@ def main(): test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR) if test_tmpdir is not None and not os.path.isdir(test_tmpdir): - print('%s env var is set to a non-existent directory: %s' % - (_TEST_DIR_ENV_VAR, test_tmpdir)) + print( + "%s env var is set to a non-existent directory: %s" + % (_TEST_DIR_ENV_VAR, test_tmpdir) + ) sys.exit(1) if args.stress_cmd: stress_cmd = args.stress_cmd - if args.test_type == 'blackbox': + if args.test_type == "blackbox": blackbox_crash_main(args, unknown_args) - if args.test_type == 'whitebox': + if args.test_type == "whitebox": whitebox_crash_main(args, unknown_args) # Only delete the `expected_values_dir` if test passes if expected_values_dir is not None: @@ -886,5 +966,5 @@ def main(): os.remove(multiops_txn_key_spaces_file) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tools/ldb_test.py b/tools/ldb_test.py index c69c5ca73..e243d69c0 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -2,65 +2,72 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from __future__ import absolute_import, division, print_function, unicode_literals -import os import glob + +import os import os.path +import re import shutil import subprocess +import tempfile import time import unittest -import tempfile -import re + def my_check_output(*popenargs, **kwargs): """ If we had python 2.7, we should simply use subprocess.check_output. This is a stop-gap solution for python 2.6 """ - if 'stdout' in kwargs: - raise ValueError('stdout argument not allowed, it will be overridden.') - process = subprocess.Popen(stderr=subprocess.PIPE, stdout=subprocess.PIPE, - *popenargs, **kwargs) + if "stdout" in kwargs: + raise ValueError("stdout argument not allowed, it will be overridden.") + process = subprocess.Popen( + stderr=subprocess.PIPE, stdout=subprocess.PIPE, *popenargs, **kwargs + ) output, unused_err = process.communicate() retcode = process.poll() if retcode: cmd = kwargs.get("args") if cmd is None: cmd = popenargs[0] - raise Exception("Exit code is not 0. It is %d. Command: %s" % - (retcode, cmd)) - return output.decode('utf-8') + raise Exception("Exit code is not 0. It is %d. 
Command: %s" % (retcode, cmd)) + return output.decode("utf-8") + def run_err_null(cmd): return os.system(cmd + " 2>/dev/null ") + class LDBTestCase(unittest.TestCase): def setUp(self): - self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_") + self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_") self.DB_NAME = "testdb" def tearDown(self): - assert(self.TMP_DIR.strip() != "/" - and self.TMP_DIR.strip() != "/tmp" - and self.TMP_DIR.strip() != "/tmp/") #Just some paranoia + assert ( + self.TMP_DIR.strip() != "/" + and self.TMP_DIR.strip() != "/tmp" + and self.TMP_DIR.strip() != "/tmp/" + ) # Just some paranoia shutil.rmtree(self.TMP_DIR) def dbParam(self, dbName): return "--db=%s" % os.path.join(self.TMP_DIR, dbName) - def assertRunOKFull(self, params, expectedOutput, unexpected=False, - isPattern=False): + def assertRunOKFull( + self, params, expectedOutput, unexpected=False, isPattern=False + ): """ All command-line params must be specified. Allows full flexibility in testing; for example: missing db param. """ - output = my_check_output("./ldb %s |grep -v \"Created bg thread\"" % - params, shell=True) + output = my_check_output( + './ldb %s |grep -v "Created bg thread"' % params, shell=True + ) if not unexpected: if isPattern: - self.assertNotEqual(expectedOutput.search(output.strip()), - None) + self.assertNotEqual(expectedOutput.search(output.strip()), None) else: self.assertEqual(output.strip(), expectedOutput.strip()) else: @@ -76,20 +83,25 @@ class LDBTestCase(unittest.TestCase): """ try: - my_check_output("./ldb %s >/dev/null 2>&1 |grep -v \"Created bg \ - thread\"" % params, shell=True) + my_check_output( + './ldb %s >/dev/null 2>&1 |grep -v "Created bg \ + thread"' + % params, + shell=True, + ) except Exception: return self.fail( - "Exception should have been raised for command with params: %s" % - params) + "Exception should have been raised for command with params: %s" % params + ) def assertRunOK(self, params, expectedOutput, unexpected=False): """ Uses the default test db. 
""" - self.assertRunOKFull("%s %s" % (self.dbParam(self.DB_NAME), params), - expectedOutput, unexpected) + self.assertRunOKFull( + "%s %s" % (self.dbParam(self.DB_NAME), params), expectedOutput, unexpected + ) def assertRunFAIL(self, params): """ @@ -118,16 +130,17 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("scan --to=x2", "x1 : y1") self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1") - self.assertRunOK("scan --from=x1 --to=z --max_keys=2", - "x1 : y1\nx2 : y2") + self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 : y1\nx2 : y2") - self.assertRunOK("scan --from=x1 --to=z --max_keys=3", - "x1 : y1\nx2 : y2\nx3 : y3") - self.assertRunOK("scan --from=x1 --to=z --max_keys=4", - "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK( + "scan --from=x1 --to=z --max_keys=3", "x1 : y1\nx2 : y2\nx3 : y3" + ) + self.assertRunOK( + "scan --from=x1 --to=z --max_keys=4", "x1 : y1\nx2 : y2\nx3 : y3" + ) self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1") self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3") - self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL + self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") @@ -148,18 +161,18 @@ class LDBTestCase(unittest.TestCase): return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params)) def writeExternSst(self, params, inputDumpFile, outputSst): - return 0 == run_err_null("cat %s | ./ldb write_extern_sst %s %s" - % (inputDumpFile, outputSst, params)) + return 0 == run_err_null( + "cat %s | ./ldb write_extern_sst %s %s" % (inputDumpFile, outputSst, params) + ) def ingestExternSst(self, params, inputSst): - return 0 == run_err_null("./ldb ingest_extern_sst %s %s" - % (inputSst, params)) + return 0 == run_err_null("./ldb ingest_extern_sst %s %s" % (inputSst, params)) def testStringBatchPut(self): print("Running testStringBatchPut...") self.assertRunOK("batchput x1 y1 --create_if_missing", "OK") self.assertRunOK("scan", "x1 : y1") - self.assertRunOK("batchput x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") + self.assertRunOK('batchput x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") self.assertRunFAIL("batchput") self.assertRunFAIL("batchput k1") @@ -171,7 +184,9 @@ class LDBTestCase(unittest.TestCase): dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") self.assertRunOK("scan", "x1 : y1") - self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") + self.assertRunOK( + 'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK" + ) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") blob_files = self.getBlobFiles(dbPath) @@ -195,13 +210,18 @@ class LDBTestCase(unittest.TestCase): print("Running testBlobStartingLevel...") dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) - self.assertRunOK("put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1", "OK") + self.assertRunOK( + "put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1", + "OK", + ) self.assertRunOK("get x1", "y1") blob_files = self.getBlobFiles(dbPath) self.assertTrue(len(blob_files) == 0) - self.assertRunOK("put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK") + self.assertRunOK( + "put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK" + ) 
self.assertRunOK("get x1", "y1") self.assertRunOK("get x2", "y2") self.assertRunFAIL("get x3") @@ -213,19 +233,37 @@ class LDBTestCase(unittest.TestCase): print("Running testCountDelimDump...") self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK") self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK") - self.assertRunOK("dump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") - self.assertRunOK("dump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") + self.assertRunOK( + "dump --count_delim", + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) + self.assertRunOK( + 'dump --count_delim="."', + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK") - self.assertRunOK("dump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8") + self.assertRunOK( + 'dump --count_delim=","', + "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8", + ) def testCountDelimIDump(self): print("Running testCountDelimIDump...") self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK") self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK") - self.assertRunOK("idump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") - self.assertRunOK("idump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") + self.assertRunOK( + "idump --count_delim", + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) + self.assertRunOK( + 'idump --count_delim="."', + "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8", + ) self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK") - self.assertRunOK("idump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8") + self.assertRunOK( + 'idump --count_delim=","', + "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8", + ) def testInvalidCmdLines(self): print("Running testInvalidCmdLines...") @@ -253,12 +291,13 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("get --key_hex 0x6132", "b2") self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232") self.assertRunOK("get --value_hex a2", "0x6232") - self.assertRunOK("scan --key_hex --value_hex", - "0x6131 : 0x6231\n0x6132 : 0x6232") - self.assertRunOK("scan --hex --from=0x6131 --to=0x6133", - "0x6131 : 0x6231\n0x6132 : 0x6232") - self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", - "0x6131 : 0x6231") + self.assertRunOK( + "scan --key_hex --value_hex", "0x6131 : 0x6231\n0x6132 : 0x6232" + ) + self.assertRunOK( + "scan --hex --from=0x6131 --to=0x6133", "0x6131 : 0x6231\n0x6132 : 0x6232" + ) + self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 : 0x6231") self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2") self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232") self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK") @@ -272,8 +311,7 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK") self.assertRunOK("scan --hex", "0x6131 : 0x6231", True) self.assertRunOK("dump --ttl ", "a1 ==> b1", True) - self.assertRunOK("dump --hex --ttl ", - 
"0x6131 ==> 0x6231\nKeys in range: 1") + self.assertRunOK("dump --hex --ttl ", "0x6131 ==> 0x6231\nKeys in range: 1") self.assertRunOK("scan --hex --ttl", "0x6131 : 0x6231") self.assertRunOK("get --value_hex a1", "0x6231", True) self.assertRunOK("get --ttl a1", "b1") @@ -295,8 +333,7 @@ class LDBTestCase(unittest.TestCase): def testDumpLoad(self): print("Running testDumpLoad...") - self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", - "OK") + self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) @@ -304,98 +341,125 @@ class LDBTestCase(unittest.TestCase): dumpFilePath = os.path.join(self.TMP_DIR, "dump1") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1") self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) - self.assertTrue(self.loadDb( - "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) - self.assertRunOKFull("scan --db=%s" % loadedDbPath, - "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) # Dump and load in hex dumpFilePath = os.path.join(self.TMP_DIR, "dump2") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2") self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath)) - self.assertTrue(self.loadDb( - "--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath)) - self.assertRunOKFull("scan --db=%s" % loadedDbPath, - "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertTrue( + self.loadDb( + "--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath + ) + ) + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) # Dump only a portion of the key range dumpFilePath = os.path.join(self.TMP_DIR, "dump3") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3") - self.assertTrue(self.dumpDb( - "--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath)) - self.assertTrue(self.loadDb( - "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) + self.assertTrue( + self.dumpDb("--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath) + ) + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2") # Dump upto max_keys rows dumpFilePath = os.path.join(self.TMP_DIR, "dump4") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4") - self.assertTrue(self.dumpDb( - "--db=%s --max_keys=3" % origDbPath, dumpFilePath)) - self.assertTrue(self.loadDb( - "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) - self.assertRunOKFull("scan --db=%s" % loadedDbPath, - "x1 : y1\nx2 : y2\nx3 : y3") + self.assertTrue(self.dumpDb("--db=%s --max_keys=3" % origDbPath, dumpFilePath)) + self.assertTrue( + self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) + ) + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3") # Load into an existing db, create_if_missing is not specified self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath)) - self.assertRunOKFull("scan --db=%s" % loadedDbPath, - "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + 
)
 
         # Dump and load with WAL disabled
         dumpFilePath = os.path.join(self.TMP_DIR, "dump5")
         loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5")
         self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
-        self.assertTrue(self.loadDb(
-            "--db=%s --disable_wal --create_if_missing" % loadedDbPath,
-            dumpFilePath))
-        self.assertRunOKFull("scan --db=%s" % loadedDbPath,
-                             "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
+        self.assertTrue(
+            self.loadDb(
+                "--db=%s --disable_wal --create_if_missing" % loadedDbPath, dumpFilePath
+            )
+        )
+        self.assertRunOKFull(
+            "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
+        )
 
         # Dump and load with lots of extra params specified
-        extraParams = " ".join(["--bloom_bits=14", "--block_size=1024",
-                                "--auto_compaction=true",
-                                "--write_buffer_size=4194304",
-                                "--file_size=2097152"])
+        extraParams = " ".join(
+            [
+                "--bloom_bits=14",
+                "--block_size=1024",
+                "--auto_compaction=true",
+                "--write_buffer_size=4194304",
+                "--file_size=2097152",
+            ]
+        )
         dumpFilePath = os.path.join(self.TMP_DIR, "dump6")
         loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6")
-        self.assertTrue(self.dumpDb(
-            "--db=%s %s" % (origDbPath, extraParams), dumpFilePath))
-        self.assertTrue(self.loadDb(
-            "--db=%s %s --create_if_missing" % (loadedDbPath, extraParams),
-            dumpFilePath))
-        self.assertRunOKFull("scan --db=%s" % loadedDbPath,
-                             "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
+        self.assertTrue(
+            self.dumpDb("--db=%s %s" % (origDbPath, extraParams), dumpFilePath)
+        )
+        self.assertTrue(
+            self.loadDb(
+                "--db=%s %s --create_if_missing" % (loadedDbPath, extraParams),
+                dumpFilePath,
+            )
+        )
+        self.assertRunOKFull(
+            "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
+        )
 
         # Dump with count_only
         dumpFilePath = os.path.join(self.TMP_DIR, "dump7")
         loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7")
-        self.assertTrue(self.dumpDb(
-            "--db=%s --count_only" % origDbPath, dumpFilePath))
-        self.assertTrue(self.loadDb(
-            "--db=%s --create_if_missing" % loadedDbPath, dumpFilePath))
+        self.assertTrue(self.dumpDb("--db=%s --count_only" % origDbPath, dumpFilePath))
+        self.assertTrue(
+            self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
+        )
         # DB should have at least one value for scan to work
         self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK")
         self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1")
 
         # Dump command fails because of typo in params
         dumpFilePath = os.path.join(self.TMP_DIR, "dump8")
-        self.assertFalse(self.dumpDb(
-            "--db=%s --create_if_missing" % origDbPath, dumpFilePath))
+        self.assertFalse(
+            self.dumpDb("--db=%s --create_if_missing" % origDbPath, dumpFilePath)
+        )
 
         # Dump and load with BlobDB enabled
-        blobParams = " ".join(["--enable_blob_files", "--min_blob_size=1",
-                               "--blob_file_size=2097152"])
+        blobParams = " ".join(
+            ["--enable_blob_files", "--min_blob_size=1", "--blob_file_size=2097152"]
+        )
         dumpFilePath = os.path.join(self.TMP_DIR, "dump9")
         loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump9")
-        self.assertTrue(self.dumpDb(
-            "--db=%s" % (origDbPath), dumpFilePath))
-        self.assertTrue(self.loadDb(
-            "--db=%s %s --create_if_missing --disable_wal" % (loadedDbPath, blobParams),
-            dumpFilePath))
-        self.assertRunOKFull("scan --db=%s" % loadedDbPath,
-                             "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
+        self.assertTrue(self.dumpDb("--db=%s" % (origDbPath), dumpFilePath))
+        self.assertTrue(
+            self.loadDb(
+                "--db=%s %s --create_if_missing --disable_wal"
+                % (loadedDbPath, blobParams),
+                dumpFilePath,
+            )
+        )
+ self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + ) blob_files = self.getBlobFiles(loadedDbPath) self.assertTrue(len(blob_files) >= 1) @@ -404,12 +468,14 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("put a val --create_if_missing", "OK") self.assertRunOK("put b val", "OK") self.assertRunOK( - "idump", "'a' seq:1, type:1 => val\n" - "'b' seq:2, type:1 => val\nInternal keys in range: 2") + "idump", + "'a' seq:1, type:1 => val\n" + "'b' seq:2, type:1 => val\nInternal keys in range: 2", + ) self.assertRunOK( - "idump --input_key_hex --from=%s --to=%s" % (hex(ord('a')), - hex(ord('b'))), - "'a' seq:1, type:1 => val\nInternal keys in range: 1") + "idump --input_key_hex --from=%s --to=%s" % (hex(ord("a")), hex(ord("b"))), + "'a' seq:1, type:1 => val\nInternal keys in range: 1", + ) def testIDumpDecodeBlobIndex(self): print("Running testIDumpDecodeBlobIndex...") @@ -420,45 +486,55 @@ class LDBTestCase(unittest.TestCase): regex = ".*\[blob ref\].*" expected_pattern = re.compile(regex) cmd = "idump %s --decode_blob_index" - self.assertRunOKFull((cmd) - % (self.dbParam(self.DB_NAME)), - expected_pattern, unexpected=False, - isPattern=True) + self.assertRunOKFull( + (cmd) % (self.dbParam(self.DB_NAME)), + expected_pattern, + unexpected=False, + isPattern=True, + ) def testMiscAdminTask(self): print("Running testMiscAdminTask...") # These tests need to be improved; for example with asserts about # whether compaction or level reduction actually took place. - self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", - "OK") + self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) - self.assertTrue(0 == run_err_null( - "./ldb compact --db=%s" % origDbPath)) + self.assertTrue(0 == run_err_null("./ldb compact --db=%s" % origDbPath)) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") - self.assertTrue(0 == run_err_null( - "./ldb reduce_levels --db=%s --new_levels=2" % origDbPath)) + self.assertTrue( + 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=2" % origDbPath) + ) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") - self.assertTrue(0 == run_err_null( - "./ldb reduce_levels --db=%s --new_levels=3" % origDbPath)) + self.assertTrue( + 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=3" % origDbPath) + ) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") - self.assertTrue(0 == run_err_null( - "./ldb compact --db=%s --from=x1 --to=x3" % origDbPath)) + self.assertTrue( + 0 == run_err_null("./ldb compact --db=%s --from=x1 --to=x3" % origDbPath) + ) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") - self.assertTrue(0 == run_err_null( - "./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" - % origDbPath)) + self.assertTrue( + 0 + == run_err_null( + "./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % origDbPath + ) + ) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") - #TODO(dilip): Not sure what should be passed to WAL.Currently corrupted. - self.assertTrue(0 == run_err_null( - "./ldb dump_wal --db=%s --walfile=%s --header" % ( - origDbPath, os.path.join(origDbPath, "LOG")))) + # TODO(dilip): Not sure what should be passed to WAL.Currently corrupted. 
+ self.assertTrue( + 0 + == run_err_null( + "./ldb dump_wal --db=%s --walfile=%s --header" + % (origDbPath, os.path.join(origDbPath, "LOG")) + ) + ) self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") def testCheckConsistency(self): @@ -470,8 +546,9 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("get x1", "y1") self.assertRunOK("checkconsistency", "OK") - sstFilePath = my_check_output("ls %s" % os.path.join(dbPath, "*.sst"), - shell=True) + sstFilePath = my_check_output( + "ls %s" % os.path.join(dbPath, "*.sst"), shell=True + ) # Modify the file my_check_output("echo 'evil' > %s" % sstFilePath, shell=True) @@ -482,8 +559,7 @@ class LDBTestCase(unittest.TestCase): self.assertRunFAIL("checkconsistency") def dumpLiveFiles(self, params, dumpFile): - return 0 == run_err_null("./ldb dump_live_files %s > %s" % ( - params, dumpFile)) + return 0 == run_err_null("./ldb dump_live_files %s > %s" % (params, dumpFile)) def testDumpLiveFiles(self): print("Running testDumpLiveFiles...") @@ -506,7 +582,12 @@ class LDBTestCase(unittest.TestCase): dbPath += "/" # Call the dump_live_files function with the edited dbPath name. - self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, dumpFilePath)) + self.assertTrue( + self.dumpLiveFiles( + "--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, + dumpFilePath, + ) + ) # Investigate the output with open(dumpFilePath, "r") as tmp: @@ -517,14 +598,14 @@ class LDBTestCase(unittest.TestCase): self.assertTrue(len(sstFileList) >= 1) for sstFilename in sstFileList: filenumber = re.findall(r"\d+.sst", sstFilename)[0] - self.assertEqual(sstFilename, dbPath+filenumber) + self.assertEqual(sstFilename, dbPath + filenumber) # Check that all the Blob filenames have a correct full path (no multiple '/'). blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data) self.assertTrue(len(blobFileList) >= 1) for blobFilename in blobFileList: filenumber = re.findall(r"\d+.blob", blobFilename)[0] - self.assertEqual(blobFilename, dbPath+filenumber) + self.assertEqual(blobFilename, dbPath + filenumber) # Check that all the manifest filenames # have a correct full path (no multiple '/'). @@ -532,15 +613,16 @@ class LDBTestCase(unittest.TestCase): self.assertTrue(len(manifestFileList) >= 1) for manifestFilename in manifestFileList: filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0] - self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber) + self.assertEqual(manifestFilename, dbPath + "MANIFEST-" + filenumber) # Check that the blob file index is decoded. 
decodedBlobIndex = re.findall(r"\[blob ref\]", data) self.assertTrue(len(decodedBlobIndex) >= 1) def listLiveFilesMetadata(self, params, dumpFile): - return 0 == run_err_null("./ldb list_live_files_metadata %s > %s" % ( - params, dumpFile)) + return 0 == run_err_null( + "./ldb list_live_files_metadata %s > %s" % (params, dumpFile) + ) def testListLiveFilesMetadata(self): print("Running testListLiveFilesMetadata...") @@ -554,23 +636,27 @@ class LDBTestCase(unittest.TestCase): dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1") self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1)) dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2") - self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath2)) + self.assertTrue( + self.listLiveFilesMetadata( + "--sort_by_filename --db=%s" % dbPath, dumpFilePath2 + ) + ) # Collect SST filename and level from dump_live_files with open(dumpFilePath1, "r") as tmp: data = tmp.read() - filename1 = re.findall(r".*\d+\.sst",data)[0] - level1 = re.findall(r"level:\d+",data)[0].split(':')[1] + filename1 = re.findall(r".*\d+\.sst", data)[0] + level1 = re.findall(r"level:\d+", data)[0].split(":")[1] # Collect SST filename and level from list_live_files_metadata with open(dumpFilePath2, "r") as tmp: data = tmp.read() - filename2 = re.findall(r".*\d+\.sst",data)[0] - level2 = re.findall(r"level \d+",data)[0].split(' ')[1] + filename2 = re.findall(r".*\d+\.sst", data)[0] + level2 = re.findall(r"level \d+", data)[0].split(" ")[1] # Assert equality between filenames and levels. - self.assertEqual(filename1,filename2) - self.assertEqual(level1,level2) + self.assertEqual(filename1, filename2) + self.assertEqual(level1, level2) # Create multiple column families and compare the output # of list_live_files_metadata with dump_live_files once again. @@ -586,7 +672,11 @@ class LDBTestCase(unittest.TestCase): dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3") self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3)) dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4") - self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath4)) + self.assertTrue( + self.listLiveFilesMetadata( + "--sort_by_filename --db=%s" % dbPath, dumpFilePath4 + ) + ) # dump_live_files: # parse the output and create a map: @@ -601,7 +691,7 @@ class LDBTestCase(unittest.TestCase): # re.findall should not reorder the data. # Therefore namesAndLevels[i] matches the data from cfs[i]. for count, nameAndLevel in enumerate(namesAndLevels): - sstFilename = re.findall(r"\d+.sst",nameAndLevel)[0] + sstFilename = re.findall(r"\d+.sst", nameAndLevel)[0] sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0] cf = cfs[count] referenceMap[sstFilename] = [sstLevel, cf] @@ -618,13 +708,13 @@ class LDBTestCase(unittest.TestCase): sstLines = re.findall(r".*\d+.sst.*", data) for line in sstLines: sstFilename = re.findall(r"\d+.sst", line)[0] - sstLevel = re.findall(r"(?<=level )\d+",line)[0] - cf = re.findall(r"(?<=column family \')\w+(?=\')",line)[0] + sstLevel = re.findall(r"(?<=level )\d+", line)[0] + cf = re.findall(r"(?<=column family \')\w+(?=\')", line)[0] testMap[sstFilename] = [sstLevel, cf] # Compare the map obtained from dump_live_files and the map # obtained from list_live_files_metadata. Everything should match. 
-        self.assertEqual(referenceMap,testMap)
+        self.assertEqual(referenceMap, testMap)
 
     def getManifests(self, directory):
         return glob.glob(directory + "/MANIFEST-*")
@@ -657,25 +747,30 @@ class LDBTestCase(unittest.TestCase):
         manifest_files = self.getManifests(dbPath)
         self.assertTrue(len(manifest_files) == 1)
         # Test with the default manifest file in dbPath.
-        self.assertRunOKFull(cmd % dbPath, expected_pattern,
-                             unexpected=False, isPattern=True)
+        self.assertRunOKFull(
+            cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
+        )
 
         self.copyManifests(manifest_files[0], manifest_files[0] + "1")
         manifest_files = self.getManifests(dbPath)
         self.assertTrue(len(manifest_files) == 2)
         # Test with multiple manifest files in dbPath.
         self.assertRunFAILFull(cmd % dbPath)
         # Running it with the copy we just created should pass.
-        self.assertRunOKFull((cmd + " --path=%s")
-                             % (dbPath, manifest_files[1]),
-                             expected_pattern, unexpected=False,
-                             isPattern=True)
+        self.assertRunOKFull(
+            (cmd + " --path=%s") % (dbPath, manifest_files[1]),
+            expected_pattern,
+            unexpected=False,
+            isPattern=True,
+        )
         # Make sure that using the dump with --path will result in identical
         # output as just using manifest_dump.
         cmd = "dump --path=%s"
-        self.assertRunOKFull((cmd)
-                             % (manifest_files[1]),
-                             expected_pattern, unexpected=False,
-                             isPattern=True)
+        self.assertRunOKFull(
+            (cmd) % (manifest_files[1]),
+            expected_pattern,
+            unexpected=False,
+            isPattern=True,
+        )
 
         # Check that null characters don't interfere with the output format.
         self.assertRunOK("put a1 b1", "OK")
@@ -696,11 +791,14 @@ class LDBTestCase(unittest.TestCase):
         # Also note that 0x72=r and 0x4f=O, hence the regex \'r.{2}O\'
         # (we cannot use null character in the subprocess input either,
         # so we have to use '.{2}')
-        cmd_verbose = "manifest_dump --verbose --db=%s | grep -aq $'\'r.{2}O\'' && echo 'matched' || echo 'not matched'" %dbPath
-
-        self.assertRunOKFull(cmd_verbose , expected_verbose_output,
-                             unexpected=False, isPattern=True)
+        cmd_verbose = (
+            "manifest_dump --verbose --db=%s | grep -aq $'\'r.{2}O\'' && echo 'matched' || echo 'not matched'"
+            % dbPath
+        )
+        self.assertRunOKFull(
+            cmd_verbose, expected_verbose_output, unexpected=False, isPattern=True
+        )
 
     def testGetProperty(self):
         print("Running testGetProperty...")
@@ -709,16 +807,15 @@ class LDBTestCase(unittest.TestCase):
         self.assertRunOK("put 2 2", "OK")
         # A "string" property
         cmd = "--db=%s get_property rocksdb.estimate-num-keys"
-        self.assertRunOKFull(cmd % dbPath,
-                             "rocksdb.estimate-num-keys: 2")
+        self.assertRunOKFull(cmd % dbPath, "rocksdb.estimate-num-keys: 2")
         # A "map" property
         # FIXME: why doesn't this pick up two entries?
cmd = "--db=%s get_property rocksdb.aggregated-table-properties" part = "rocksdb.aggregated-table-properties.num_entries: " expected_pattern = re.compile(part) - self.assertRunOKFull(cmd % dbPath, - expected_pattern, unexpected=False, - isPattern=True) + self.assertRunOKFull( + cmd % dbPath, expected_pattern, unexpected=False, isPattern=True + ) # An invalid property cmd = "--db=%s get_property rocksdb.this-property-does-not-exist" self.assertRunFAILFull(cmd % dbPath) @@ -738,27 +835,27 @@ class LDBTestCase(unittest.TestCase): sst_files = self.getSSTFiles(dbPath) self.assertTrue(len(sst_files) >= 1) cmd = "dump --path=%s --decode_blob_index" - self.assertRunOKFull((cmd) - % (sst_files[0]), - expected_pattern, unexpected=False, - isPattern=True) + self.assertRunOKFull( + (cmd) % (sst_files[0]), expected_pattern, unexpected=False, isPattern=True + ) def testBlobDump(self): print("Running testBlobDump") dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") - self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") + self.assertRunOK( + 'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK" + ) # Pattern to expect from blob file dump. - regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" + regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa expected_pattern = re.compile(regex) blob_files = self.getBlobFiles(dbPath) self.assertTrue(len(blob_files) >= 1) cmd = "dump --path=%s --dump_uncompressed_blobs" - self.assertRunOKFull((cmd) - % (blob_files[0]), - expected_pattern, unexpected=False, - isPattern=True) + self.assertRunOKFull( + (cmd) % (blob_files[0]), expected_pattern, unexpected=False, isPattern=True + ) def testWALDump(self): print("Running testWALDump...") @@ -775,15 +872,14 @@ class LDBTestCase(unittest.TestCase): wal_files = self.getWALFiles(dbPath) self.assertTrue(len(wal_files) >= 1) cmd = "dump --path=%s" - self.assertRunOKFull((cmd) - % (wal_files[0]), - expected_pattern, unexpected=False, - isPattern=True) + self.assertRunOKFull( + (cmd) % (wal_files[0]), expected_pattern, unexpected=False, isPattern=True + ) def testListColumnFamilies(self): print("Running testListColumnFamilies...") self.assertRunOK("put x1 y1 --create_if_missing", "OK") - cmd = "list_column_families | grep -v \"Column families\"" + cmd = 'list_column_families | grep -v "Column families"' # Test on valid dbPath. self.assertRunOK(cmd, "{default}") # Test on empty path. @@ -791,34 +887,28 @@ class LDBTestCase(unittest.TestCase): def testColumnFamilies(self): print("Running testColumnFamilies...") - dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) # noqa: F841 T25377293 Grandfathered in + _ = os.path.join(self.TMP_DIR, self.DB_NAME) self.assertRunOK("put cf1_1 1 --create_if_missing", "OK") self.assertRunOK("put cf1_2 2 --create_if_missing", "OK") self.assertRunOK("put cf1_3 3 --try_load_options", "OK") # Given non-default column family to single CF DB. 
self.assertRunFAIL("get cf1_1 --column_family=two") self.assertRunOK("create_column_family two", "OK") - self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", - "OK") - self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", - "OK") + self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", "OK") + self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", "OK") self.assertRunOK("delete cf1_2", "OK") self.assertRunOK("create_column_family three", "OK") self.assertRunOK("delete cf2_2 --column_family=two", "OK") - self.assertRunOK( - "put cf3_1 3 --create_if_missing --column_family=three", - "OK") + self.assertRunOK("put cf3_1 3 --create_if_missing --column_family=three", "OK") self.assertRunOK("get cf1_1 --column_family=default", "1") - self.assertRunOK("dump --column_family=two", - "cf2_1 ==> 1\nKeys in range: 1") - self.assertRunOK("dump --column_family=two --try_load_options", - "cf2_1 ==> 1\nKeys in range: 1") - self.assertRunOK("dump", - "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2") - self.assertRunOK("get cf2_1 --column_family=two", - "1") - self.assertRunOK("get cf3_1 --column_family=three", - "3") + self.assertRunOK("dump --column_family=two", "cf2_1 ==> 1\nKeys in range: 1") + self.assertRunOK( + "dump --column_family=two --try_load_options", + "cf2_1 ==> 1\nKeys in range: 1", + ) + self.assertRunOK("dump", "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2") + self.assertRunOK("get cf2_1 --column_family=two", "1") + self.assertRunOK("get cf3_1 --column_family=three", "3") self.assertRunOK("drop_column_family three", "OK") # non-existing column family. self.assertRunFAIL("get cf3_1 --column_family=four") @@ -830,32 +920,36 @@ class LDBTestCase(unittest.TestCase): # Dump, load, write external sst and ingest it in another db dbPath = os.path.join(self.TMP_DIR, "db1") self.assertRunOK( - "batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" - % dbPath, - "OK") - self.assertRunOK("scan --db=%s" % dbPath, - "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + "batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" % dbPath, + "OK", + ) + self.assertRunOK("scan --db=%s" % dbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") dumpFilePath = os.path.join(self.TMP_DIR, "dump1") - with open(dumpFilePath, 'w') as f: + with open(dumpFilePath, "w") as f: f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40") externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst") - self.assertTrue(self.writeExternSst("--create_if_missing --db=%s" - % dbPath, - dumpFilePath, - externSstPath)) + self.assertTrue( + self.writeExternSst( + "--create_if_missing --db=%s" % dbPath, dumpFilePath, externSstPath + ) + ) # cannot ingest if allow_global_seqno is false self.assertFalse( self.ingestExternSst( - "--create_if_missing --allow_global_seqno=false --db=%s" - % dbPath, - externSstPath)) + "--create_if_missing --allow_global_seqno=false --db=%s" % dbPath, + externSstPath, + ) + ) self.assertTrue( self.ingestExternSst( - "--create_if_missing --allow_global_seqno --db=%s" - % dbPath, - externSstPath)) - self.assertRunOKFull("scan --db=%s" % dbPath, - "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40") + "--create_if_missing --allow_global_seqno --db=%s" % dbPath, + externSstPath, + ) + ) + self.assertRunOKFull( + "scan --db=%s" % dbPath, "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40" + ) + if __name__ == "__main__": unittest.main() diff --git a/tools/write_stress_runner.py b/tools/write_stress_runner.py index 962515dfb..f39f79cd4 100644 --- a/tools/write_stress_runner.py +++ 
b/tools/write_stress_runner.py @@ -2,11 +2,12 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from __future__ import absolute_import, division, print_function, unicode_literals -import subprocess import argparse import random -import time + +import subprocess import sys +import time def generate_runtimes(total_runtime): @@ -24,31 +25,33 @@ def generate_runtimes(total_runtime): def main(args): runtimes = generate_runtimes(int(args.runtime_sec)) - print("Going to execute write stress for " + str(runtimes)) # noqa: E999 T25377293 Grandfathered in + print( + "Going to execute write stress for " + str(runtimes) + ) # noqa: E999 T25377293 Grandfathered in first_time = True for runtime in runtimes: kill = random.choice([False, True]) - cmd = './write_stress --runtime_sec=' + \ - ("-1" if kill else str(runtime)) + cmd = "./write_stress --runtime_sec=" + ("-1" if kill else str(runtime)) if len(args.db) > 0: - cmd = cmd + ' --db=' + args.db + cmd = cmd + " --db=" + args.db if first_time: first_time = False else: # use current db - cmd = cmd + ' --destroy_db=false' + cmd = cmd + " --destroy_db=false" if random.choice([False, True]): - cmd = cmd + ' --delete_obsolete_files_with_fullscan=true' + cmd = cmd + " --delete_obsolete_files_with_fullscan=true" if random.choice([False, True]): - cmd = cmd + ' --low_open_files_mode=true' + cmd = cmd + " --low_open_files_mode=true" - print("Running write_stress for %d seconds (%s): %s" % - (runtime, ("kill-mode" if kill else "clean-shutdown-mode"), - cmd)) + print( + "Running write_stress for %d seconds (%s): %s" + % (runtime, ("kill-mode" if kill else "clean-shutdown-mode"), cmd) + ) child = subprocess.Popen([cmd], shell=True) killtime = time.time() + runtime @@ -58,19 +61,23 @@ def main(args): if child.returncode == 0: break else: - print("ERROR: write_stress died with exitcode=%d\n" - % child.returncode) + print( + "ERROR: write_stress died with exitcode=%d\n" % child.returncode + ) sys.exit(1) if kill: child.kill() # breathe time.sleep(3) -if __name__ == '__main__': + +if __name__ == "__main__": random.seed(time.time()) - parser = argparse.ArgumentParser(description="This script runs and kills \ - write_stress multiple times") - parser.add_argument("--runtime_sec", default='1000') - parser.add_argument("--db", default='') + parser = argparse.ArgumentParser( + description="This script runs and kills \ + write_stress multiple times" + ) + parser.add_argument("--runtime_sec", default="1000") + parser.add_argument("--db", default="") args = parser.parse_args() main(args)