Enable BLACK for internal_repo_rocksdb (#10710)

Summary:
Enable BLACK for internal_repo_rocksdb.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10710

Reviewed By: riversand963, zsol

Differential Revision: D39666245

Pulled By: gitbw95

fbshipit-source-id: ef364318d2bbba66e96f3211dd6a975174d52c21
main
Bo Wang 2 years ago committed by Facebook GitHub Bot
parent 00050d4634
commit 9e01de9066
  1. 150
      buckifier/buckify_rocksdb.py
  2. 163
      buckifier/targets_builder.py
  3. 9
      buckifier/targets_cfg.py
  4. 57
      buckifier/util.py
  5. 87
      build_tools/amalgamate.py
  6. 159
      build_tools/benchmark_log_tool.py
  7. 130
      build_tools/error_filter.py
  8. 37
      coverage/parse_gcov_output.py
  9. 14
      tools/advisor/advisor/bench_runner.py
  10. 116
      tools/advisor/advisor/config_optimizer_example.py
  11. 110
      tools/advisor/advisor/db_bench_runner.py
  12. 133
      tools/advisor/advisor/db_config_optimizer.py
  13. 37
      tools/advisor/advisor/db_log_parser.py
  14. 120
      tools/advisor/advisor/db_options_parser.py
  15. 128
      tools/advisor/advisor/db_stats_fetcher.py
  16. 39
      tools/advisor/advisor/db_timeseries_parser.py
  17. 24
      tools/advisor/advisor/ini_parser.py
  18. 200
      tools/advisor/advisor/rule_parser.py
  19. 83
      tools/advisor/advisor/rule_parser_example.py
  20. 100
      tools/advisor/test/test_db_bench_runner.py
  21. 75
      tools/advisor/test/test_db_log_parser.py
  22. 210
      tools/advisor/test/test_db_options_parser.py
  23. 95
      tools/advisor/test/test_db_stats_fetcher.py
  24. 120
      tools/advisor/test/test_rule_parser.py
  25. 112
      tools/benchmark_ci.py
  26. 8
      tools/block_cache_analyzer/block_cache_pysim_test.py
  27. 1
      tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py
  28. 4
      tools/check_all_python.py
  29. 450
      tools/db_crashtest.py
  30. 546
      tools/ldb_test.py
  31. 45
      tools/write_stress_runner.py

buckifier/buckify_rocksdb.py
@@ -1,19 +1,18 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try: try:
from builtins import str from builtins import str
except ImportError: except ImportError:
from __builtin__ import str from __builtin__ import str
from targets_builder import TARGETSBuilder import fnmatch
import json import json
import os import os
import fnmatch
import sys import sys
from targets_builder import TARGETSBuilder
from util import ColorString from util import ColorString
# This script generates TARGETS file for Buck. # This script generates TARGETS file for Buck.
@@ -44,13 +43,13 @@ def parse_src_mk(repo_path):
src_files = {} src_files = {}
for line in open(src_mk): for line in open(src_mk):
line = line.strip() line = line.strip()
if len(line) == 0 or line[0] == '#': if len(line) == 0 or line[0] == "#":
continue continue
if '=' in line: if "=" in line:
current_src = line.split('=')[0].strip() current_src = line.split("=")[0].strip()
src_files[current_src] = [] src_files[current_src] = []
elif '.c' in line: elif ".c" in line:
src_path = line.split('\\')[0].strip() src_path = line.split("\\")[0].strip()
src_files[current_src].append(src_path) src_files[current_src].append(src_path)
return src_files return src_files
@@ -58,14 +57,16 @@ def parse_src_mk(repo_path):
# get all .cc / .c files # get all .cc / .c files
def get_cc_files(repo_path): def get_cc_files(repo_path):
cc_files = [] cc_files = []
for root, dirnames, filenames in os.walk(repo_path): # noqa: B007 T25377293 Grandfathered in for root, _dirnames, filenames in os.walk(
root = root[(len(repo_path) + 1):] repo_path
): # noqa: B007 T25377293 Grandfathered in
root = root[(len(repo_path) + 1) :]
if "java" in root: if "java" in root:
# Skip java # Skip java
continue continue
for filename in fnmatch.filter(filenames, '*.cc'): for filename in fnmatch.filter(filenames, "*.cc"):
cc_files.append(os.path.join(root, filename)) cc_files.append(os.path.join(root, filename))
for filename in fnmatch.filter(filenames, '*.c'): for filename in fnmatch.filter(filenames, "*.c"):
cc_files.append(os.path.join(root, filename)) cc_files.append(os.path.join(root, filename))
return cc_files return cc_files
@@ -93,14 +94,10 @@ def get_non_parallel_tests(repo_path):
return s return s
# Parse extra dependencies passed by user from command line # Parse extra dependencies passed by user from command line
def get_dependencies(): def get_dependencies():
deps_map = { deps_map = {"": {"extra_deps": [], "extra_compiler_flags": []}}
'': {
'extra_deps': [],
'extra_compiler_flags': []
}
}
if len(sys.argv) < 2: if len(sys.argv) < 2:
return deps_map return deps_map
@@ -111,6 +108,7 @@ def get_dependencies():
v = encode_dict(v) v = encode_dict(v)
rv[k] = v rv[k] = v
return rv return rv
extra_deps = json.loads(sys.argv[1], object_hook=encode_dict) extra_deps = json.loads(sys.argv[1], object_hook=encode_dict)
for target_alias, deps in extra_deps.items(): for target_alias, deps in extra_deps.items():
deps_map[target_alias] = deps deps_map[target_alias] = deps
@@ -143,73 +141,73 @@ def generate_targets(repo_path, deps_map):
"rocksdb_lib", "rocksdb_lib",
src_mk["LIB_SOURCES"] + src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally # always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] + src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"],
src_mk["TOOL_LIB_SOURCES"],
deps=[ deps=[
"//folly/container:f14_hash", "//folly/container:f14_hash",
"//folly/experimental/coro:blocking_wait", "//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect", "//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine", "//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task", "//folly/experimental/coro:task",
"//folly/synchronization:distributed_mutex"]) "//folly/synchronization:distributed_mutex",
],
)
# rocksdb_whole_archive_lib # rocksdb_whole_archive_lib
TARGETS.add_library( TARGETS.add_library(
"rocksdb_whole_archive_lib", "rocksdb_whole_archive_lib",
src_mk["LIB_SOURCES"] + src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally # always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] + src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"],
src_mk["TOOL_LIB_SOURCES"],
deps=[ deps=[
"//folly/container:f14_hash", "//folly/container:f14_hash",
"//folly/experimental/coro:blocking_wait", "//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect", "//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine", "//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task", "//folly/experimental/coro:task",
"//folly/synchronization:distributed_mutex"], "//folly/synchronization:distributed_mutex",
],
headers=None, headers=None,
extra_external_deps="", extra_external_deps="",
link_whole=True) link_whole=True,
)
# rocksdb_test_lib # rocksdb_test_lib
TARGETS.add_library( TARGETS.add_library(
"rocksdb_test_lib", "rocksdb_test_lib",
src_mk.get("MOCK_LIB_SOURCES", []) + src_mk.get("MOCK_LIB_SOURCES", [])
src_mk.get("TEST_LIB_SOURCES", []) + + src_mk.get("TEST_LIB_SOURCES", [])
src_mk.get("EXP_LIB_SOURCES", []) + + src_mk.get("EXP_LIB_SOURCES", [])
src_mk.get("ANALYZER_LIB_SOURCES", []), + src_mk.get("ANALYZER_LIB_SOURCES", []),
[":rocksdb_lib"], [":rocksdb_lib"],
extra_test_libs=True extra_test_libs=True,
) )
# rocksdb_tools_lib # rocksdb_tools_lib
TARGETS.add_library( TARGETS.add_library(
"rocksdb_tools_lib", "rocksdb_tools_lib",
src_mk.get("BENCH_LIB_SOURCES", []) + src_mk.get("BENCH_LIB_SOURCES", [])
src_mk.get("ANALYZER_LIB_SOURCES", []) + + src_mk.get("ANALYZER_LIB_SOURCES", [])
["test_util/testutil.cc"], + ["test_util/testutil.cc"],
[":rocksdb_lib"]) [":rocksdb_lib"],
)
# rocksdb_cache_bench_tools_lib # rocksdb_cache_bench_tools_lib
TARGETS.add_library( TARGETS.add_library(
"rocksdb_cache_bench_tools_lib", "rocksdb_cache_bench_tools_lib",
src_mk.get("CACHE_BENCH_LIB_SOURCES", []), src_mk.get("CACHE_BENCH_LIB_SOURCES", []),
[":rocksdb_lib"]) [":rocksdb_lib"],
)
# rocksdb_stress_lib # rocksdb_stress_lib
TARGETS.add_rocksdb_library( TARGETS.add_rocksdb_library(
"rocksdb_stress_lib", "rocksdb_stress_lib",
src_mk.get("ANALYZER_LIB_SOURCES", []) src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', []) + src_mk.get("STRESS_LIB_SOURCES", [])
+ ["test_util/testutil.cc"]) + ["test_util/testutil.cc"],
)
# db_stress binary # db_stress binary
TARGETS.add_binary("db_stress", TARGETS.add_binary(
["db_stress_tool/db_stress.cc"], "db_stress", ["db_stress_tool/db_stress.cc"], [":rocksdb_stress_lib"]
[":rocksdb_stress_lib"]) )
# bench binaries # bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []): for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0] name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0]
TARGETS.add_binary( TARGETS.add_binary(name, [src], [], extra_bench_libs=True)
name,
[src],
[],
extra_bench_libs=True
)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map))) print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
# Dictionary test executable name -> relative source file path # Dictionary test executable name -> relative source file path
@@ -219,7 +217,7 @@ def generate_targets(repo_path, deps_map):
# are more than one .c test file, we need to extend # are more than one .c test file, we need to extend
# TARGETS.add_c_test() to include other C tests too. # TARGETS.add_c_test() to include other C tests too.
for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []): for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []):
if test_src != 'db/c_test.c': if test_src != "db/c_test.c":
print("Don't know how to deal with " + test_src) print("Don't know how to deal with " + test_src)
return False return False
TARGETS.add_c_test() TARGETS.add_c_test()
@@ -229,7 +227,7 @@ def generate_targets(repo_path, deps_map):
fast_fancy_bench_config_list = json.load(json_file) fast_fancy_bench_config_list = json.load(json_file)
for config_dict in fast_fancy_bench_config_list: for config_dict in fast_fancy_bench_config_list:
clean_benchmarks = {} clean_benchmarks = {}
benchmarks = config_dict['benchmarks'] benchmarks = config_dict["benchmarks"]
for binary, benchmark_dict in benchmarks.items(): for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {} clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items(): for benchmark, overloaded_metric_list in benchmark_dict.items():
@@ -237,13 +235,20 @@ def generate_targets(repo_path, deps_map):
for metric in overloaded_metric_list: for metric in overloaded_metric_list:
if not isinstance(metric, dict): if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric) clean_benchmarks[binary][benchmark].append(metric)
TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold']) TARGETS.add_fancy_bench_config(
config_dict["name"],
clean_benchmarks,
False,
config_dict["expected_runtime_one_iter"],
config_dict["sl_iterations"],
config_dict["regression_threshold"],
)
with open(f"{repo_path}/buckifier/bench-slow.json") as json_file: with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
slow_fancy_bench_config_list = json.load(json_file) slow_fancy_bench_config_list = json.load(json_file)
for config_dict in slow_fancy_bench_config_list: for config_dict in slow_fancy_bench_config_list:
clean_benchmarks = {} clean_benchmarks = {}
benchmarks = config_dict['benchmarks'] benchmarks = config_dict["benchmarks"]
for binary, benchmark_dict in benchmarks.items(): for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {} clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items(): for benchmark, overloaded_metric_list in benchmark_dict.items():
@@ -252,7 +257,14 @@ def generate_targets(repo_path, deps_map):
if not isinstance(metric, dict): if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric) clean_benchmarks[binary][benchmark].append(metric)
for config_dict in slow_fancy_bench_config_list: for config_dict in slow_fancy_bench_config_list:
TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold']) TARGETS.add_fancy_bench_config(
config_dict["name"] + "_slow",
clean_benchmarks,
True,
config_dict["expected_runtime_one_iter"],
config_dict["sl_iterations"],
config_dict["regression_threshold"],
)
# it is better servicelab experiments break # it is better servicelab experiments break
# than rocksdb github ci # than rocksdb github ci
except Exception: except Exception:
@@ -261,7 +273,7 @@ def generate_targets(repo_path, deps_map):
TARGETS.add_test_header() TARGETS.add_test_header()
for test_src in src_mk.get("TEST_MAIN_SOURCES", []): for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
test = test_src.split('.c')[0].strip().split('/')[-1].strip() test = test_src.split(".c")[0].strip().split("/")[-1].strip()
test_source_map[test] = test_src test_source_map[test] = test_src
print("" + test + " " + test_src) print("" + test + " " + test_src)
@@ -271,23 +283,29 @@ def generate_targets(repo_path, deps_map):
print(ColorString.warning("Failed to get test name for %s" % test_src)) print(ColorString.warning("Failed to get test name for %s" % test_src))
continue continue
test_target_name = \ test_target_name = test if not target_alias else test + "_" + target_alias
test if not target_alias else test + "_" + target_alias
if test in _EXPORTED_TEST_LIBS: if test in _EXPORTED_TEST_LIBS:
test_library = "%s_lib" % test_target_name test_library = "%s_lib" % test_target_name
TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True) TARGETS.add_library(
test_library,
[test_src],
deps=[":rocksdb_test_lib"],
extra_test_libs=True,
)
TARGETS.register_test( TARGETS.register_test(
test_target_name, test_target_name,
test_src, test_src,
deps = json.dumps(deps['extra_deps'] + [':'+test_library]), deps=json.dumps(deps["extra_deps"] + [":" + test_library]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags'])) extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]),
)
else: else:
TARGETS.register_test( TARGETS.register_test(
test_target_name, test_target_name,
test_src, test_src,
deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ), deps=json.dumps(deps["extra_deps"] + [":rocksdb_test_lib"]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags'])) extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]),
)
print(ColorString.info("Generated TARGETS Summary:")) print(ColorString.info("Generated TARGETS Summary:"))
print(ColorString.info("- %d libs" % TARGETS.total_lib)) print(ColorString.info("- %d libs" % TARGETS.total_lib))
@@ -300,8 +318,7 @@ def get_rocksdb_path():
# rocksdb = {script_dir}/.. # rocksdb = {script_dir}/..
script_dir = os.path.dirname(sys.argv[0]) script_dir = os.path.dirname(sys.argv[0])
script_dir = os.path.abspath(script_dir) script_dir = os.path.abspath(script_dir)
rocksdb_path = os.path.abspath( rocksdb_path = os.path.abspath(os.path.join(script_dir, "../"))
os.path.join(script_dir, "../"))
return rocksdb_path return rocksdb_path
@@ -318,5 +335,6 @@ def main():
if not ok: if not ok:
exit_with_error("Failed to generate TARGETS files") exit_with_error("Failed to generate TARGETS files")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

buckifier/targets_builder.py
@@ -1,113 +1,150 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try: try:
from builtins import object from builtins import object, str
from builtins import str
except ImportError: except ImportError:
from __builtin__ import object from __builtin__ import object, str
from __builtin__ import str
import targets_cfg
import pprint import pprint
import targets_cfg
def pretty_list(lst, indent=8): def pretty_list(lst, indent=8):
if lst is None or len(lst) == 0: if lst is None or len(lst) == 0:
return "" return ""
if len(lst) == 1: if len(lst) == 1:
return "\"%s\"" % lst[0] return '"%s"' % lst[0]
separator = "\",\n%s\"" % (" " * indent) separator = '",\n%s"' % (" " * indent)
res = separator.join(sorted(lst)) res = separator.join(sorted(lst))
res = "\n" + (" " * indent) + "\"" + res + "\",\n" + (" " * (indent - 4)) res = "\n" + (" " * indent) + '"' + res + '",\n' + (" " * (indent - 4))
return res return res
class TARGETSBuilder(object): class TARGETSBuilder(object):
def __init__(self, path, extra_argv): def __init__(self, path, extra_argv):
self.path = path self.path = path
self.targets_file = open(path, 'wb')
header = targets_cfg.rocksdb_target_header_template.format( header = targets_cfg.rocksdb_target_header_template.format(
extra_argv=extra_argv) extra_argv=extra_argv
self.targets_file.write(header.encode("utf-8")) )
with open(path, "wb") as targets_file:
targets_file.write(header.encode("utf-8"))
self.total_lib = 0 self.total_lib = 0
self.total_bin = 0 self.total_bin = 0
self.total_test = 0 self.total_test = 0
self.tests_cfg = "" self.tests_cfg = ""
def __del__(self): def add_library(
self.targets_file.close() self,
name,
def add_library(self, name, srcs, deps=None, headers=None, srcs,
extra_external_deps="", link_whole=False, deps=None,
external_dependencies=None, extra_test_libs=False): headers=None,
extra_external_deps="",
link_whole=False,
external_dependencies=None,
extra_test_libs=False,
):
if headers is not None: if headers is not None:
headers = "[" + pretty_list(headers) + "]" headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.library_template.format( with open(self.path, "ab") as targets_file:
name=name, targets_file.write(
srcs=pretty_list(srcs), targets_cfg.library_template.format(
headers=headers, name=name,
deps=pretty_list(deps), srcs=pretty_list(srcs),
extra_external_deps=extra_external_deps, headers=headers,
link_whole=link_whole, deps=pretty_list(deps),
external_dependencies=pretty_list(external_dependencies), extra_external_deps=extra_external_deps,
extra_test_libs=extra_test_libs link_whole=link_whole,
).encode("utf-8")) external_dependencies=pretty_list(external_dependencies),
extra_test_libs=extra_test_libs,
).encode("utf-8")
)
self.total_lib = self.total_lib + 1 self.total_lib = self.total_lib + 1
def add_rocksdb_library(self, name, srcs, headers=None, def add_rocksdb_library(self, name, srcs, headers=None, external_dependencies=None):
external_dependencies=None):
if headers is not None: if headers is not None:
headers = "[" + pretty_list(headers) + "]" headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.rocksdb_library_template.format( with open(self.path, "ab") as targets_file:
name=name, targets_file.write(
srcs=pretty_list(srcs), targets_cfg.rocksdb_library_template.format(
headers=headers, name=name,
external_dependencies=pretty_list(external_dependencies) srcs=pretty_list(srcs),
).encode("utf-8") headers=headers,
external_dependencies=pretty_list(external_dependencies),
).encode("utf-8")
) )
self.total_lib = self.total_lib + 1 self.total_lib = self.total_lib + 1
def add_binary(self, name, srcs, deps=None, extra_preprocessor_flags=None,extra_bench_libs=False): def add_binary(
self.targets_file.write(targets_cfg.binary_template.format( self,
name=name, name,
srcs=pretty_list(srcs), srcs,
deps=pretty_list(deps), deps=None,
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags), extra_preprocessor_flags=None,
extra_bench_libs=extra_bench_libs, extra_bench_libs=False,
).encode("utf-8")) ):
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.binary_template.format(
name=name,
srcs=pretty_list(srcs),
deps=pretty_list(deps),
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
extra_bench_libs=extra_bench_libs,
).encode("utf-8")
)
self.total_bin = self.total_bin + 1 self.total_bin = self.total_bin + 1
def add_c_test(self): def add_c_test(self):
self.targets_file.write(b""" with open(self.path, "ab") as targets_file:
targets_file.write(
b"""
add_c_test_wrapper() add_c_test_wrapper()
""") """
)
def add_test_header(self): def add_test_header(self):
self.targets_file.write(b""" with open(self.path, "ab") as targets_file:
targets_file.write(
b"""
# Generate a test rule for each entry in ROCKS_TESTS # Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code # Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included. # will not be included.
""") """
)
def add_fancy_bench_config(self, name, bench_config, slow, expected_runtime, sl_iterations, regression_threshold): def add_fancy_bench_config(
self.targets_file.write(targets_cfg.fancy_bench_template.format( self,
name,
bench_config,
slow,
expected_runtime,
sl_iterations,
regression_threshold,
):
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.fancy_bench_template.format(
name=name, name=name,
bench_config=pprint.pformat(bench_config), bench_config=pprint.pformat(bench_config),
slow=slow, slow=slow,
expected_runtime=expected_runtime, expected_runtime=expected_runtime,
sl_iterations=sl_iterations, sl_iterations=sl_iterations,
regression_threshold=regression_threshold regression_threshold=regression_threshold,
).encode("utf-8")) ).encode("utf-8")
)
def register_test(self,
test_name,
src,
deps,
extra_compiler_flags):
self.targets_file.write(targets_cfg.unittests_template.format(test_name=test_name,test_cc=str(src),deps=deps, def register_test(self, test_name, src, deps, extra_compiler_flags):
extra_compiler_flags=extra_compiler_flags).encode("utf-8")) with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.unittests_template.format(
test_name=test_name,
test_cc=str(src),
deps=deps,
extra_compiler_flags=extra_compiler_flags,
).encode("utf-8")
)
self.total_test = self.total_test + 1 self.total_test = self.total_test + 1

buckifier/targets_cfg.py
@@ -1,11 +1,7 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
rocksdb_target_header_template = \ rocksdb_target_header_template = """# This file \100generated by:
"""# This file \100generated by:
#$ python3 buckifier/buckify_rocksdb.py{extra_argv} #$ python3 buckifier/buckify_rocksdb.py{extra_argv}
# --> DO NOT EDIT MANUALLY <-- # --> DO NOT EDIT MANUALLY <--
# This file is a Facebook-specific integration for buck builds, so can # This file is a Facebook-specific integration for buck builds, so can
@@ -27,7 +23,6 @@ rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers})
""" """
binary_template = """ binary_template = """
cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs}) cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs})
""" """

buckifier/util.py
@@ -2,37 +2,35 @@
""" """
This module keeps commonly used components. This module keeps commonly used components.
""" """
from __future__ import absolute_import from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try: try:
from builtins import object from builtins import object
except ImportError: except ImportError:
from __builtin__ import object from __builtin__ import object
import os
import subprocess import subprocess
import sys import sys
import os
import time import time
class ColorString(object): class ColorString(object):
""" Generate colorful strings on terminal """ """Generate colorful strings on terminal"""
HEADER = '\033[95m'
BLUE = '\033[94m' HEADER = "\033[95m"
GREEN = '\033[92m' BLUE = "\033[94m"
WARNING = '\033[93m' GREEN = "\033[92m"
FAIL = '\033[91m' WARNING = "\033[93m"
ENDC = '\033[0m' FAIL = "\033[91m"
ENDC = "\033[0m"
@staticmethod @staticmethod
def _make_color_str(text, color): def _make_color_str(text, color):
# In Python2, default encoding for unicode string is ASCII # In Python2, default encoding for unicode string is ASCII
if sys.version_info.major <= 2: if sys.version_info.major <= 2:
return "".join( return "".join([color, text.encode("utf-8"), ColorString.ENDC])
[color, text.encode('utf-8'), ColorString.ENDC])
# From Python3, default encoding for unicode string is UTF-8 # From Python3, default encoding for unicode string is UTF-8
return "".join( return "".join([color, text, ColorString.ENDC])
[color, text, ColorString.ENDC])
@staticmethod @staticmethod
def ok(text): def ok(text):
@@ -68,37 +66,38 @@ class ColorString(object):
def run_shell_command(shell_cmd, cmd_dir=None): def run_shell_command(shell_cmd, cmd_dir=None):
""" Run a single shell command. """Run a single shell command.
@returns a tuple of shell command return code, stdout, stderr """ @returns a tuple of shell command return code, stdout, stderr"""
if cmd_dir is not None and not os.path.exists(cmd_dir): if cmd_dir is not None and not os.path.exists(cmd_dir):
run_shell_command("mkdir -p %s" % cmd_dir) run_shell_command("mkdir -p %s" % cmd_dir)
start = time.time() start = time.time()
print("\t>>> Running: " + shell_cmd) print("\t>>> Running: " + shell_cmd)
p = subprocess.Popen(shell_cmd, p = subprocess.Popen( # noqa
shell=True, shell_cmd,
stdout=subprocess.PIPE, shell=True,
stderr=subprocess.PIPE, stdout=subprocess.PIPE,
cwd=cmd_dir) stderr=subprocess.PIPE,
cwd=cmd_dir,
)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
end = time.time() end = time.time()
# Report time if we spent more than 5 minutes executing a command # Report time if we spent more than 5 minutes executing a command
execution_time = end - start execution_time = end - start
if execution_time > (60 * 5): if execution_time > (60 * 5):
mins = (execution_time / 60) mins = execution_time / 60
secs = (execution_time % 60) secs = execution_time % 60
print("\t>time spent: %d minutes %d seconds" % (mins, secs)) print("\t>time spent: %d minutes %d seconds" % (mins, secs))
return p.returncode, stdout, stderr return p.returncode, stdout, stderr
def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False): def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False):
""" Execute a sequence of shell commands, which is equivalent to """Execute a sequence of shell commands, which is equivalent to
running `cmd1 && cmd2 && cmd3` running `cmd1 && cmd2 && cmd3`
@returns boolean indication if all commands succeeds. @returns boolean indication if all commands succeeds.
""" """
if cmd_dir: if cmd_dir:

build_tools/amalgamate.py
@@ -28,14 +28,15 @@
from __future__ import print_function from __future__ import print_function
import argparse import argparse
from os import path
import re import re
import sys import sys
from os import path
include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$') include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$')
included = set() included = set()
excluded = set() excluded = set()
def find_header(name, abs_path, include_paths): def find_header(name, abs_path, include_paths):
samedir = path.join(path.dirname(abs_path), name) samedir = path.join(path.dirname(abs_path), name)
if path.exists(samedir): if path.exists(samedir):
@@ -46,17 +47,31 @@ def find_header(name, abs_path, include_paths):
return include_path return include_path
return None return None
def expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths):
def expand_include(
include_path,
f,
abs_path,
source_out,
header_out,
include_paths,
public_include_paths,
):
if include_path in included: if include_path in included:
return False return False
included.add(include_path) included.add(include_path)
with open(include_path) as f: with open(include_path) as f:
print('#line 1 "{}"'.format(include_path), file=source_out) print('#line 1 "{}"'.format(include_path), file=source_out)
process_file(f, include_path, source_out, header_out, include_paths, public_include_paths) process_file(
f, include_path, source_out, header_out, include_paths, public_include_paths
)
return True return True
def process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths):
def process_file(
f, abs_path, source_out, header_out, include_paths, public_include_paths
):
for (line, text) in enumerate(f): for (line, text) in enumerate(f):
m = include_re.match(text) m = include_re.match(text)
if m: if m:
@@ -68,7 +83,15 @@ def process_file(f, abs_path, source_out, header_out, include_paths, public_incl
source_out.write(text) source_out.write(text)
expanded = False expanded = False
else: else:
expanded = expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths) expanded = expand_include(
include_path,
f,
abs_path,
source_out,
header_out,
include_paths,
public_include_paths,
)
else: else:
# now try public headers # now try public headers
include_path = find_header(filename, abs_path, public_include_paths) include_path = find_header(filename, abs_path, public_include_paths)
@@ -78,23 +101,52 @@ def process_file(f, abs_path, source_out, header_out, include_paths, public_incl
if include_path in excluded: if include_path in excluded:
source_out.write(text) source_out.write(text)
else: else:
expand_include(include_path, f, abs_path, header_out, None, public_include_paths, []) expand_include(
include_path,
f,
abs_path,
header_out,
None,
public_include_paths,
[],
)
else: else:
sys.exit("unable to find {}, included in {} on line {}".format(filename, abs_path, line)) sys.exit(
"unable to find {}, included in {} on line {}".format(
filename, abs_path, line
)
)
if expanded: if expanded:
print('#line {} "{}"'.format(line+1, abs_path), file=source_out) print('#line {} "{}"'.format(line + 1, abs_path), file=source_out)
elif text != "#pragma once\n": elif text != "#pragma once\n":
source_out.write(text) source_out.write(text)
def main(): def main():
parser = argparse.ArgumentParser(description="Transform a unity build into an amalgamation") parser = argparse.ArgumentParser(
description="Transform a unity build into an amalgamation"
)
parser.add_argument("source", help="source file") parser.add_argument("source", help="source file")
parser.add_argument("-I", action="append", dest="include_paths", help="include paths for private headers") parser.add_argument(
parser.add_argument("-i", action="append", dest="public_include_paths", help="include paths for public headers") "-I",
parser.add_argument("-x", action="append", dest="excluded", help="excluded header files") action="append",
dest="include_paths",
help="include paths for private headers",
)
parser.add_argument(
"-i",
action="append",
dest="public_include_paths",
help="include paths for public headers",
)
parser.add_argument(
"-x", action="append", dest="excluded", help="excluded header files"
)
parser.add_argument("-o", dest="source_out", help="output C++ file", required=True) parser.add_argument("-o", dest="source_out", help="output C++ file", required=True)
parser.add_argument("-H", dest="header_out", help="output C++ header file", required=True) parser.add_argument(
"-H", dest="header_out", help="output C++ header file", required=True
)
args = parser.parse_args() args = parser.parse_args()
include_paths = list(map(path.abspath, args.include_paths or [])) include_paths = list(map(path.abspath, args.include_paths or []))
@ -102,10 +154,15 @@ def main():
excluded.update(map(path.abspath, args.excluded or [])) excluded.update(map(path.abspath, args.excluded or []))
filename = args.source filename = args.source
abs_path = path.abspath(filename) abs_path = path.abspath(filename)
with open(filename) as f, open(args.source_out, 'w') as source_out, open(args.header_out, 'w') as header_out: with open(filename) as f, open(args.source_out, "w") as source_out, open(
args.header_out, "w"
) as header_out:
print('#line 1 "{}"'.format(filename), file=source_out) print('#line 1 "{}"'.format(filename), file=source_out)
print('#include "{}"'.format(header_out.name), file=source_out) print('#include "{}"'.format(header_out.name), file=source_out)
process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths) process_file(
f, abs_path, source_out, header_out, include_paths, public_include_paths
)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

@ -4,23 +4,27 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
'''Access the results of benchmark runs """Access the results of benchmark runs
Send these results on to OpenSearch graphing service Send these results on to OpenSearch graphing service
''' """
import argparse import argparse
import itertools import itertools
import logging
import os import os
import re import re
import sys import sys
import requests import requests
from dateutil import parser from dateutil import parser
import logging
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
class Configuration: class Configuration:
opensearch_user = os.environ['ES_USER'] opensearch_user = os.environ["ES_USER"]
opensearch_pass = os.environ['ES_PASS'] opensearch_pass = os.environ["ES_PASS"]
class BenchmarkResultException(Exception): class BenchmarkResultException(Exception):
def __init__(self, message, content): def __init__(self, message, content):
@ -30,45 +34,71 @@ class BenchmarkResultException(Exception):
class BenchmarkUtils: class BenchmarkUtils:
expected_keys = ['ops_sec', 'mb_sec', 'lsm_sz', 'blob_sz', 'c_wgb', 'w_amp', expected_keys = [
'c_mbps', 'c_wsecs', 'c_csecs', 'b_rgb', 'b_wgb', 'usec_op', "ops_sec",
'p50', 'p99', 'p99.9', 'p99.99', 'pmax', "mb_sec",
'uptime', 'stall%', 'Nstall', 'u_cpu', 's_cpu', 'rss', 'test', 'date', 'version', 'job_id'] "lsm_sz",
"blob_sz",
"c_wgb",
"w_amp",
"c_mbps",
"c_wsecs",
"c_csecs",
"b_rgb",
"b_wgb",
"usec_op",
"p50",
"p99",
"p99.9",
"p99.99",
"pmax",
"uptime",
"stall%",
"Nstall",
"u_cpu",
"s_cpu",
"rss",
"test",
"date",
"version",
"job_id",
]
def sanity_check(row): def sanity_check(row):
if not 'test' in row: if "test" not in row:
logging.debug(f"not 'test' in row: {row}") logging.debug(f"not 'test' in row: {row}")
return False return False
if row['test'] == '': if row["test"] == "":
logging.debug(f"row['test'] == '': {row}") logging.debug(f"row['test'] == '': {row}")
return False return False
if not 'date' in row: if "date" not in row:
logging.debug(f"not 'date' in row: {row}") logging.debug(f"not 'date' in row: {row}")
return False return False
if not 'ops_sec' in row: if "ops_sec" not in row:
logging.debug(f"not 'ops_sec' in row: {row}") logging.debug(f"not 'ops_sec' in row: {row}")
return False return False
try: try:
v = int(row['ops_sec']) _ = int(row["ops_sec"])
except (ValueError, TypeError): except (ValueError, TypeError):
logging.debug(f"int(row['ops_sec']): {row}") logging.debug(f"int(row['ops_sec']): {row}")
return False return False
try: try:
(_, _) = parser.parse(row['date'], fuzzy_with_tokens=True) (_, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
except (parser.ParserError): except (parser.ParserError):
logging.error(f"parser.parse((row['date']): not a valid format for date in row: {row}") logging.error(
f"parser.parse((row['date']): not a valid format for date in row: {row}"
)
return False return False
return True return True
def conform_opensearch(row): def conform_opensearch(row):
(dt, _) = parser.parse(row['date'], fuzzy_with_tokens=True) (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
# create a test_date field, which was previously what was expected # create a test_date field, which was previously what was expected
# repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month) # repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month)
# e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55 # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
row['test_date'] = dt.isoformat() row["test_date"] = dt.isoformat()
row['date'] = dt.isoformat() row["date"] = dt.isoformat()
return dict((key.replace('.', '_'), value) return {key.replace(".", "_") : value for key, value in row.items()}
for (key, value) in row.items())
class ResultParser: class ResultParser:
@ -80,24 +110,24 @@ class ResultParser:
def ignore(self, l_in: str): def ignore(self, l_in: str):
if len(l_in) == 0: if len(l_in) == 0:
return True return True
if l_in[0:1] == '#': if l_in[0:1] == "#":
return True return True
return False return False
def line(self, l_in: str): def line(self, line_in: str):
'''Parse a line into items """Parse a line into items
Being clever about separators Being clever about separators
''' """
l = l_in line = line_in
row = [] row = []
while l != '': while line != "":
match_item = self.field.match(l) match_item = self.field.match(line)
if match_item: if match_item:
item = match_item.group(0) item = match_item.group(0)
row.append(item) row.append(item)
l = l[len(item):] line = line[len(item) :]
else: else:
match_intra = self.intra.match(l) match_intra = self.intra.match(line)
if match_intra: if match_intra:
intra = match_intra.group(0) intra = match_intra.group(0)
# Count the separators # Count the separators
@ -107,26 +137,27 @@ class ResultParser:
sep_count = len(tabbed) - 1 sep_count = len(tabbed) - 1
if sep_count == 0: if sep_count == 0:
sep_count = 1 sep_count = 1
for i in range(sep_count-1): for _ in range(sep_count - 1):
row.append('') row.append("")
l = l[len(intra):] line = line[len(intra) :]
else: else:
raise BenchmarkResultException( raise BenchmarkResultException("Invalid TSV line", f"{line_in} at {line}")
'Invalid TSV line', f"{l_in} at {l}")
return row return row
def parse(self, lines): def parse(self, lines):
'''Parse something that iterates lines''' """Parse something that iterates lines"""
rows = [self.line(line) for line in lines if not self.ignore(line)] rows = [self.line(line) for line in lines if not self.ignore(line)]
header = rows[0] header = rows[0]
width = len(header) width = len(header)
records = [{k: v for (k, v) in itertools.zip_longest( records = [
header, row[:width])} for row in rows[1:]] {k: v for (k, v) in itertools.zip_longest(header, row[:width])}
for row in rows[1:]
]
return records return records
def load_report_from_tsv(filename: str): def load_report_from_tsv(filename: str):
file = open(filename, 'r') file = open(filename, "r")
contents = file.readlines() contents = file.readlines()
file.close() file.close()
parser = ResultParser() parser = ResultParser()
@ -136,52 +167,70 @@ def load_report_from_tsv(filename: str):
def push_report_to_opensearch(report, esdocument): def push_report_to_opensearch(report, esdocument):
sanitized = [BenchmarkUtils.conform_opensearch(row) sanitized = [
for row in report if BenchmarkUtils.sanity_check(row)] BenchmarkUtils.conform_opensearch(row)
logging.debug(f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch") for row in report
if BenchmarkUtils.sanity_check(row)
]
logging.debug(
f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch"
)
for single_benchmark in sanitized: for single_benchmark in sanitized:
logging.debug(f"upload benchmark: {single_benchmark}") logging.debug(f"upload benchmark: {single_benchmark}")
response = requests.post( response = requests.post(
esdocument, esdocument,
json=single_benchmark, auth=(os.environ['ES_USER'], os.environ['ES_PASS'])) json=single_benchmark,
auth=(os.environ["ES_USER"], os.environ["ES_PASS"]),
)
logging.debug( logging.debug(
f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}") f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}"
)
response.raise_for_status() response.raise_for_status()
def push_report_to_null(report): def push_report_to_null(report):
for row in report: for row in report:
if BenchmarkUtils.sanity_check(row): if BenchmarkUtils.sanity_check(row):
logging.debug(f"row {row}") logging.debug(f"row {row}")
conformed = BenchmarkUtils.conform_opensearch(row) conformed = BenchmarkUtils.conform_opensearch(row)
logging.debug(f"conformed row {conformed}") logging.debug(f"conformed row {conformed}")
def main(): def main():
'''Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
This tool will This tool will
(1) Open a local tsv benchmark report file (1) Open a local tsv benchmark report file
(2) Upload to OpenSearch document, via https/JSON (2) Upload to OpenSearch document, via https/JSON
''' """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.")
description='CircleCI benchmark scraper.')
# --tsvfile is the name of the file to read results from # --tsvfile is the name of the file to read results from
# --esdocument is the ElasticSearch document to push these results into # --esdocument is the ElasticSearch document to push these results into
# #
parser.add_argument('--tsvfile', default='build_tools/circle_api_scraper_input.txt', parser.add_argument(
help='File from which to read tsv report') "--tsvfile",
parser.add_argument('--esdocument', help='ElasticSearch/OpenSearch document URL to upload report into') default="build_tools/circle_api_scraper_input.txt",
parser.add_argument('--upload', choices=['opensearch', 'none'], default='opensearch') help="File from which to read tsv report",
)
parser.add_argument(
"--esdocument",
help="ElasticSearch/OpenSearch document URL to upload report into",
)
parser.add_argument(
"--upload", choices=["opensearch", "none"], default="opensearch"
)
args = parser.parse_args() args = parser.parse_args()
logging.debug(f"Arguments: {args}") logging.debug(f"Arguments: {args}")
reports = load_report_from_tsv(args.tsvfile) reports = load_report_from_tsv(args.tsvfile)
if (args.upload == 'opensearch'): if args.upload == "opensearch":
push_report_to_opensearch(reports, args.esdocument) push_report_to_opensearch(reports, args.esdocument)
else: else:
push_report_to_null(reports) push_report_to_null(reports)
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

@ -3,16 +3,13 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
'''Filter for error messages in test output: """Filter for error messages in test output:
- Receives merged stdout/stderr from test on stdin - Receives merged stdout/stderr from test on stdin
- Finds patterns of known error messages for test name (first argument) - Finds patterns of known error messages for test name (first argument)
- Prints those error messages to stdout - Prints those error messages to stdout
''' """
from __future__ import absolute_import from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import re import re
import sys import sys
@ -20,23 +17,24 @@ import sys
class ErrorParserBase(object): class ErrorParserBase(object):
def parse_error(self, line): def parse_error(self, line):
'''Parses a line of test output. If it contains an error, returns a """Parses a line of test output. If it contains an error, returns a
formatted message describing the error; otherwise, returns None. formatted message describing the error; otherwise, returns None.
Subclasses must override this method. Subclasses must override this method.
''' """
raise NotImplementedError raise NotImplementedError
class GTestErrorParser(ErrorParserBase): class GTestErrorParser(ErrorParserBase):
'''A parser that remembers the last test that began running so it can print """A parser that remembers the last test that began running so it can print
that test's name upon detecting failure. that test's name upon detecting failure.
''' """
_GTEST_NAME_PATTERN = re.compile(r'\[ RUN \] (\S+)$')
_GTEST_NAME_PATTERN = re.compile(r"\[ RUN \] (\S+)$")
# format: '<filename or "unknown file">:<line #>: Failure' # format: '<filename or "unknown file">:<line #>: Failure'
_GTEST_FAIL_PATTERN = re.compile(r'(unknown file|\S+:\d+): Failure$') _GTEST_FAIL_PATTERN = re.compile(r"(unknown file|\S+:\d+): Failure$")
def __init__(self): def __init__(self):
self._last_gtest_name = 'Unknown test' self._last_gtest_name = "Unknown test"
def parse_error(self, line): def parse_error(self, line):
gtest_name_match = self._GTEST_NAME_PATTERN.match(line) gtest_name_match = self._GTEST_NAME_PATTERN.match(line)
@ -45,14 +43,13 @@ class GTestErrorParser(ErrorParserBase):
return None return None
gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line) gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line)
if gtest_fail_match: if gtest_fail_match:
return '%s failed: %s' % ( return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1))
self._last_gtest_name, gtest_fail_match.group(1))
return None return None
class MatchErrorParser(ErrorParserBase): class MatchErrorParser(ErrorParserBase):
'''A simple parser that returns the whole line if it matches the pattern. """A simple parser that returns the whole line if it matches the pattern."""
'''
def __init__(self, pattern): def __init__(self, pattern):
self._pattern = re.compile(pattern) self._pattern = re.compile(pattern)
@ -69,97 +66,104 @@ class CompilerErrorParser(MatchErrorParser):
# format (link error): # format (link error):
# '<filename>:<line #>: error: <error msg>' # '<filename>:<line #>: error: <error msg>'
# The below regex catches both # The below regex catches both
super(CompilerErrorParser, self).__init__(r'\S+:\d+: error:') super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:")
class ScanBuildErrorParser(MatchErrorParser): class ScanBuildErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
super(ScanBuildErrorParser, self).__init__( super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$")
r'scan-build: \d+ bugs found.$')
class DbCrashErrorParser(MatchErrorParser): class DbCrashErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
super(DbCrashErrorParser, self).__init__(r'\*\*\*.*\^$|TEST FAILED.') super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.")
class WriteStressErrorParser(MatchErrorParser): class WriteStressErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
super(WriteStressErrorParser, self).__init__( super(WriteStressErrorParser, self).__init__(
r'ERROR: write_stress died with exitcode=\d+') r"ERROR: write_stress died with exitcode=\d+"
)
class AsanErrorParser(MatchErrorParser): class AsanErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
super(AsanErrorParser, self).__init__( super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:")
r'==\d+==ERROR: AddressSanitizer:')
class UbsanErrorParser(MatchErrorParser): class UbsanErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
# format: '<filename>:<line #>:<column #>: runtime error: <error msg>' # format: '<filename>:<line #>:<column #>: runtime error: <error msg>'
super(UbsanErrorParser, self).__init__(r'\S+:\d+:\d+: runtime error:') super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:")
class ValgrindErrorParser(MatchErrorParser): class ValgrindErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
# just grab the summary, valgrind doesn't clearly distinguish errors # just grab the summary, valgrind doesn't clearly distinguish errors
# from other log messages. # from other log messages.
super(ValgrindErrorParser, self).__init__(r'==\d+== ERROR SUMMARY:') super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:")
class CompatErrorParser(MatchErrorParser): class CompatErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
super(CompatErrorParser, self).__init__(r'==== .*[Ee]rror.* ====$') super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$")
class TsanErrorParser(MatchErrorParser): class TsanErrorParser(MatchErrorParser):
def __init__(self): def __init__(self):
super(TsanErrorParser, self).__init__(r'WARNING: ThreadSanitizer:') super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:")
_TEST_NAME_TO_PARSERS = { _TEST_NAME_TO_PARSERS = {
'punit': [CompilerErrorParser, GTestErrorParser], "punit": [CompilerErrorParser, GTestErrorParser],
'unit': [CompilerErrorParser, GTestErrorParser], "unit": [CompilerErrorParser, GTestErrorParser],
'release': [CompilerErrorParser, GTestErrorParser], "release": [CompilerErrorParser, GTestErrorParser],
'unit_481': [CompilerErrorParser, GTestErrorParser], "unit_481": [CompilerErrorParser, GTestErrorParser],
'release_481': [CompilerErrorParser, GTestErrorParser], "release_481": [CompilerErrorParser, GTestErrorParser],
'clang_unit': [CompilerErrorParser, GTestErrorParser], "clang_unit": [CompilerErrorParser, GTestErrorParser],
'clang_release': [CompilerErrorParser, GTestErrorParser], "clang_release": [CompilerErrorParser, GTestErrorParser],
'clang_analyze': [CompilerErrorParser, ScanBuildErrorParser], "clang_analyze": [CompilerErrorParser, ScanBuildErrorParser],
'code_cov': [CompilerErrorParser, GTestErrorParser], "code_cov": [CompilerErrorParser, GTestErrorParser],
'unity': [CompilerErrorParser, GTestErrorParser], "unity": [CompilerErrorParser, GTestErrorParser],
'lite': [CompilerErrorParser], "lite": [CompilerErrorParser],
'lite_test': [CompilerErrorParser, GTestErrorParser], "lite_test": [CompilerErrorParser, GTestErrorParser],
'stress_crash': [CompilerErrorParser, DbCrashErrorParser], "stress_crash": [CompilerErrorParser, DbCrashErrorParser],
'stress_crash_with_atomic_flush': [CompilerErrorParser, DbCrashErrorParser], "stress_crash_with_atomic_flush": [CompilerErrorParser, DbCrashErrorParser],
'stress_crash_with_txn': [CompilerErrorParser, DbCrashErrorParser], "stress_crash_with_txn": [CompilerErrorParser, DbCrashErrorParser],
'write_stress': [CompilerErrorParser, WriteStressErrorParser], "write_stress": [CompilerErrorParser, WriteStressErrorParser],
'asan': [CompilerErrorParser, GTestErrorParser, AsanErrorParser], "asan": [CompilerErrorParser, GTestErrorParser, AsanErrorParser],
'asan_crash': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], "asan_crash": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'asan_crash_with_atomic_flush': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], "asan_crash_with_atomic_flush": [
'asan_crash_with_txn': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], CompilerErrorParser,
'ubsan': [CompilerErrorParser, GTestErrorParser, UbsanErrorParser], AsanErrorParser,
'ubsan_crash': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], DbCrashErrorParser,
'ubsan_crash_with_atomic_flush': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], ],
'ubsan_crash_with_txn': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], "asan_crash_with_txn": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'valgrind': [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser], "ubsan": [CompilerErrorParser, GTestErrorParser, UbsanErrorParser],
'tsan': [CompilerErrorParser, GTestErrorParser, TsanErrorParser], "ubsan_crash": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'format_compatible': [CompilerErrorParser, CompatErrorParser], "ubsan_crash_with_atomic_flush": [
'run_format_compatible': [CompilerErrorParser, CompatErrorParser], CompilerErrorParser,
'no_compression': [CompilerErrorParser, GTestErrorParser], UbsanErrorParser,
'run_no_compression': [CompilerErrorParser, GTestErrorParser], DbCrashErrorParser,
'regression': [CompilerErrorParser], ],
'run_regression': [CompilerErrorParser], "ubsan_crash_with_txn": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
"valgrind": [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser],
"tsan": [CompilerErrorParser, GTestErrorParser, TsanErrorParser],
"format_compatible": [CompilerErrorParser, CompatErrorParser],
"run_format_compatible": [CompilerErrorParser, CompatErrorParser],
"no_compression": [CompilerErrorParser, GTestErrorParser],
"run_no_compression": [CompilerErrorParser, GTestErrorParser],
"regression": [CompilerErrorParser],
"run_regression": [CompilerErrorParser],
} }
def main(): def main():
if len(sys.argv) != 2: if len(sys.argv) != 2:
return 'Usage: %s <test name>' % sys.argv[0] return "Usage: %s <test name>" % sys.argv[0]
test_name = sys.argv[1] test_name = sys.argv[1]
if test_name not in _TEST_NAME_TO_PARSERS: if test_name not in _TEST_NAME_TO_PARSERS:
return 'Unknown test name: %s' % test_name return "Unknown test name: %s" % test_name
error_parsers = [] error_parsers = []
for parser_cls in _TEST_NAME_TO_PARSERS[test_name]: for parser_cls in _TEST_NAME_TO_PARSERS[test_name]:
@ -173,5 +177,5 @@ def main():
print(error_msg) print(error_msg)
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

@ -47,35 +47,39 @@ def parse_gcov_report(gcov_input):
return per_file_coverage, total_coverage return per_file_coverage, total_coverage
def get_option_parser(): def get_option_parser():
usage = "Parse the gcov output and generate more human-readable code " +\ usage = (
"coverage report." "Parse the gcov output and generate more human-readable code "
+ "coverage report."
)
parser = optparse.OptionParser(usage) parser = optparse.OptionParser(usage)
parser.add_option( parser.add_option(
"--interested-files", "-i", "--interested-files",
"-i",
dest="filenames", dest="filenames",
help="Comma separated files names. if specified, we will display " + help="Comma separated files names. if specified, we will display "
"the coverage report only for interested source files. " + + "the coverage report only for interested source files. "
"Otherwise we will display the coverage report for all " + + "Otherwise we will display the coverage report for all "
"source files." + "source files.",
) )
return parser return parser
def display_file_coverage(per_file_coverage, total_coverage): def display_file_coverage(per_file_coverage, total_coverage):
# To print out auto-adjustable column, we need to know the longest # To print out auto-adjustable column, we need to know the longest
# length of file names. # length of file names.
max_file_name_length = max( max_file_name_length = max(len(fname) for fname in per_file_coverage.keys())
len(fname) for fname in per_file_coverage.keys()
)
# -- Print header # -- Print header
# size of separator is determined by 3 column sizes: # size of separator is determined by 3 column sizes:
# file name, coverage percentage and lines. # file name, coverage percentage and lines.
header_template = \ header_template = "%" + str(max_file_name_length) + "s\t%s\t%s"
"%" + str(max_file_name_length) + "s\t%s\t%s"
separator = "-" * (max_file_name_length + 10 + 20) separator = "-" * (max_file_name_length + 10 + 20)
print(header_template % ("Filename", "Coverage", "Lines")) # noqa: E999 T25377293 Grandfathered in print(
header_template % ("Filename", "Coverage", "Lines")
) # noqa: E999 T25377293 Grandfathered in
print(separator) print(separator)
# -- Print body # -- Print body
@ -91,13 +95,14 @@ def display_file_coverage(per_file_coverage, total_coverage):
print(separator) print(separator)
print(record_template % ("Total", total_coverage[0], total_coverage[1])) print(record_template % ("Total", total_coverage[0], total_coverage[1]))
def report_coverage(): def report_coverage():
parser = get_option_parser() parser = get_option_parser()
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
interested_files = set() interested_files = set()
if options.filenames is not None: if options.filenames is not None:
interested_files = set(f.strip() for f in options.filenames.split(',')) interested_files = {f.strip() for f in options.filenames.split(",")}
# To make things simple, right now we only read gcov report from the input # To make things simple, right now we only read gcov report from the input
per_file_coverage, total_coverage = parse_gcov_report(sys.stdin) per_file_coverage, total_coverage = parse_gcov_report(sys.stdin)
@ -105,7 +110,8 @@ def report_coverage():
# Check if we need to display coverage info for interested files. # Check if we need to display coverage info for interested files.
if len(interested_files): if len(interested_files):
per_file_coverage = dict( per_file_coverage = dict(
(fname, per_file_coverage[fname]) for fname in interested_files (fname, per_file_coverage[fname])
for fname in interested_files
if fname in per_file_coverage if fname in per_file_coverage
) )
# If we only interested in several files, it makes no sense to report # If we only interested in several files, it makes no sense to report
@ -117,5 +123,6 @@ def report_coverage():
return return
display_file_coverage(per_file_coverage, total_coverage) display_file_coverage(per_file_coverage, total_coverage)
if __name__ == "__main__": if __name__ == "__main__":
report_coverage() report_coverage()

@ -3,8 +3,8 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from abc import ABC, abstractmethod
import re import re
from abc import ABC, abstractmethod
class BenchmarkRunner(ABC): class BenchmarkRunner(ABC):
@ -25,15 +25,15 @@ class BenchmarkRunner(ABC):
# 'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is # 'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is
# not specified in the OPTIONS file, then the location of the log file # not specified in the OPTIONS file, then the location of the log file
# will be /dev/shm and the name of the file will be 'LOG' # will be /dev/shm and the name of the file will be 'LOG'
file_name = '' file_name = ""
if log_dir: if log_dir:
# refer GetInfoLogPrefix() in rocksdb/util/filename.cc # refer GetInfoLogPrefix() in rocksdb/util/filename.cc
# example db_path: /dev/shm/dbbench # example db_path: /dev/shm/dbbench
file_name = db_path[1:] # to ignore the leading '/' character file_name = db_path[1:] # to ignore the leading '/' character
to_be_replaced = re.compile('[^0-9a-zA-Z\-_\.]') to_be_replaced = re.compile("[^0-9a-zA-Z\-_\.]") # noqa
for character in to_be_replaced.findall(db_path): for character in to_be_replaced.findall(db_path):
file_name = file_name.replace(character, '_') file_name = file_name.replace(character, "_")
if not file_name.endswith('_'): if not file_name.endswith("_"):
file_name += '_' file_name += "_"
file_name += 'LOG' file_name += "LOG"
return file_name return file_name

@ -4,6 +4,7 @@
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
import argparse import argparse
from advisor.db_config_optimizer import ConfigOptimizer from advisor.db_config_optimizer import ConfigOptimizer
from advisor.db_log_parser import NO_COL_FAMILY from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions from advisor.db_options_parser import DatabaseOptions
@ -23,44 +24,35 @@ def main(args):
bench_runner_class = getattr(bench_runner_module, args.benchrunner_class) bench_runner_class = getattr(bench_runner_module, args.benchrunner_class)
ods_args = {} ods_args = {}
if args.ods_client and args.ods_entity: if args.ods_client and args.ods_entity:
ods_args['client_script'] = args.ods_client ods_args["client_script"] = args.ods_client
ods_args['entity'] = args.ods_entity ods_args["entity"] = args.ods_entity
if args.ods_key_prefix: if args.ods_key_prefix:
ods_args['key_prefix'] = args.ods_key_prefix ods_args["key_prefix"] = args.ods_key_prefix
db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args) db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args)
# initialise the database configuration # initialise the database configuration
db_options = DatabaseOptions(args.rocksdb_options, args.misc_options) db_options = DatabaseOptions(args.rocksdb_options, args.misc_options)
# set the frequency at which stats are dumped in the LOG file and the # set the frequency at which stats are dumped in the LOG file and the
# location of the LOG file. # location of the LOG file.
db_log_dump_settings = { db_log_dump_settings = {
"DBOptions.stats_dump_period_sec": { "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: args.stats_dump_period_sec}
NO_COL_FAMILY: args.stats_dump_period_sec
}
} }
db_options.update_options(db_log_dump_settings) db_options.update_options(db_log_dump_settings)
# initialise the configuration optimizer # initialise the configuration optimizer
config_optimizer = ConfigOptimizer( config_optimizer = ConfigOptimizer(
db_bench_runner, db_bench_runner, db_options, rule_spec_parser, args.base_db_path
db_options,
rule_spec_parser,
args.base_db_path
) )
# run the optimiser to improve the database configuration for given # run the optimiser to improve the database configuration for given
# benchmarks, with the help of expert-specified rules # benchmarks, with the help of expert-specified rules
final_db_options = config_optimizer.run() final_db_options = config_optimizer.run()
# generate the final rocksdb options file # generate the final rocksdb options file
print( print(
'Final configuration in: ' + "Final configuration in: " + final_db_options.generate_options_config("final")
final_db_options.generate_options_config('final')
)
print(
'Final miscellaneous options: ' +
repr(final_db_options.get_misc_options())
) )
print("Final miscellaneous options: " + repr(final_db_options.get_misc_options()))
if __name__ == '__main__': if __name__ == "__main__":
''' """
An example run of this tool from the command-line would look like: An example run of this tool from the command-line would look like:
python3 -m advisor.config_optimizer_example python3 -m advisor.config_optimizer_example
--base_db_path=/tmp/rocksdbtest-155919/dbbench --base_db_path=/tmp/rocksdbtest-155919/dbbench
@ -69,66 +61,80 @@ if __name__ == '__main__':
--benchrunner_module=advisor.db_bench_runner --benchrunner_module=advisor.db_bench_runner
--benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench --benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench
readwhilewriting use_existing_db=true duration=90 readwhilewriting use_existing_db=true duration=90
''' """
parser = argparse.ArgumentParser(description='This script is used for\ parser = argparse.ArgumentParser(
searching for a better database configuration') description="This script is used for\
searching for a better database configuration"
)
parser.add_argument( parser.add_argument(
'--rocksdb_options', required=True, type=str, "--rocksdb_options",
help='path of the starting Rocksdb OPTIONS file' required=True,
type=str,
help="path of the starting Rocksdb OPTIONS file",
) )
# these are options that are column-family agnostic and are not yet # these are options that are column-family agnostic and are not yet
# supported by the Rocksdb Options file: eg. bloom_bits=2 # supported by the Rocksdb Options file: eg. bloom_bits=2
parser.add_argument( parser.add_argument(
'--misc_options', nargs='*', "--misc_options",
help='whitespace-separated list of options that are not supported ' + nargs="*",
'by the Rocksdb OPTIONS file, given in the ' + help="whitespace-separated list of options that are not supported "
'<option_name>=<option_value> format eg. "bloom_bits=2 ' + + "by the Rocksdb OPTIONS file, given in the "
'rate_limiter_bytes_per_sec=128000000"') + '<option_name>=<option_value> format eg. "bloom_bits=2 '
parser.add_argument( + 'rate_limiter_bytes_per_sec=128000000"',
'--base_db_path', required=True, type=str,
help='path for the Rocksdb database'
) )
parser.add_argument( parser.add_argument(
'--rules_spec', required=True, type=str, "--base_db_path", required=True, type=str, help="path for the Rocksdb database"
help='path of the file containing the expert-specified Rules'
) )
parser.add_argument( parser.add_argument(
'--stats_dump_period_sec', required=True, type=int, "--rules_spec",
help='the frequency (in seconds) at which STATISTICS are printed to ' + required=True,
'the Rocksdb LOG file' type=str,
help="path of the file containing the expert-specified Rules",
) )
# ODS arguments
parser.add_argument( parser.add_argument(
'--ods_client', type=str, help='the ODS client binary' "--stats_dump_period_sec",
required=True,
type=int,
help="the frequency (in seconds) at which STATISTICS are printed to "
+ "the Rocksdb LOG file",
) )
# ODS arguments
parser.add_argument("--ods_client", type=str, help="the ODS client binary")
parser.add_argument( parser.add_argument(
'--ods_entity', type=str, "--ods_entity",
help='the servers for which the ODS stats need to be fetched' type=str,
help="the servers for which the ODS stats need to be fetched",
) )
parser.add_argument( parser.add_argument(
'--ods_key_prefix', type=str, "--ods_key_prefix",
help='the prefix that needs to be attached to the keys of time ' + type=str,
'series to be fetched from ODS' help="the prefix that needs to be attached to the keys of time "
+ "series to be fetched from ODS",
) )
# benchrunner_module example: advisor.db_benchmark_client # benchrunner_module example: advisor.db_benchmark_client
parser.add_argument( parser.add_argument(
'--benchrunner_module', required=True, type=str, "--benchrunner_module",
help='the module containing the BenchmarkRunner class to be used by ' + required=True,
'the Optimizer, example: advisor.db_bench_runner' type=str,
help="the module containing the BenchmarkRunner class to be used by "
+ "the Optimizer, example: advisor.db_bench_runner",
) )
# benchrunner_class example: DBBenchRunner # benchrunner_class example: DBBenchRunner
parser.add_argument( parser.add_argument(
'--benchrunner_class', required=True, type=str, "--benchrunner_class",
help='the name of the BenchmarkRunner class to be used by the ' + required=True,
'Optimizer, should be present in the module provided in the ' + type=str,
'benchrunner_module argument, example: DBBenchRunner' help="the name of the BenchmarkRunner class to be used by the "
+ "Optimizer, should be present in the module provided in the "
+ "benchrunner_module argument, example: DBBenchRunner",
) )
parser.add_argument( parser.add_argument(
'--benchrunner_pos_args', nargs='*', "--benchrunner_pos_args",
help='whitespace-separated positional arguments that are passed on ' + nargs="*",
'to the constructor of the BenchmarkRunner class provided in the ' + help="whitespace-separated positional arguments that are passed on "
'benchrunner_class argument, example: "use_existing_db=true ' + + "to the constructor of the BenchmarkRunner class provided in the "
'duration=900"' + 'benchrunner_class argument, example: "use_existing_db=true '
+ 'duration=900"',
) )
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)

@ -3,19 +3,22 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from advisor.bench_runner import BenchmarkRunner
from advisor.db_log_parser import DataSource, DatabaseLogs, NO_COL_FAMILY
from advisor.db_stats_fetcher import (
LogStatsParser, OdsStatsFetcher, DatabasePerfContext
)
import shutil import shutil
import subprocess import subprocess
import time import time
from advisor.bench_runner import BenchmarkRunner
from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY
from advisor.db_stats_fetcher import (
DatabasePerfContext,
LogStatsParser,
OdsStatsFetcher,
)
''' """
NOTE: This is not thread-safe, because the output file is simply overwritten. NOTE: This is not thread-safe, because the output file is simply overwritten.
''' """
class DBBenchRunner(BenchmarkRunner): class DBBenchRunner(BenchmarkRunner):
@ -37,9 +40,7 @@ class DBBenchRunner(BenchmarkRunner):
optional_args_str = "" optional_args_str = ""
for option_name, option_value in misc_options_dict.items(): for option_name, option_value in misc_options_dict.items():
if option_value: if option_value:
optional_args_str += ( optional_args_str += " --" + option_name + "=" + str(option_value)
" --" + option_name + "=" + str(option_value)
)
return optional_args_str return optional_args_str
def __init__(self, positional_args, ods_args=None): def __init__(self, positional_args, ods_args=None):
@ -54,19 +55,17 @@ class DBBenchRunner(BenchmarkRunner):
self.ods_args = ods_args self.ods_args = ods_args
def _parse_output(self, get_perf_context=False): def _parse_output(self, get_perf_context=False):
''' """
Sample db_bench output after running 'readwhilewriting' benchmark: Sample db_bench output after running 'readwhilewriting' benchmark:
DB path: [/tmp/rocksdbtest-155919/dbbench]\n DB path: [/tmp/rocksdbtest-155919/dbbench]\n
readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\ readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
of 5427999 found)\n of 5427999 found)\n
PERF_CONTEXT:\n PERF_CONTEXT:\n
user_key_comparison_count = 500466712, block_cache_hit_count = ...\n user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
''' """
output = { output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None
}
perf_context_begins = False perf_context_begins = False
with open(self.OUTPUT_FILE, 'r') as fp: with open(self.OUTPUT_FILE, "r") as fp:
for line in fp: for line in fp:
if line.startswith(self.benchmark): if line.startswith(self.benchmark):
# line from sample output: # line from sample output:
@ -77,9 +76,7 @@ class DBBenchRunner(BenchmarkRunner):
for ix, token in enumerate(token_list): for ix, token in enumerate(token_list):
if token.startswith(self.THROUGHPUT): if token.startswith(self.THROUGHPUT):
# in above example, throughput = 60305 ops/sec # in above example, throughput = 60305 ops/sec
output[self.THROUGHPUT] = ( output[self.THROUGHPUT] = float(token_list[ix - 1])
float(token_list[ix - 1])
)
break break
elif get_perf_context and line.startswith(self.PERF_CON): elif get_perf_context and line.startswith(self.PERF_CON):
# the following lines in the output contain perf context # the following lines in the output contain perf context
@ -89,11 +86,11 @@ class DBBenchRunner(BenchmarkRunner):
# Sample perf_context output: # Sample perf_context output:
# user_key_comparison_count = 500, block_cache_hit_count =\ # user_key_comparison_count = 500, block_cache_hit_count =\
# 468, block_read_count = 580, block_read_byte = 445, ... # 468, block_read_count = 580, block_read_byte = 445, ...
token_list = line.strip().split(',') token_list = line.strip().split(",")
# token_list = ['user_key_comparison_count = 500', # token_list = ['user_key_comparison_count = 500',
# 'block_cache_hit_count = 468','block_read_count = 580'... # 'block_cache_hit_count = 468','block_read_count = 580'...
perf_context = { perf_context = {
tk.split('=')[0].strip(): tk.split('=')[1].strip() tk.split("=")[0].strip(): tk.split("=")[1].strip()
for tk in token_list for tk in token_list
if tk if tk
} }
@ -103,17 +100,13 @@ class DBBenchRunner(BenchmarkRunner):
timestamp = int(time.time()) timestamp = int(time.time())
perf_context_ts = {} perf_context_ts = {}
for stat in perf_context.keys(): for stat in perf_context.keys():
perf_context_ts[stat] = { perf_context_ts[stat] = {timestamp: int(perf_context[stat])}
timestamp: int(perf_context[stat])
}
output[self.PERF_CON] = perf_context_ts output[self.PERF_CON] = perf_context_ts
perf_context_begins = False perf_context_begins = False
elif line.startswith(self.DB_PATH): elif line.startswith(self.DB_PATH):
# line from sample output: # line from sample output:
# DB path: [/tmp/rocksdbtest-155919/dbbench]\n # DB path: [/tmp/rocksdbtest-155919/dbbench]\n
output[self.DB_PATH] = ( output[self.DB_PATH] = line.split("[")[1].split("]")[0]
line.split('[')[1].split(']')[0]
)
return output return output
def get_log_options(self, db_options, db_path): def get_log_options(self, db_options, db_path):
@ -124,40 +117,38 @@ class DBBenchRunner(BenchmarkRunner):
logs_file_prefix = None logs_file_prefix = None
# fetch frequency at which the stats are dumped in the Rocksdb logs # fetch frequency at which the stats are dumped in the Rocksdb logs
dump_period = 'DBOptions.stats_dump_period_sec' dump_period = "DBOptions.stats_dump_period_sec"
# fetch the directory, if specified, in which the Rocksdb logs are # fetch the directory, if specified, in which the Rocksdb logs are
# dumped, by default logs are dumped in same location as database # dumped, by default logs are dumped in same location as database
log_dir = 'DBOptions.db_log_dir' log_dir = "DBOptions.db_log_dir"
log_options = db_options.get_options([dump_period, log_dir]) log_options = db_options.get_options([dump_period, log_dir])
if dump_period in log_options: if dump_period in log_options:
stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY]) stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
if log_dir in log_options: if log_dir in log_options:
log_dir_path = log_options[log_dir][NO_COL_FAMILY] log_dir_path = log_options[log_dir][NO_COL_FAMILY]
log_file_name = DBBenchRunner.get_info_log_file_name( log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)
log_dir_path, db_path
)
if not log_dir_path: if not log_dir_path:
log_dir_path = db_path log_dir_path = db_path
if not log_dir_path.endswith('/'): if not log_dir_path.endswith("/"):
log_dir_path += '/' log_dir_path += "/"
logs_file_prefix = log_dir_path + log_file_name logs_file_prefix = log_dir_path + log_file_name
return (logs_file_prefix, stats_freq_sec) return (logs_file_prefix, stats_freq_sec)
def _get_options_command_line_args_str(self, curr_options): def _get_options_command_line_args_str(self, curr_options):
''' """
This method uses the provided Rocksdb OPTIONS to create a string of This method uses the provided Rocksdb OPTIONS to create a string of
command-line arguments for db_bench. command-line arguments for db_bench.
The --options_file argument is always given and the options that are The --options_file argument is always given and the options that are
not supported by the OPTIONS file are given as separate arguments. not supported by the OPTIONS file are given as separate arguments.
''' """
optional_args_str = DBBenchRunner.get_opt_args_str( optional_args_str = DBBenchRunner.get_opt_args_str(
curr_options.get_misc_options() curr_options.get_misc_options()
) )
# generate an options configuration file # generate an options configuration file
options_file = curr_options.generate_options_config(nonce='12345') options_file = curr_options.generate_options_config(nonce="12345")
optional_args_str += " --options_file=" + options_file optional_args_str += " --options_file=" + options_file
return optional_args_str return optional_args_str
@ -166,10 +157,11 @@ class DBBenchRunner(BenchmarkRunner):
try: try:
shutil.rmtree(db_path, ignore_errors=True) shutil.rmtree(db_path, ignore_errors=True)
except OSError as e: except OSError as e:
print('Error: rmdir ' + e.filename + ' ' + e.strerror) print("Error: rmdir " + e.filename + " " + e.strerror)
# setup database with a million keys using the fillrandom benchmark # setup database with a million keys using the fillrandom benchmark
command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % ( command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
self.db_bench_binary, db_path self.db_bench_binary,
db_path,
) )
args_str = self._get_options_command_line_args_str(curr_options) args_str = self._get_options_command_line_args_str(curr_options)
command += args_str command += args_str
@ -177,21 +169,23 @@ class DBBenchRunner(BenchmarkRunner):
def _build_experiment_command(self, curr_options, db_path): def _build_experiment_command(self, curr_options, db_path):
command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % ( command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
self.db_bench_binary, self.benchmark, db_path self.db_bench_binary,
self.benchmark,
db_path,
) )
# fetch the command-line arguments string for providing Rocksdb options # fetch the command-line arguments string for providing Rocksdb options
args_str = self._get_options_command_line_args_str(curr_options) args_str = self._get_options_command_line_args_str(curr_options)
# handle the command-line args passed in the constructor, these # handle the command-line args passed in the constructor, these
# arguments are specific to db_bench # arguments are specific to db_bench
for cmd_line_arg in self.db_bench_args: for cmd_line_arg in self.db_bench_args:
args_str += (" --" + cmd_line_arg) args_str += " --" + cmd_line_arg
command += args_str command += args_str
return command return command
def _run_command(self, command): def _run_command(self, command):
out_file = open(self.OUTPUT_FILE, "w+") out_file = open(self.OUTPUT_FILE, "w+")
err_file = open(self.ERROR_FILE, "w+") err_file = open(self.ERROR_FILE, "w+")
print('executing... - ' + command) print("executing... - " + command)
subprocess.call(command, shell=True, stdout=out_file, stderr=err_file) subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
out_file.close() out_file.close()
err_file.close() err_file.close()
@ -214,32 +208,30 @@ class DBBenchRunner(BenchmarkRunner):
db_options, parsed_output[self.DB_PATH] db_options, parsed_output[self.DB_PATH]
) )
# create the Rocksbd LOGS object # create the Rocksbd LOGS object
db_logs = DatabaseLogs( db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
logs_file_prefix, db_options.get_column_families()
)
# Create the Log STATS object # Create the Log STATS object
db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec) db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
# Create the PerfContext STATS object # Create the PerfContext STATS object
db_perf_context = DatabasePerfContext( db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False)
parsed_output[self.PERF_CON], 0, False
)
# create the data-sources dictionary # create the data-sources dictionary
data_sources = { data_sources = {
DataSource.Type.DB_OPTIONS: [db_options], DataSource.Type.DB_OPTIONS: [db_options],
DataSource.Type.LOG: [db_logs], DataSource.Type.LOG: [db_logs],
DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context] DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context],
} }
# Create the ODS STATS object # Create the ODS STATS object
if self.ods_args: if self.ods_args:
key_prefix = '' key_prefix = ""
if 'key_prefix' in self.ods_args: if "key_prefix" in self.ods_args:
key_prefix = self.ods_args['key_prefix'] key_prefix = self.ods_args["key_prefix"]
data_sources[DataSource.Type.TIME_SERIES].append(OdsStatsFetcher( data_sources[DataSource.Type.TIME_SERIES].append(
self.ods_args['client_script'], OdsStatsFetcher(
self.ods_args['entity'], self.ods_args["client_script"],
experiment_start_time, self.ods_args["entity"],
experiment_end_time, experiment_start_time,
key_prefix experiment_end_time,
)) key_prefix,
)
)
# return the experiment's data-sources and throughput # return the experiment's data-sources and throughput
return data_sources, parsed_output[self.THROUGHPUT] return data_sources, parsed_output[self.THROUGHPUT]

@ -3,16 +3,17 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
import copy
import random
from advisor.db_log_parser import NO_COL_FAMILY from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import Suggestion from advisor.rule_parser import Suggestion
import copy
import random
class ConfigOptimizer: class ConfigOptimizer:
SCOPE = 'scope' SCOPE = "scope"
SUGG_VAL = 'suggested values' SUGG_VAL = "suggested values"
@staticmethod @staticmethod
def apply_action_on_value(old_value, action, suggested_values): def apply_action_on_value(old_value, action, suggested_values):
@ -21,7 +22,7 @@ class ConfigOptimizer:
chosen_sugg_val = random.choice(list(suggested_values)) chosen_sugg_val = random.choice(list(suggested_values))
new_value = None new_value = None
if action is Suggestion.Action.set or not old_value: if action is Suggestion.Action.set or not old_value:
assert(chosen_sugg_val) assert chosen_sugg_val
new_value = chosen_sugg_val new_value = chosen_sugg_val
else: else:
# For increase/decrease actions, currently the code tries to make # For increase/decrease actions, currently the code tries to make
@ -61,8 +62,8 @@ class ConfigOptimizer:
# A Suggestion in the rules spec must have the 'option' and # A Suggestion in the rules spec must have the 'option' and
# 'action' fields defined, always call perform_checks() method # 'action' fields defined, always call perform_checks() method
# after parsing the rules file using RulesSpec # after parsing the rules file using RulesSpec
assert(option) assert option
assert(action) assert action
required_options.append(option) required_options.append(option)
rule_suggestions.append(suggestions_dict[sugg_name]) rule_suggestions.append(suggestions_dict[sugg_name])
current_config = options.get_options(required_options) current_config = options.get_options(required_options)
@ -87,8 +88,9 @@ class ConfigOptimizer:
updated_config[sugg.option][col_fam] = new_value updated_config[sugg.option][col_fam] = new_value
except AssertionError: except AssertionError:
print( print(
'WARNING(ConfigOptimizer): provide suggested_values ' + "WARNING(ConfigOptimizer): provide suggested_values "
'for ' + sugg.option + "for "
+ sugg.option
) )
continue continue
# case: when the option is present in the current configuration # case: when the option is present in the current configuration
@ -103,8 +105,9 @@ class ConfigOptimizer:
updated_config[sugg.option][NO_COL_FAMILY] = new_value updated_config[sugg.option][NO_COL_FAMILY] = new_value
except AssertionError: except AssertionError:
print( print(
'WARNING(ConfigOptimizer): provide suggested_values ' + "WARNING(ConfigOptimizer): provide suggested_values "
'for ' + sugg.option + "for "
+ sugg.option
) )
else: else:
for col_fam in rule.get_trigger_column_families(): for col_fam in rule.get_trigger_column_families():
@ -120,15 +123,16 @@ class ConfigOptimizer:
updated_config[sugg.option][col_fam] = new_value updated_config[sugg.option][col_fam] = new_value
except AssertionError: except AssertionError:
print( print(
'WARNING(ConfigOptimizer): provide ' + "WARNING(ConfigOptimizer): provide "
'suggested_values for ' + sugg.option + "suggested_values for "
+ sugg.option
) )
return current_config, updated_config return current_config, updated_config
@staticmethod @staticmethod
def pick_rule_to_apply(rules, last_rule_name, rules_tried, backtrack): def pick_rule_to_apply(rules, last_rule_name, rules_tried, backtrack):
if not rules: if not rules:
print('\nNo more rules triggered!') print("\nNo more rules triggered!")
return None return None
# if the last rule provided an improvement in the database performance, # if the last rule provided an improvement in the database performance,
# and it was triggered again (i.e. it is present in 'rules'), then pick # and it was triggered again (i.e. it is present in 'rules'), then pick
@ -143,7 +147,7 @@ class ConfigOptimizer:
for rule in rules: for rule in rules:
if rule.name not in rules_tried: if rule.name not in rules_tried:
return rule return rule
print('\nAll rules have been exhausted') print("\nAll rules have been exhausted")
return None return None
@staticmethod @staticmethod
@ -153,13 +157,13 @@ class ConfigOptimizer:
rules_tried, rules_tried,
backtrack, backtrack,
curr_options, curr_options,
suggestions_dict suggestions_dict,
): ):
curr_rule = ConfigOptimizer.pick_rule_to_apply( curr_rule = ConfigOptimizer.pick_rule_to_apply(
triggered_rules, current_rule_name, rules_tried, backtrack triggered_rules, current_rule_name, rules_tried, backtrack
) )
if not curr_rule: if not curr_rule:
return tuple([None]*4) return tuple([None] * 4)
# if a rule has been picked for improving db_config, update rules_tried # if a rule has been picked for improving db_config, update rules_tried
rules_tried.add(curr_rule.name) rules_tried.add(curr_rule.name)
# get updated config based on the picked rule # get updated config based on the picked rule
@ -168,17 +172,20 @@ class ConfigOptimizer:
) )
conf_diff = DatabaseOptions.get_options_diff(curr_conf, updated_conf) conf_diff = DatabaseOptions.get_options_diff(curr_conf, updated_conf)
if not conf_diff: # the current and updated configs are the same if not conf_diff: # the current and updated configs are the same
curr_rule, rules_tried, curr_conf, updated_conf = ( (
ConfigOptimizer.apply_suggestions( curr_rule,
triggered_rules, rules_tried,
None, curr_conf,
rules_tried, updated_conf,
backtrack, ) = ConfigOptimizer.apply_suggestions(
curr_options, triggered_rules,
suggestions_dict None,
) rules_tried,
backtrack,
curr_options,
suggestions_dict,
) )
print('returning from apply_suggestions') print("returning from apply_suggestions")
return (curr_rule, rules_tried, curr_conf, updated_conf) return (curr_rule, rules_tried, curr_conf, updated_conf)
# TODO(poojam23): check if this method is required or can we directly set # TODO(poojam23): check if this method is required or can we directly set
@ -205,52 +212,53 @@ class ConfigOptimizer:
# RULE from all the triggered rules and apply all its suggestions to # RULE from all the triggered rules and apply all its suggestions to
# the appropriate options. # the appropriate options.
# bootstrapping the optimizer # bootstrapping the optimizer
print('Bootstrapping optimizer:') print("Bootstrapping optimizer:")
options = copy.deepcopy(self.db_options) options = copy.deepcopy(self.db_options)
old_data_sources, old_metric = ( old_data_sources, old_metric = self.bench_runner.run_experiment(
self.bench_runner.run_experiment(options, self.base_db_path) options, self.base_db_path
) )
print('Initial metric: ' + str(old_metric)) print("Initial metric: " + str(old_metric))
self.rule_parser.load_rules_from_spec() self.rule_parser.load_rules_from_spec()
self.rule_parser.perform_section_checks() self.rule_parser.perform_section_checks()
triggered_rules = self.rule_parser.get_triggered_rules( triggered_rules = self.rule_parser.get_triggered_rules(
old_data_sources, options.get_column_families() old_data_sources, options.get_column_families()
) )
print('\nTriggered:') print("\nTriggered:")
self.rule_parser.print_rules(triggered_rules) self.rule_parser.print_rules(triggered_rules)
backtrack = False backtrack = False
rules_tried = set() rules_tried = set()
curr_rule, rules_tried, curr_conf, updated_conf = ( (
ConfigOptimizer.apply_suggestions( curr_rule,
triggered_rules, rules_tried,
None, curr_conf,
rules_tried, updated_conf,
backtrack, ) = ConfigOptimizer.apply_suggestions(
options, triggered_rules,
self.rule_parser.get_suggestions_dict() None,
) rules_tried,
backtrack,
options,
self.rule_parser.get_suggestions_dict(),
) )
# the optimizer loop # the optimizer loop
while curr_rule: while curr_rule:
print('\nRule picked for next iteration:') print("\nRule picked for next iteration:")
print(curr_rule.name) print(curr_rule.name)
print('\ncurrent config:') print("\ncurrent config:")
print(curr_conf) print(curr_conf)
print('updated config:') print("updated config:")
print(updated_conf) print(updated_conf)
options.update_options(updated_conf) options.update_options(updated_conf)
# run bench_runner with updated config # run bench_runner with updated config
new_data_sources, new_metric = ( new_data_sources, new_metric = self.bench_runner.run_experiment(
self.bench_runner.run_experiment(options, self.base_db_path) options, self.base_db_path
)
print('\nnew metric: ' + str(new_metric))
backtrack = not self.bench_runner.is_metric_better(
new_metric, old_metric
) )
print("\nnew metric: " + str(new_metric))
backtrack = not self.bench_runner.is_metric_better(new_metric, old_metric)
# update triggered_rules, metric, data_sources, if required # update triggered_rules, metric, data_sources, if required
if backtrack: if backtrack:
# revert changes to options config # revert changes to options config
print('\nBacktracking to previous configuration') print("\nBacktracking to previous configuration")
backtrack_conf = ConfigOptimizer.get_backtrack_config( backtrack_conf = ConfigOptimizer.get_backtrack_config(
curr_conf, updated_conf curr_conf, updated_conf
) )
@ -262,21 +270,24 @@ class ConfigOptimizer:
triggered_rules = self.rule_parser.get_triggered_rules( triggered_rules = self.rule_parser.get_triggered_rules(
new_data_sources, options.get_column_families() new_data_sources, options.get_column_families()
) )
print('\nTriggered:') print("\nTriggered:")
self.rule_parser.print_rules(triggered_rules) self.rule_parser.print_rules(triggered_rules)
old_metric = new_metric old_metric = new_metric
old_data_sources = new_data_sources old_data_sources = new_data_sources
rules_tried = set() rules_tried = set()
# pick rule to work on and set curr_rule to that # pick rule to work on and set curr_rule to that
curr_rule, rules_tried, curr_conf, updated_conf = ( (
ConfigOptimizer.apply_suggestions( curr_rule,
triggered_rules, rules_tried,
curr_rule.name, curr_conf,
rules_tried, updated_conf,
backtrack, ) = ConfigOptimizer.apply_suggestions(
options, triggered_rules,
self.rule_parser.get_suggestions_dict() curr_rule.name,
) rules_tried,
backtrack,
options,
self.rule_parser.get_suggestions_dict(),
) )
# return the final database options configuration # return the final database options configuration
return options return options

@ -3,15 +3,15 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from abc import ABC, abstractmethod
from calendar import timegm
from enum import Enum
import glob import glob
import re import re
import time import time
from abc import ABC, abstractmethod
from calendar import timegm
from enum import Enum
NO_COL_FAMILY = 'DB_WIDE' NO_COL_FAMILY = "DB_WIDE"
class DataSource(ABC): class DataSource(ABC):
@ -33,7 +33,7 @@ class Log:
def is_new_log(log_line): def is_new_log(log_line):
# The assumption is that a new log will start with a date printed in # The assumption is that a new log will start with a date printed in
# the below regex format. # the below regex format.
date_regex = '\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}' date_regex = "\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}" # noqa
return re.match(date_regex, log_line) return re.match(date_regex, log_line)
def __init__(self, log_line, column_families): def __init__(self, log_line, column_families):
@ -46,7 +46,7 @@ class Log:
# "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634] # "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634]
# [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n" # [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n"
for col_fam in column_families: for col_fam in column_families:
search_for_str = '\[' + col_fam + '\]' search_for_str = "\[" + col_fam + "\]" # noqa
if re.search(search_for_str, self.message): if re.search(search_for_str, self.message):
self.column_family = col_fam self.column_family = col_fam
break break
@ -67,21 +67,26 @@ class Log:
return self.message return self.message
def append_message(self, remaining_log): def append_message(self, remaining_log):
self.message = self.message + '\n' + remaining_log.strip() self.message = self.message + "\n" + remaining_log.strip()
def get_timestamp(self): def get_timestamp(self):
# example: '2018/07/25-11:25:45.782710' will be converted to the GMT # example: '2018/07/25-11:25:45.782710' will be converted to the GMT
# Unix timestamp 1532517945 (note: this method assumes that self.time # Unix timestamp 1532517945 (note: this method assumes that self.time
# is in GMT) # is in GMT)
hr_time = self.time + 'GMT' hr_time = self.time + "GMT"
timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z")) timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z"))
return timestamp return timestamp
def __repr__(self): def __repr__(self):
return ( return (
'time: ' + self.time + '; context: ' + self.context + "time: "
'; col_fam: ' + self.column_family + + self.time
'; message: ' + self.message + "; context: "
+ self.context
+ "; col_fam: "
+ self.column_family
+ "; message: "
+ self.message
) )
@ -106,22 +111,20 @@ class DatabaseLogs(DataSource):
cond.set_trigger(trigger) cond.set_trigger(trigger)
def check_and_trigger_conditions(self, conditions): def check_and_trigger_conditions(self, conditions):
for file_name in glob.glob(self.logs_path_prefix + '*'): for file_name in glob.glob(self.logs_path_prefix + "*"):
# TODO(poojam23): find a way to distinguish between log files # TODO(poojam23): find a way to distinguish between log files
# - generated in the current experiment but are labeled 'old' # - generated in the current experiment but are labeled 'old'
# because they LOGs exceeded the file size limit AND # because they LOGs exceeded the file size limit AND
# - generated in some previous experiment that are also labeled # - generated in some previous experiment that are also labeled
# 'old' and were not deleted for some reason # 'old' and were not deleted for some reason
if re.search('old', file_name, re.IGNORECASE): if re.search("old", file_name, re.IGNORECASE):
continue continue
with open(file_name, 'r') as db_logs: with open(file_name, "r") as db_logs:
new_log = None new_log = None
for line in db_logs: for line in db_logs:
if Log.is_new_log(line): if Log.is_new_log(line):
if new_log: if new_log:
self.trigger_conditions_for_log( self.trigger_conditions_for_log(conditions, new_log)
conditions, new_log
)
new_log = Log(line, self.column_families) new_log = Log(line, self.column_families)
else: else:
# To account for logs split into multiple lines # To account for logs split into multiple lines

@ -4,25 +4,26 @@
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
import copy import copy
import os
from advisor.db_log_parser import DataSource, NO_COL_FAMILY from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.ini_parser import IniParser from advisor.ini_parser import IniParser
import os
class OptionsSpecParser(IniParser): class OptionsSpecParser(IniParser):
@staticmethod @staticmethod
def is_new_option(line): def is_new_option(line):
return '=' in line return "=" in line
@staticmethod @staticmethod
def get_section_type(line): def get_section_type(line):
''' """
Example section header: [TableOptions/BlockBasedTable "default"] Example section header: [TableOptions/BlockBasedTable "default"]
Here ConfigurationOptimizer returned would be Here ConfigurationOptimizer returned would be
'TableOptions.BlockBasedTable' 'TableOptions.BlockBasedTable'
''' """
section_path = line.strip()[1:-1].split()[0] section_path = line.strip()[1:-1].split()[0]
section_type = '.'.join(section_path.split('/')) section_type = ".".join(section_path.split("/"))
return section_type return section_type
@staticmethod @staticmethod
@ -39,20 +40,20 @@ class OptionsSpecParser(IniParser):
# Example: # Example:
# Case 1: get_section_str('DBOptions', NO_COL_FAMILY) # Case 1: get_section_str('DBOptions', NO_COL_FAMILY)
# Case 2: get_section_str('TableOptions.BlockBasedTable', 'default') # Case 2: get_section_str('TableOptions.BlockBasedTable', 'default')
section_type = '/'.join(section_type.strip().split('.')) section_type = "/".join(section_type.strip().split("."))
# Case 1: section_type = 'DBOptions' # Case 1: section_type = 'DBOptions'
# Case 2: section_type = 'TableOptions/BlockBasedTable' # Case 2: section_type = 'TableOptions/BlockBasedTable'
section_str = '[' + section_type section_str = "[" + section_type
if section_name == NO_COL_FAMILY: if section_name == NO_COL_FAMILY:
# Case 1: '[DBOptions]' # Case 1: '[DBOptions]'
return (section_str + ']') return section_str + "]"
else: else:
# Case 2: '[TableOptions/BlockBasedTable "default"]' # Case 2: '[TableOptions/BlockBasedTable "default"]'
return section_str + ' "' + section_name + '"]' return section_str + ' "' + section_name + '"]'
@staticmethod @staticmethod
def get_option_str(key, values): def get_option_str(key, values):
option_str = key + '=' option_str = key + "="
# get_option_str('db_log_dir', None), returns 'db_log_dir=' # get_option_str('db_log_dir', None), returns 'db_log_dir='
if values: if values:
# example: # example:
@ -61,7 +62,7 @@ class OptionsSpecParser(IniParser):
# 'max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1' # 'max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1'
if isinstance(values, list): if isinstance(values, list):
for value in values: for value in values:
option_str += (str(value) + ':') option_str += str(value) + ":"
option_str = option_str[:-1] option_str = option_str[:-1]
else: else:
# example: get_option_str('write_buffer_size', 1048576) # example: get_option_str('write_buffer_size', 1048576)
@ -71,13 +72,12 @@ class OptionsSpecParser(IniParser):
class DatabaseOptions(DataSource): class DatabaseOptions(DataSource):
@staticmethod @staticmethod
def is_misc_option(option_name): def is_misc_option(option_name):
# these are miscellaneous options that are not yet supported by the # these are miscellaneous options that are not yet supported by the
# Rocksdb options file, hence they are not prefixed with any section # Rocksdb options file, hence they are not prefixed with any section
# name # name
return '.' not in option_name return "." not in option_name
@staticmethod @staticmethod
def get_options_diff(opt_old, opt_new): def get_options_diff(opt_old, opt_new):
@ -102,7 +102,7 @@ class DatabaseOptions(DataSource):
if opt_old[opt][col_fam] != opt_new[opt][col_fam]: if opt_old[opt][col_fam] != opt_new[opt][col_fam]:
diff[opt][col_fam] = ( diff[opt][col_fam] = (
opt_old[opt][col_fam], opt_old[opt][col_fam],
opt_new[opt][col_fam] opt_new[opt][col_fam],
) )
else: else:
diff[opt][col_fam] = (opt_old[opt][col_fam], None) diff[opt][col_fam] = (opt_old[opt][col_fam], None)
@ -111,7 +111,7 @@ class DatabaseOptions(DataSource):
if opt_old[opt][col_fam] != opt_new[opt][col_fam]: if opt_old[opt][col_fam] != opt_new[opt][col_fam]:
diff[opt][col_fam] = ( diff[opt][col_fam] = (
opt_old[opt][col_fam], opt_old[opt][col_fam],
opt_new[opt][col_fam] opt_new[opt][col_fam],
) )
else: else:
diff[opt][col_fam] = (None, opt_new[opt][col_fam]) diff[opt][col_fam] = (None, opt_new[opt][col_fam])
@ -137,21 +137,19 @@ class DatabaseOptions(DataSource):
self.misc_options = {} self.misc_options = {}
if misc_options: if misc_options:
for option_pair_str in misc_options: for option_pair_str in misc_options:
option_name = option_pair_str.split('=')[0].strip() option_name = option_pair_str.split("=")[0].strip()
option_value = option_pair_str.split('=')[1].strip() option_value = option_pair_str.split("=")[1].strip()
self.misc_options[option_name] = option_value self.misc_options[option_name] = option_value
def load_from_source(self, options_path): def load_from_source(self, options_path):
self.options_dict = {} self.options_dict = {}
with open(options_path, 'r') as db_options: with open(options_path, "r") as db_options:
for line in db_options: for line in db_options:
line = OptionsSpecParser.remove_trailing_comment(line) line = OptionsSpecParser.remove_trailing_comment(line)
if not line: if not line:
continue continue
if OptionsSpecParser.is_section_header(line): if OptionsSpecParser.is_section_header(line):
curr_sec_type = ( curr_sec_type = OptionsSpecParser.get_section_type(line)
OptionsSpecParser.get_section_type(line)
)
curr_sec_name = OptionsSpecParser.get_section_name(line) curr_sec_name = OptionsSpecParser.get_section_name(line)
if curr_sec_type not in self.options_dict: if curr_sec_type not in self.options_dict:
self.options_dict[curr_sec_type] = {} self.options_dict[curr_sec_type] = {}
@ -163,17 +161,15 @@ class DatabaseOptions(DataSource):
# CFOptions and 'default' is the name of a column family # CFOptions and 'default' is the name of a column family
# that for this database, so it's added to the list of # that for this database, so it's added to the list of
# column families stored in this object # column families stored in this object
if curr_sec_type == 'CFOptions': if curr_sec_type == "CFOptions":
if not self.column_families: if not self.column_families:
self.column_families = [] self.column_families = []
self.column_families.append(curr_sec_name) self.column_families.append(curr_sec_name)
elif OptionsSpecParser.is_new_option(line): elif OptionsSpecParser.is_new_option(line):
key, value = OptionsSpecParser.get_key_value_pair(line) key, value = OptionsSpecParser.get_key_value_pair(line)
self.options_dict[curr_sec_type][curr_sec_name][key] = ( self.options_dict[curr_sec_type][curr_sec_name][key] = value
value
)
else: else:
error = 'Not able to parse line in Options file.' error = "Not able to parse line in Options file."
OptionsSpecParser.exit_with_parse_error(line, error) OptionsSpecParser.exit_with_parse_error(line, error)
def get_misc_options(self): def get_misc_options(self):
@ -193,7 +189,7 @@ class DatabaseOptions(DataSource):
for sec_type in self.options_dict: for sec_type in self.options_dict:
for col_fam in self.options_dict[sec_type]: for col_fam in self.options_dict[sec_type]:
for opt_name in self.options_dict[sec_type][col_fam]: for opt_name in self.options_dict[sec_type][col_fam]:
option = sec_type + '.' + opt_name option = sec_type + "." + opt_name
all_options.append(option) all_options.append(option)
all_options.extend(list(self.misc_options.keys())) all_options.extend(list(self.misc_options.keys()))
return self.get_options(all_options) return self.get_options(all_options)
@ -211,24 +207,22 @@ class DatabaseOptions(DataSource):
continue continue
if option not in reqd_options_dict: if option not in reqd_options_dict:
reqd_options_dict[option] = {} reqd_options_dict[option] = {}
reqd_options_dict[option][NO_COL_FAMILY] = ( reqd_options_dict[option][NO_COL_FAMILY] = self.misc_options[option]
self.misc_options[option]
)
else: else:
# Example: option = 'TableOptions.BlockBasedTable.block_align' # Example: option = 'TableOptions.BlockBasedTable.block_align'
# then, sec_type = 'TableOptions.BlockBasedTable' # then, sec_type = 'TableOptions.BlockBasedTable'
sec_type = '.'.join(option.split('.')[:-1]) sec_type = ".".join(option.split(".")[:-1])
# opt_name = 'block_align' # opt_name = 'block_align'
opt_name = option.split('.')[-1] opt_name = option.split(".")[-1]
if sec_type not in self.options_dict: if sec_type not in self.options_dict:
continue continue
for col_fam in self.options_dict[sec_type]: for col_fam in self.options_dict[sec_type]:
if opt_name in self.options_dict[sec_type][col_fam]: if opt_name in self.options_dict[sec_type][col_fam]:
if option not in reqd_options_dict: if option not in reqd_options_dict:
reqd_options_dict[option] = {} reqd_options_dict[option] = {}
reqd_options_dict[option][col_fam] = ( reqd_options_dict[option][col_fam] = self.options_dict[
self.options_dict[sec_type][col_fam][opt_name] sec_type
) ][col_fam][opt_name]
return reqd_options_dict return reqd_options_dict
def update_options(self, options): def update_options(self, options):
@ -244,16 +238,19 @@ class DatabaseOptions(DataSource):
# misc_options dictionary # misc_options dictionary
if NO_COL_FAMILY not in options[option]: if NO_COL_FAMILY not in options[option]:
print( print(
'WARNING(DatabaseOptions.update_options): not ' + "WARNING(DatabaseOptions.update_options): not "
'updating option ' + option + ' because it is in ' + + "updating option "
'misc_option format but its scope is not ' + + option
NO_COL_FAMILY + '. Check format of option.' + " because it is in "
+ "misc_option format but its scope is not "
+ NO_COL_FAMILY
+ ". Check format of option."
) )
continue continue
self.misc_options[option] = options[option][NO_COL_FAMILY] self.misc_options[option] = options[option][NO_COL_FAMILY]
else: else:
sec_name = '.'.join(option.split('.')[:-1]) sec_name = ".".join(option.split(".")[:-1])
opt_name = option.split('.')[-1] opt_name = option.split(".")[-1]
if sec_name not in self.options_dict: if sec_name not in self.options_dict:
self.options_dict[sec_name] = {} self.options_dict[sec_name] = {}
for col_fam in options[option]: for col_fam in options[option]:
@ -262,30 +259,26 @@ class DatabaseOptions(DataSource):
# value # value
if col_fam not in self.options_dict[sec_name]: if col_fam not in self.options_dict[sec_name]:
self.options_dict[sec_name][col_fam] = {} self.options_dict[sec_name][col_fam] = {}
self.options_dict[sec_name][col_fam][opt_name] = ( self.options_dict[sec_name][col_fam][opt_name] = copy.deepcopy(
copy.deepcopy(options[option][col_fam]) options[option][col_fam]
) )
def generate_options_config(self, nonce): def generate_options_config(self, nonce):
# this method generates a Rocksdb OPTIONS file in the INI format from # this method generates a Rocksdb OPTIONS file in the INI format from
# the options stored in self.options_dict # the options stored in self.options_dict
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
file_name = '../temp/OPTIONS_' + str(nonce) + '.tmp' file_name = "../temp/OPTIONS_" + str(nonce) + ".tmp"
file_path = os.path.join(this_path, file_name) file_path = os.path.join(this_path, file_name)
with open(file_path, 'w') as fp: with open(file_path, "w") as fp:
for section in self.options_dict: for section in self.options_dict:
for col_fam in self.options_dict[section]: for col_fam in self.options_dict[section]:
fp.write( fp.write(OptionsSpecParser.get_section_str(section, col_fam) + "\n")
OptionsSpecParser.get_section_str(section, col_fam) +
'\n'
)
for option in self.options_dict[section][col_fam]: for option in self.options_dict[section][col_fam]:
values = self.options_dict[section][col_fam][option] values = self.options_dict[section][col_fam][option]
fp.write( fp.write(
OptionsSpecParser.get_option_str(option, values) + OptionsSpecParser.get_option_str(option, values) + "\n"
'\n'
) )
fp.write('\n') fp.write("\n")
return file_path return file_path
def check_and_trigger_conditions(self, conditions): def check_and_trigger_conditions(self, conditions):
@ -299,10 +292,14 @@ class DatabaseOptions(DataSource):
for ix, option in enumerate(cond.options): for ix, option in enumerate(cond.options):
if option not in reqd_options_dict: if option not in reqd_options_dict:
print( print(
'WARNING(DatabaseOptions.check_and_trigger): ' + "WARNING(DatabaseOptions.check_and_trigger): "
'skipping condition ' + cond.name + ' because it ' + "skipping condition "
'requires option ' + option + ' but this option is' + + cond.name
' not available' + " because it "
"requires option "
+ option
+ " but this option is"
+ " not available"
) )
missing_reqd_option = True missing_reqd_option = True
break # required option is absent break # required option is absent
@ -321,9 +318,7 @@ class DatabaseOptions(DataSource):
if eval(cond.eval_expr): if eval(cond.eval_expr):
cond.set_trigger({NO_COL_FAMILY: options}) cond.set_trigger({NO_COL_FAMILY: options})
except Exception as e: except Exception as e:
print( print("WARNING(DatabaseOptions) check_and_trigger:" + str(e))
'WARNING(DatabaseOptions) check_and_trigger:' + str(e)
)
continue continue
# for all the options that are not database-wide, we look for their # for all the options that are not database-wide, we look for their
@ -340,14 +335,9 @@ class DatabaseOptions(DataSource):
if present: if present:
try: try:
if eval(cond.eval_expr): if eval(cond.eval_expr):
col_fam_options_dict[col_fam] = ( col_fam_options_dict[col_fam] = copy.deepcopy(options)
copy.deepcopy(options)
)
except Exception as e: except Exception as e:
print( print("WARNING(DatabaseOptions) check_and_trigger: " + str(e))
'WARNING(DatabaseOptions) check_and_trigger: ' +
str(e)
)
# Trigger for an OptionCondition object is of the form: # Trigger for an OptionCondition object is of the form:
# Dict[col_fam_name: List[option_value]] # Dict[col_fam_name: List[option_value]]
# where col_fam_name is the name of a column family for which # where col_fam_name is the name of a column family for which

@ -3,17 +3,19 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from advisor.db_log_parser import Log
from advisor.db_timeseries_parser import TimeSeriesData, NO_ENTITY
import copy import copy
import glob import glob
import re import re
import subprocess import subprocess
import time import time
from typing import List
from advisor.db_log_parser import Log
from advisor.db_timeseries_parser import NO_ENTITY, TimeSeriesData
class LogStatsParser(TimeSeriesData): class LogStatsParser(TimeSeriesData):
STATS = 'STATISTICS:' STATS = "STATISTICS:"
@staticmethod @staticmethod
def parse_log_line_for_stats(log_line): def parse_log_line_for_stats(log_line):
@ -22,12 +24,8 @@ class LogStatsParser(TimeSeriesData):
token_list = log_line.strip().split() token_list = log_line.strip().split()
# token_list = ['rocksdb.db.get.micros', 'P50', ':', '8.4', 'P95', ':', # token_list = ['rocksdb.db.get.micros', 'P50', ':', '8.4', 'P95', ':',
# '21.8', 'P99', ':', '33.9', 'P100', ':', '92.0'] # '21.8', 'P99', ':', '33.9', 'P100', ':', '92.0']
stat_prefix = token_list[0] + '.' # 'rocksdb.db.get.micros.' stat_prefix = token_list[0] + "." # 'rocksdb.db.get.micros.'
stat_values = [ stat_values = [token for token in token_list[1:] if token != ":"]
token
for token in token_list[1:]
if token != ':'
]
# stat_values = ['P50', '8.4', 'P95', '21.8', 'P99', '33.9', 'P100', # stat_values = ['P50', '8.4', 'P95', '21.8', 'P99', '33.9', 'P100',
# '92.0'] # '92.0']
stat_dict = {} stat_dict = {}
@ -58,7 +56,7 @@ class LogStatsParser(TimeSeriesData):
# replace this with the appropriate key_prefix, remove these # replace this with the appropriate key_prefix, remove these
# characters here since the LogStatsParser does not need # characters here since the LogStatsParser does not need
# a prefix # a prefix
if key.startswith('[]'): if key.startswith("[]"):
reqd_stats.append(key[2:]) reqd_stats.append(key[2:])
else: else:
reqd_stats.append(key) reqd_stats.append(key)
@ -77,7 +75,7 @@ class LogStatsParser(TimeSeriesData):
# ... # ...
# rocksdb.db.get.micros P50 : 15.6 P95 : 39.7 P99 : 62.6 P100 : 148.0\n # rocksdb.db.get.micros P50 : 15.6 P95 : 39.7 P99 : 62.6 P100 : 148.0\n
# ..." # ..."
new_lines = log.get_message().split('\n') new_lines = log.get_message().split("\n")
# let log_ts = 1532518219 # let log_ts = 1532518219
log_ts = log.get_timestamp() log_ts = log.get_timestamp()
# example updates to keys_ts: # example updates to keys_ts:
@ -95,20 +93,17 @@ class LogStatsParser(TimeSeriesData):
# this method parses the Rocksdb LOG file and generates timeseries for # this method parses the Rocksdb LOG file and generates timeseries for
# each of the statistic in the list reqd_stats # each of the statistic in the list reqd_stats
self.keys_ts = {NO_ENTITY: {}} self.keys_ts = {NO_ENTITY: {}}
for file_name in glob.glob(self.logs_file_prefix + '*'): for file_name in glob.glob(self.logs_file_prefix + "*"):
# TODO(poojam23): find a way to distinguish between 'old' log files # TODO(poojam23): find a way to distinguish between 'old' log files
# from current and previous experiments, present in the same # from current and previous experiments, present in the same
# directory # directory
if re.search('old', file_name, re.IGNORECASE): if re.search("old", file_name, re.IGNORECASE):
continue continue
with open(file_name, 'r') as db_logs: with open(file_name, "r") as db_logs:
new_log = None new_log = None
for line in db_logs: for line in db_logs:
if Log.is_new_log(line): if Log.is_new_log(line):
if ( if new_log and re.search(self.STATS, new_log.get_message()):
new_log and
re.search(self.STATS, new_log.get_message())
):
self.add_to_timeseries(new_log, reqd_stats) self.add_to_timeseries(new_log, reqd_stats)
new_log = Log(line, column_families=[]) new_log = Log(line, column_families=[])
else: else:
@ -123,13 +118,13 @@ class DatabasePerfContext(TimeSeriesData):
# TODO(poojam23): check if any benchrunner provides PerfContext sampled at # TODO(poojam23): check if any benchrunner provides PerfContext sampled at
# regular intervals # regular intervals
def __init__(self, perf_context_ts, stats_freq_sec, cumulative): def __init__(self, perf_context_ts, stats_freq_sec, cumulative):
''' """
perf_context_ts is expected to be in the following format: perf_context_ts is expected to be in the following format:
Dict[metric, Dict[timestamp, value]], where for Dict[metric, Dict[timestamp, value]], where for
each (metric, timestamp) pair, the value is database-wide (i.e. each (metric, timestamp) pair, the value is database-wide (i.e.
summed over all the threads involved) summed over all the threads involved)
if stats_freq_sec == 0, per-metric only one value is reported if stats_freq_sec == 0, per-metric only one value is reported
''' """
super().__init__() super().__init__()
self.stats_freq_sec = stats_freq_sec self.stats_freq_sec = stats_freq_sec
self.keys_ts = {NO_ENTITY: perf_context_ts} self.keys_ts = {NO_ENTITY: perf_context_ts}
@ -148,11 +143,11 @@ class DatabasePerfContext(TimeSeriesData):
continue continue
for ix, ts in enumerate(timeseries[:-1]): for ix, ts in enumerate(timeseries[:-1]):
epoch_ts[NO_ENTITY][stat][ts] = ( epoch_ts[NO_ENTITY][stat][ts] = (
epoch_ts[NO_ENTITY][stat][ts] - epoch_ts[NO_ENTITY][stat][ts]
epoch_ts[NO_ENTITY][stat][timeseries[ix+1]] - epoch_ts[NO_ENTITY][stat][timeseries[ix + 1]]
) )
if epoch_ts[NO_ENTITY][stat][ts] < 0: if epoch_ts[NO_ENTITY][stat][ts] < 0:
raise ValueError('DBPerfContext: really cumulative?') raise ValueError("DBPerfContext: really cumulative?")
# drop the smallest timestamp in the timeseries for this metric # drop the smallest timestamp in the timeseries for this metric
epoch_ts[NO_ENTITY][stat].pop(timeseries[-1]) epoch_ts[NO_ENTITY][stat].pop(timeseries[-1])
self.keys_ts = epoch_ts self.keys_ts = epoch_ts
@ -171,8 +166,8 @@ class DatabasePerfContext(TimeSeriesData):
class OdsStatsFetcher(TimeSeriesData): class OdsStatsFetcher(TimeSeriesData):
# class constants # class constants
OUTPUT_FILE = 'temp/stats_out.tmp' OUTPUT_FILE = "temp/stats_out.tmp"
ERROR_FILE = 'temp/stats_err.tmp' ERROR_FILE = "temp/stats_err.tmp"
RAPIDO_COMMAND = "%s --entity=%s --key=%s --tstart=%s --tend=%s --showtime" RAPIDO_COMMAND = "%s --entity=%s --key=%s --tstart=%s --tend=%s --showtime"
# static methods # static methods
@ -183,9 +178,9 @@ class OdsStatsFetcher(TimeSeriesData):
@staticmethod @staticmethod
def _get_time_value_pair(pair_string): def _get_time_value_pair(pair_string):
# example pair_string: '[1532544591, 97.3653601828]' # example pair_string: '[1532544591, 97.3653601828]'
pair_string = pair_string.replace('[', '') pair_string = pair_string.replace("[", "")
pair_string = pair_string.replace(']', '') pair_string = pair_string.replace("]", "")
pair = pair_string.split(',') pair = pair_string.split(",")
first = int(pair[0].strip()) first = int(pair[0].strip())
second = float(pair[1].strip()) second = float(pair[1].strip())
return [first, second] return [first, second]
@ -193,12 +188,10 @@ class OdsStatsFetcher(TimeSeriesData):
@staticmethod @staticmethod
def _get_ods_cli_stime(start_time): def _get_ods_cli_stime(start_time):
diff = int(time.time() - int(start_time)) diff = int(time.time() - int(start_time))
stime = str(diff) + '_s' stime = str(diff) + "_s"
return stime return stime
def __init__( def __init__(self, client, entities, start_time, end_time, key_prefix=None):
self, client, entities, start_time, end_time, key_prefix=None
):
super().__init__() super().__init__()
self.client = client self.client = client
self.entities = entities self.entities = entities
@ -209,7 +202,7 @@ class OdsStatsFetcher(TimeSeriesData):
self.duration_sec = 60 self.duration_sec = 60
def execute_script(self, command): def execute_script(self, command):
print('executing...') print("executing...")
print(command) print(command)
out_file = open(self.OUTPUT_FILE, "w+") out_file = open(self.OUTPUT_FILE, "w+")
err_file = open(self.ERROR_FILE, "w+") err_file = open(self.ERROR_FILE, "w+")
@ -222,9 +215,9 @@ class OdsStatsFetcher(TimeSeriesData):
# <entity_name>\t<key_name>\t[[ts, value], [ts, value], ...] # <entity_name>\t<key_name>\t[[ts, value], [ts, value], ...]
# ts = timestamp; value = value of key_name in entity_name at time ts # ts = timestamp; value = value of key_name in entity_name at time ts
self.keys_ts = {} self.keys_ts = {}
with open(self.OUTPUT_FILE, 'r') as fp: with open(self.OUTPUT_FILE, "r") as fp:
for line in fp: for line in fp:
token_list = line.strip().split('\t') token_list = line.strip().split("\t")
entity = token_list[0] entity = token_list[0]
key = token_list[1] key = token_list[1]
if entity not in self.keys_ts: if entity not in self.keys_ts:
@ -233,7 +226,7 @@ class OdsStatsFetcher(TimeSeriesData):
self.keys_ts[entity][key] = {} self.keys_ts[entity][key] = {}
list_of_lists = [ list_of_lists = [
self._get_time_value_pair(pair_string) self._get_time_value_pair(pair_string)
for pair_string in token_list[2].split('],') for pair_string in token_list[2].split("],")
] ]
value = {pair[0]: pair[1] for pair in list_of_lists} value = {pair[0]: pair[1] for pair in list_of_lists}
self.keys_ts[entity][key] = value self.keys_ts[entity][key] = value
@ -243,7 +236,7 @@ class OdsStatsFetcher(TimeSeriesData):
# <entity_name>\t<key_name>\t<timestamp>\t<value> # <entity_name>\t<key_name>\t<timestamp>\t<value>
# there is one line per (entity_name, key_name, timestamp) # there is one line per (entity_name, key_name, timestamp)
self.keys_ts = {} self.keys_ts = {}
with open(self.OUTPUT_FILE, 'r') as fp: with open(self.OUTPUT_FILE, "r") as fp:
for line in fp: for line in fp:
token_list = line.split() token_list = line.split()
entity = token_list[0] entity = token_list[0]
@ -257,25 +250,29 @@ class OdsStatsFetcher(TimeSeriesData):
def fetch_timeseries(self, statistics): def fetch_timeseries(self, statistics):
# this method fetches the timeseries of required stats from the ODS # this method fetches the timeseries of required stats from the ODS
# service and populates the 'keys_ts' object appropriately # service and populates the 'keys_ts' object appropriately
print('OdsStatsFetcher: fetching ' + str(statistics)) print("OdsStatsFetcher: fetching " + str(statistics))
if re.search('rapido', self.client, re.IGNORECASE): if re.search("rapido", self.client, re.IGNORECASE):
command = self.RAPIDO_COMMAND % ( command = self.RAPIDO_COMMAND % (
self.client, self.client,
self._get_string_in_quotes(self.entities), self._get_string_in_quotes(self.entities),
self._get_string_in_quotes(','.join(statistics)), self._get_string_in_quotes(",".join(statistics)),
self._get_string_in_quotes(self.start_time), self._get_string_in_quotes(self.start_time),
self._get_string_in_quotes(self.end_time) self._get_string_in_quotes(self.end_time),
) )
# Run the tool and fetch the time-series data # Run the tool and fetch the time-series data
self.execute_script(command) self.execute_script(command)
# Parse output and populate the 'keys_ts' map # Parse output and populate the 'keys_ts' map
self.parse_rapido_output() self.parse_rapido_output()
elif re.search('ods', self.client, re.IGNORECASE): elif re.search("ods", self.client, re.IGNORECASE):
command = ( command = (
self.client + ' ' + self.client
'--stime=' + self._get_ods_cli_stime(self.start_time) + ' ' + + " "
self._get_string_in_quotes(self.entities) + ' ' + + "--stime="
self._get_string_in_quotes(','.join(statistics)) + self._get_ods_cli_stime(self.start_time)
+ " "
+ self._get_string_in_quotes(self.entities)
+ " "
+ self._get_string_in_quotes(",".join(statistics))
) )
# Run the tool and fetch the time-series data # Run the tool and fetch the time-series data
self.execute_script(command) self.execute_script(command)
@ -287,7 +284,7 @@ class OdsStatsFetcher(TimeSeriesData):
for cond in conditions: for cond in conditions:
for key in cond.keys: for key in cond.keys:
use_prefix = False use_prefix = False
if key.startswith('[]'): if key.startswith("[]"):
use_prefix = True use_prefix = True
key = key[2:] key = key[2:]
# TODO(poojam23): this is very hacky and needs to be improved # TODO(poojam23): this is very hacky and needs to be improved
@ -295,15 +292,15 @@ class OdsStatsFetcher(TimeSeriesData):
key += ".60" key += ".60"
if use_prefix: if use_prefix:
if not self.key_prefix: if not self.key_prefix:
print('Warning: OdsStatsFetcher might need key prefix') print("Warning: OdsStatsFetcher might need key prefix")
print('for the key: ' + key) print("for the key: " + key)
else: else:
key = self.key_prefix + "." + key key = self.key_prefix + "." + key
reqd_stats.append(key) reqd_stats.append(key)
return reqd_stats return reqd_stats
def fetch_rate_url(self, entities, keys, window_len, percent, display): def fetch_rate_url(self, entities: List[str], keys: List[str],
# type: (List[str], List[str], str, str, bool) -> str window_len: str, percent: str, display: bool) -> str:
transform_desc = ( transform_desc = (
"rate(" + str(window_len) + ",duration=" + str(self.duration_sec) "rate(" + str(window_len) + ",duration=" + str(self.duration_sec)
) )
@ -311,28 +308,33 @@ class OdsStatsFetcher(TimeSeriesData):
transform_desc = transform_desc + ",%)" transform_desc = transform_desc + ",%)"
else: else:
transform_desc = transform_desc + ")" transform_desc = transform_desc + ")"
if re.search('rapido', self.client, re.IGNORECASE): if re.search("rapido", self.client, re.IGNORECASE):
command = self.RAPIDO_COMMAND + " --transform=%s --url=%s" command = self.RAPIDO_COMMAND + " --transform=%s --url=%s"
command = command % ( command = command % (
self.client, self.client,
self._get_string_in_quotes(','.join(entities)), self._get_string_in_quotes(",".join(entities)),
self._get_string_in_quotes(','.join(keys)), self._get_string_in_quotes(",".join(keys)),
self._get_string_in_quotes(self.start_time), self._get_string_in_quotes(self.start_time),
self._get_string_in_quotes(self.end_time), self._get_string_in_quotes(self.end_time),
self._get_string_in_quotes(transform_desc), self._get_string_in_quotes(transform_desc),
self._get_string_in_quotes(display) self._get_string_in_quotes(display),
) )
elif re.search('ods', self.client, re.IGNORECASE): elif re.search("ods", self.client, re.IGNORECASE):
command = ( command = (
self.client + ' ' + self.client
'--stime=' + self._get_ods_cli_stime(self.start_time) + ' ' + + " "
'--fburlonly ' + + "--stime="
self._get_string_in_quotes(entities) + ' ' + + self._get_ods_cli_stime(self.start_time)
self._get_string_in_quotes(','.join(keys)) + ' ' + + " "
self._get_string_in_quotes(transform_desc) + "--fburlonly "
+ self._get_string_in_quotes(entities)
+ " "
+ self._get_string_in_quotes(",".join(keys))
+ " "
+ self._get_string_in_quotes(transform_desc)
) )
self.execute_script(command) self.execute_script(command)
url = "" url = ""
with open(self.OUTPUT_FILE, 'r') as fp: with open(self.OUTPUT_FILE, "r") as fp:
url = fp.readline() url = fp.readline()
return url return url

@ -3,13 +3,15 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
import math
from abc import abstractmethod from abc import abstractmethod
from advisor.db_log_parser import DataSource
from enum import Enum from enum import Enum
import math from typing import Dict
from advisor.db_log_parser import DataSource
NO_ENTITY = 'ENTITY_PLACEHOLDER' NO_ENTITY = "ENTITY_PLACEHOLDER"
class TimeSeriesData(DataSource): class TimeSeriesData(DataSource):
@ -42,10 +44,8 @@ class TimeSeriesData(DataSource):
# for each of them and populates the 'keys_ts' dictionary # for each of them and populates the 'keys_ts' dictionary
pass pass
def fetch_burst_epochs( def fetch_burst_epochs(self, entities: str, statistic: int,
self, entities, statistic, window_sec, threshold, percent window_sec: float, threshold: bool, percent: bool) -> Dict[str, Dict[int, float]]:
):
# type: (str, int, float, bool) -> Dict[str, Dict[int, float]]
# this method calculates the (percent) rate change in the 'statistic' # this method calculates the (percent) rate change in the 'statistic'
# for each entity (over 'window_sec' seconds) and returns the epochs # for each entity (over 'window_sec' seconds) and returns the epochs
# where this rate change is greater than or equal to the 'threshold' # where this rate change is greater than or equal to the 'threshold'
@ -90,7 +90,6 @@ class TimeSeriesData(DataSource):
return burst_epochs return burst_epochs
def fetch_aggregated_values(self, entity, statistics, aggregation_op): def fetch_aggregated_values(self, entity, statistics, aggregation_op):
# type: (str, AggregationOperator) -> Dict[str, float]
# this method performs the aggregation specified by 'aggregation_op' # this method performs the aggregation specified by 'aggregation_op'
# on the timeseries of 'statistics' for 'entity' and returns: # on the timeseries of 'statistics' for 'entity' and returns:
# Dict[statistic, aggregated_value] # Dict[statistic, aggregated_value]
@ -145,7 +144,7 @@ class TimeSeriesData(DataSource):
complete_keys[0], # there should be only one key complete_keys[0], # there should be only one key
cond.window_sec, cond.window_sec,
cond.rate_threshold, cond.rate_threshold,
True True,
) )
# Trigger in this case is: # Trigger in this case is:
# Dict[entity_name, Dict[timestamp, rate_change]] # Dict[entity_name, Dict[timestamp, rate_change]]
@ -156,32 +155,28 @@ class TimeSeriesData(DataSource):
cond.set_trigger(result) cond.set_trigger(result)
elif cond.behavior is self.Behavior.evaluate_expression: elif cond.behavior is self.Behavior.evaluate_expression:
self.handle_evaluate_expression( self.handle_evaluate_expression(
cond, cond, complete_keys, entities_with_stats
complete_keys,
entities_with_stats
) )
def handle_evaluate_expression(self, condition, statistics, entities): def handle_evaluate_expression(self, condition, statistics, entities):
trigger = {} trigger = {}
# check 'condition' for each of these entities # check 'condition' for each of these entities
for entity in entities: for entity in entities:
if hasattr(condition, 'aggregation_op'): if hasattr(condition, "aggregation_op"):
# in this case, the aggregation operation is performed on each # in this case, the aggregation operation is performed on each
# of the condition's 'keys' and then with aggregated values # of the condition's 'keys' and then with aggregated values
# condition's 'expression' is evaluated; if it evaluates to # condition's 'expression' is evaluated; if it evaluates to
# True, then list of the keys values is added to the # True, then list of the keys values is added to the
# condition's trigger: Dict[entity_name, List[stats]] # condition's trigger: Dict[entity_name, List[stats]]
result = self.fetch_aggregated_values( result = self.fetch_aggregated_values(
entity, statistics, condition.aggregation_op entity, statistics, condition.aggregation_op
) )
keys = [result[key] for key in statistics] keys = [result[key] for key in statistics]
try: try:
if eval(condition.expression): if eval(condition.expression):
trigger[entity] = keys trigger[entity] = keys
except Exception as e: except Exception as e:
print( print("WARNING(TimeSeriesData) check_and_trigger: " + str(e))
'WARNING(TimeSeriesData) check_and_trigger: ' + str(e)
)
else: else:
# assumption: all stats have same series of timestamps # assumption: all stats have same series of timestamps
# this is similar to the above but 'expression' is evaluated at # this is similar to the above but 'expression' is evaluated at
@ -190,19 +185,13 @@ class TimeSeriesData(DataSource):
# 'expression' evaluated to true; so trigger is: # 'expression' evaluated to true; so trigger is:
# Dict[entity, Dict[timestamp, List[stats]]] # Dict[entity, Dict[timestamp, List[stats]]]
for epoch in self.keys_ts[entity][statistics[0]].keys(): for epoch in self.keys_ts[entity][statistics[0]].keys():
keys = [ keys = [self.keys_ts[entity][key][epoch] for key in statistics]
self.keys_ts[entity][key][epoch]
for key in statistics
]
try: try:
if eval(condition.expression): if eval(condition.expression):
if entity not in trigger: if entity not in trigger:
trigger[entity] = {} trigger[entity] = {}
trigger[entity][epoch] = keys trigger[entity][epoch] = keys
except Exception as e: except Exception as e:
print( print("WARNING(TimeSeriesData) check_and_trigger: " + str(e))
'WARNING(TimeSeriesData) check_and_trigger: ' +
str(e)
)
if trigger: if trigger:
condition.set_trigger(trigger) condition.set_trigger(trigger)

@ -17,7 +17,7 @@ class IniParser:
@staticmethod @staticmethod
def remove_trailing_comment(line): def remove_trailing_comment(line):
line = line.strip() line = line.strip()
comment_start = line.find('#') comment_start = line.find("#")
if comment_start > -1: if comment_start > -1:
return line[:comment_start] return line[:comment_start]
return line return line
@ -27,7 +27,7 @@ class IniParser:
# A section header looks like: [Rule "my-new-rule"]. Essentially, # A section header looks like: [Rule "my-new-rule"]. Essentially,
# a line that is in square-brackets. # a line that is in square-brackets.
line = line.strip() line = line.strip()
if line.startswith('[') and line.endswith(']'): if line.startswith("[") and line.endswith("]"):
return True return True
return False return False
@ -38,7 +38,7 @@ class IniParser:
token_list = line.strip()[1:-1].split('"') token_list = line.strip()[1:-1].split('"')
if len(token_list) < 3: if len(token_list) < 3:
error = 'needed section header: [<section_type> "<section_name>"]' error = 'needed section header: [<section_type> "<section_name>"]'
raise ValueError('Parsing error: ' + error + '\n' + line) raise ValueError("Parsing error: " + error + "\n" + line)
return token_list[1] return token_list[1]
@staticmethod @staticmethod
@ -47,22 +47,22 @@ class IniParser:
if not line: if not line:
return IniParser.Element.comment return IniParser.Element.comment
if IniParser.is_section_header(line): if IniParser.is_section_header(line):
if line.strip()[1:-1].startswith('Suggestion'): if line.strip()[1:-1].startswith("Suggestion"):
return IniParser.Element.sugg return IniParser.Element.sugg
if line.strip()[1:-1].startswith('Rule'): if line.strip()[1:-1].startswith("Rule"):
return IniParser.Element.rule return IniParser.Element.rule
if line.strip()[1:-1].startswith('Condition'): if line.strip()[1:-1].startswith("Condition"):
return IniParser.Element.cond return IniParser.Element.cond
if '=' in line: if "=" in line:
return IniParser.Element.key_val return IniParser.Element.key_val
error = 'not a recognizable RulesSpec element' error = "not a recognizable RulesSpec element"
raise ValueError('Parsing error: ' + error + '\n' + line) raise ValueError("Parsing error: " + error + "\n" + line)
@staticmethod @staticmethod
def get_key_value_pair(line): def get_key_value_pair(line):
line = line.strip() line = line.strip()
key = line.split('=')[0].strip() key = line.split("=")[0].strip()
value = "=".join(line.split('=')[1:]) value = "=".join(line.split("=")[1:])
if value == "": # if the option has no value if value == "": # if the option has no value
return (key, None) return (key, None)
values = IniParser.get_list_from_value(value) values = IniParser.get_list_from_value(value)
@ -72,5 +72,5 @@ class IniParser:
@staticmethod @staticmethod
def get_list_from_value(value): def get_list_from_value(value):
values = value.strip().split(':') values = value.strip().split(":")
return values return values

@ -3,12 +3,13 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
import re
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from enum import Enum
from advisor.db_log_parser import DataSource, NO_COL_FAMILY from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.db_timeseries_parser import TimeSeriesData from advisor.db_timeseries_parser import TimeSeriesData
from enum import Enum
from advisor.ini_parser import IniParser from advisor.ini_parser import IniParser
import re
class Section(ABC): class Section(ABC):
@ -38,17 +39,17 @@ class Rule(Section):
# value will be a string and not a list. Hence, convert it to a single # value will be a string and not a list. Hence, convert it to a single
# element list before storing it in self.suggestions or # element list before storing it in self.suggestions or
# self.conditions. # self.conditions.
if key == 'conditions': if key == "conditions":
if isinstance(value, str): if isinstance(value, str):
self.conditions = [value] self.conditions = [value]
else: else:
self.conditions = value self.conditions = value
elif key == 'suggestions': elif key == "suggestions":
if isinstance(value, str): if isinstance(value, str):
self.suggestions = [value] self.suggestions = [value]
else: else:
self.suggestions = value self.suggestions = value
elif key == 'overlap_time_period': elif key == "overlap_time_period":
self.overlap_time_seconds = value self.overlap_time_seconds = value
def get_suggestions(self): def get_suggestions(self):
@ -56,35 +57,29 @@ class Rule(Section):
def perform_checks(self): def perform_checks(self):
if not self.conditions or len(self.conditions) < 1: if not self.conditions or len(self.conditions) < 1:
raise ValueError( raise ValueError(self.name + ": rule must have at least one condition")
self.name + ': rule must have at least one condition'
)
if not self.suggestions or len(self.suggestions) < 1: if not self.suggestions or len(self.suggestions) < 1:
raise ValueError( raise ValueError(self.name + ": rule must have at least one suggestion")
self.name + ': rule must have at least one suggestion'
)
if self.overlap_time_seconds: if self.overlap_time_seconds:
if len(self.conditions) != 2: if len(self.conditions) != 2:
raise ValueError( raise ValueError(
self.name + ": rule must be associated with 2 conditions\ self.name
+ ": rule must be associated with 2 conditions\
in order to check for a time dependency between them" in order to check for a time dependency between them"
) )
time_format = '^\d+[s|m|h|d]$' time_format = "^\d+[s|m|h|d]$" # noqa
if ( if not re.match(time_format, self.overlap_time_seconds, re.IGNORECASE):
not
re.match(time_format, self.overlap_time_seconds, re.IGNORECASE)
):
raise ValueError( raise ValueError(
self.name + ": overlap_time_seconds format: \d+[s|m|h|d]" self.name + ": overlap_time_seconds format: \d+[s|m|h|d]"
) )
else: # convert to seconds else: # convert to seconds
in_seconds = int(self.overlap_time_seconds[:-1]) in_seconds = int(self.overlap_time_seconds[:-1])
if self.overlap_time_seconds[-1] == 'm': if self.overlap_time_seconds[-1] == "m":
in_seconds *= 60 in_seconds *= 60
elif self.overlap_time_seconds[-1] == 'h': elif self.overlap_time_seconds[-1] == "h":
in_seconds *= (60 * 60) in_seconds *= 60 * 60
elif self.overlap_time_seconds[-1] == 'd': elif self.overlap_time_seconds[-1] == "d":
in_seconds *= (24 * 60 * 60) in_seconds *= 24 * 60 * 60
self.overlap_time_seconds = in_seconds self.overlap_time_seconds = in_seconds
def get_overlap_timestamps(self, key1_trigger_epochs, key2_trigger_epochs): def get_overlap_timestamps(self, key1_trigger_epochs, key2_trigger_epochs):
@ -93,28 +88,25 @@ class Rule(Section):
# (if present) the first pair of timestamps at which the 2 conditions # (if present) the first pair of timestamps at which the 2 conditions
# were triggered within 'overlap_time_seconds' of each other # were triggered within 'overlap_time_seconds' of each other
key1_lower_bounds = [ key1_lower_bounds = [
epoch - self.overlap_time_seconds epoch - self.overlap_time_seconds for epoch in key1_trigger_epochs
for epoch in key1_trigger_epochs
] ]
key1_lower_bounds.sort() key1_lower_bounds.sort()
key2_trigger_epochs.sort() key2_trigger_epochs.sort()
trigger_ix = 0 trigger_ix = 0
overlap_pair = None overlap_pair = None
for key1_lb in key1_lower_bounds: for key1_lb in key1_lower_bounds:
while ( while key2_trigger_epochs[trigger_ix] < key1_lb and trigger_ix < len(
key2_trigger_epochs[trigger_ix] < key1_lb and key2_trigger_epochs
trigger_ix < len(key2_trigger_epochs)
): ):
trigger_ix += 1 trigger_ix += 1
if trigger_ix >= len(key2_trigger_epochs): if trigger_ix >= len(key2_trigger_epochs):
break break
if ( if key2_trigger_epochs[trigger_ix] <= key1_lb + (
key2_trigger_epochs[trigger_ix] <= 2 * self.overlap_time_seconds
key1_lb + (2 * self.overlap_time_seconds)
): ):
overlap_pair = ( overlap_pair = (
key2_trigger_epochs[trigger_ix], key2_trigger_epochs[trigger_ix],
key1_lb + self.overlap_time_seconds key1_lb + self.overlap_time_seconds,
) )
break break
return overlap_pair return overlap_pair
@ -130,10 +122,10 @@ class Rule(Section):
condition1 = conditions_dict[self.conditions[0]] condition1 = conditions_dict[self.conditions[0]]
condition2 = conditions_dict[self.conditions[1]] condition2 = conditions_dict[self.conditions[1]]
if not ( if not (
condition1.get_data_source() is DataSource.Type.TIME_SERIES and condition1.get_data_source() is DataSource.Type.TIME_SERIES
condition2.get_data_source() is DataSource.Type.TIME_SERIES and condition2.get_data_source() is DataSource.Type.TIME_SERIES
): ):
raise ValueError(self.name + ': need 2 timeseries conditions') raise ValueError(self.name + ": need 2 timeseries conditions")
map1 = condition1.get_trigger() map1 = condition1.get_trigger()
map2 = condition2.get_trigger() map2 = condition2.get_trigger()
@ -142,14 +134,10 @@ class Rule(Section):
self.trigger_entities = {} self.trigger_entities = {}
is_triggered = False is_triggered = False
entity_intersection = ( entity_intersection = set(map1.keys()).intersection(set(map2.keys()))
set(map1.keys()).intersection(set(map2.keys()))
)
for entity in entity_intersection: for entity in entity_intersection:
overlap_timestamps_pair = ( overlap_timestamps_pair = self.get_overlap_timestamps(
self.get_overlap_timestamps( list(map1[entity].keys()), list(map2[entity].keys())
list(map1[entity].keys()), list(map2[entity].keys())
)
) )
if overlap_timestamps_pair: if overlap_timestamps_pair:
self.trigger_entities[entity] = overlap_timestamps_pair self.trigger_entities[entity] = overlap_timestamps_pair
@ -166,8 +154,8 @@ class Rule(Section):
all_conditions_triggered = False all_conditions_triggered = False
break break
if ( if (
cond.get_data_source() is DataSource.Type.LOG or cond.get_data_source() is DataSource.Type.LOG
cond.get_data_source() is DataSource.Type.DB_OPTIONS or cond.get_data_source() is DataSource.Type.DB_OPTIONS
): ):
cond_col_fam = set(cond.get_trigger().keys()) cond_col_fam = set(cond.get_trigger().keys())
if NO_COL_FAMILY in cond_col_fam: if NO_COL_FAMILY in cond_col_fam:
@ -180,8 +168,8 @@ class Rule(Section):
if self.trigger_entities is None: if self.trigger_entities is None:
self.trigger_entities = cond_entities self.trigger_entities = cond_entities
else: else:
self.trigger_entities = ( self.trigger_entities = self.trigger_entities.intersection(
self.trigger_entities.intersection(cond_entities) cond_entities
) )
if not (self.trigger_entities or self.trigger_column_families): if not (self.trigger_entities or self.trigger_column_families):
all_conditions_triggered = False all_conditions_triggered = False
@ -200,7 +188,7 @@ class Rule(Section):
rule_string += cond rule_string += cond
is_first = False is_first = False
else: else:
rule_string += (" AND " + cond) rule_string += " AND " + cond
# Append suggestions # Append suggestions
rule_string += "\nsuggestions:: " rule_string += "\nsuggestions:: "
is_first = True is_first = True
@ -209,11 +197,11 @@ class Rule(Section):
rule_string += sugg rule_string += sugg
is_first = False is_first = False
else: else:
rule_string += (", " + sugg) rule_string += ", " + sugg
if self.trigger_entities: if self.trigger_entities:
rule_string += (', entities:: ' + str(self.trigger_entities)) rule_string += ", entities:: " + str(self.trigger_entities)
if self.trigger_column_families: if self.trigger_column_families:
rule_string += (', col_fam:: ' + str(self.trigger_column_families)) rule_string += ", col_fam:: " + str(self.trigger_column_families)
# Return constructed string # Return constructed string
return rule_string return rule_string
@ -232,7 +220,7 @@ class Suggestion(Section):
self.description = None self.description = None
def set_parameter(self, key, value): def set_parameter(self, key, value):
if key == 'option': if key == "option":
# Note: # Note:
# case 1: 'option' is supported by Rocksdb OPTIONS file; in this # case 1: 'option' is supported by Rocksdb OPTIONS file; in this
# case the option belongs to one of the sections in the config # case the option belongs to one of the sections in the config
@ -240,41 +228,35 @@ class Suggestion(Section):
# case 2: 'option' is not supported by Rocksdb OPTIONS file; the # case 2: 'option' is not supported by Rocksdb OPTIONS file; the
# option is not expected to have the character '.' in its name # option is not expected to have the character '.' in its name
self.option = value self.option = value
elif key == 'action': elif key == "action":
if self.option and not value: if self.option and not value:
raise ValueError(self.name + ': provide action for option') raise ValueError(self.name + ": provide action for option")
self.action = self.Action[value] self.action = self.Action[value]
elif key == 'suggested_values': elif key == "suggested_values":
if isinstance(value, str): if isinstance(value, str):
self.suggested_values = [value] self.suggested_values = [value]
else: else:
self.suggested_values = value self.suggested_values = value
elif key == 'description': elif key == "description":
self.description = value self.description = value
def perform_checks(self): def perform_checks(self):
if not self.description: if not self.description:
if not self.option: if not self.option:
raise ValueError(self.name + ': provide option or description') raise ValueError(self.name + ": provide option or description")
if not self.action: if not self.action:
raise ValueError(self.name + ': provide action for option') raise ValueError(self.name + ": provide action for option")
if self.action is self.Action.set and not self.suggested_values: if self.action is self.Action.set and not self.suggested_values:
raise ValueError( raise ValueError(self.name + ": provide suggested value for option")
self.name + ': provide suggested value for option'
)
def __repr__(self): def __repr__(self):
sugg_string = "Suggestion: " + self.name sugg_string = "Suggestion: " + self.name
if self.description: if self.description:
sugg_string += (' description : ' + self.description) sugg_string += " description : " + self.description
else: else:
sugg_string += ( sugg_string += " option : " + self.option + " action : " + self.action.name
' option : ' + self.option + ' action : ' + self.action.name
)
if self.suggested_values: if self.suggested_values:
sugg_string += ( sugg_string += " suggested_values : " + str(self.suggested_values)
' suggested_values : ' + str(self.suggested_values)
)
return sugg_string return sugg_string
@ -286,7 +268,7 @@ class Condition(Section):
def perform_checks(self): def perform_checks(self):
if not self.data_source: if not self.data_source:
raise ValueError(self.name + ': condition not tied to data source') raise ValueError(self.name + ": condition not tied to data source")
def set_data_source(self, data_source): def set_data_source(self, data_source):
self.data_source = data_source self.data_source = data_source
@ -310,28 +292,28 @@ class Condition(Section):
def set_parameter(self, key, value): def set_parameter(self, key, value):
# must be defined by the subclass # must be defined by the subclass
raise NotImplementedError(self.name + ': provide source for condition') raise NotImplementedError(self.name + ": provide source for condition")
class LogCondition(Condition): class LogCondition(Condition):
@classmethod @classmethod
def create(cls, base_condition): def create(cls, base_condition):
base_condition.set_data_source(DataSource.Type['LOG']) base_condition.set_data_source(DataSource.Type["LOG"])
base_condition.__class__ = cls base_condition.__class__ = cls
return base_condition return base_condition
def set_parameter(self, key, value): def set_parameter(self, key, value):
if key == 'regex': if key == "regex":
self.regex = value self.regex = value
def perform_checks(self): def perform_checks(self):
super().perform_checks() super().perform_checks()
if not self.regex: if not self.regex:
raise ValueError(self.name + ': provide regex for log condition') raise ValueError(self.name + ": provide regex for log condition")
def __repr__(self): def __repr__(self):
log_cond_str = "LogCondition: " + self.name log_cond_str = "LogCondition: " + self.name
log_cond_str += (" regex: " + self.regex) log_cond_str += " regex: " + self.regex
# if self.trigger: # if self.trigger:
# log_cond_str += (" trigger: " + str(self.trigger)) # log_cond_str += (" trigger: " + str(self.trigger))
return log_cond_str return log_cond_str
@ -340,90 +322,90 @@ class LogCondition(Condition):
class OptionCondition(Condition): class OptionCondition(Condition):
@classmethod @classmethod
def create(cls, base_condition): def create(cls, base_condition):
base_condition.set_data_source(DataSource.Type['DB_OPTIONS']) base_condition.set_data_source(DataSource.Type["DB_OPTIONS"])
base_condition.__class__ = cls base_condition.__class__ = cls
return base_condition return base_condition
def set_parameter(self, key, value): def set_parameter(self, key, value):
if key == 'options': if key == "options":
if isinstance(value, str): if isinstance(value, str):
self.options = [value] self.options = [value]
else: else:
self.options = value self.options = value
elif key == 'evaluate': elif key == "evaluate":
self.eval_expr = value self.eval_expr = value
def perform_checks(self): def perform_checks(self):
super().perform_checks() super().perform_checks()
if not self.options: if not self.options:
raise ValueError(self.name + ': options missing in condition') raise ValueError(self.name + ": options missing in condition")
if not self.eval_expr: if not self.eval_expr:
raise ValueError(self.name + ': expression missing in condition') raise ValueError(self.name + ": expression missing in condition")
def __repr__(self): def __repr__(self):
opt_cond_str = "OptionCondition: " + self.name opt_cond_str = "OptionCondition: " + self.name
opt_cond_str += (" options: " + str(self.options)) opt_cond_str += " options: " + str(self.options)
opt_cond_str += (" expression: " + self.eval_expr) opt_cond_str += " expression: " + self.eval_expr
if self.trigger: if self.trigger:
opt_cond_str += (" trigger: " + str(self.trigger)) opt_cond_str += " trigger: " + str(self.trigger)
return opt_cond_str return opt_cond_str
class TimeSeriesCondition(Condition): class TimeSeriesCondition(Condition):
@classmethod @classmethod
def create(cls, base_condition): def create(cls, base_condition):
base_condition.set_data_source(DataSource.Type['TIME_SERIES']) base_condition.set_data_source(DataSource.Type["TIME_SERIES"])
base_condition.__class__ = cls base_condition.__class__ = cls
return base_condition return base_condition
def set_parameter(self, key, value): def set_parameter(self, key, value):
if key == 'keys': if key == "keys":
if isinstance(value, str): if isinstance(value, str):
self.keys = [value] self.keys = [value]
else: else:
self.keys = value self.keys = value
elif key == 'behavior': elif key == "behavior":
self.behavior = TimeSeriesData.Behavior[value] self.behavior = TimeSeriesData.Behavior[value]
elif key == 'rate_threshold': elif key == "rate_threshold":
self.rate_threshold = float(value) self.rate_threshold = float(value)
elif key == 'window_sec': elif key == "window_sec":
self.window_sec = int(value) self.window_sec = int(value)
elif key == 'evaluate': elif key == "evaluate":
self.expression = value self.expression = value
elif key == 'aggregation_op': elif key == "aggregation_op":
self.aggregation_op = TimeSeriesData.AggregationOperator[value] self.aggregation_op = TimeSeriesData.AggregationOperator[value]
def perform_checks(self): def perform_checks(self):
if not self.keys: if not self.keys:
raise ValueError(self.name + ': specify timeseries key') raise ValueError(self.name + ": specify timeseries key")
if not self.behavior: if not self.behavior:
raise ValueError(self.name + ': specify triggering behavior') raise ValueError(self.name + ": specify triggering behavior")
if self.behavior is TimeSeriesData.Behavior.bursty: if self.behavior is TimeSeriesData.Behavior.bursty:
if not self.rate_threshold: if not self.rate_threshold:
raise ValueError(self.name + ': specify rate burst threshold') raise ValueError(self.name + ": specify rate burst threshold")
if not self.window_sec: if not self.window_sec:
self.window_sec = 300 # default window length is 5 minutes self.window_sec = 300 # default window length is 5 minutes
if len(self.keys) > 1: if len(self.keys) > 1:
raise ValueError(self.name + ': specify only one key') raise ValueError(self.name + ": specify only one key")
elif self.behavior is TimeSeriesData.Behavior.evaluate_expression: elif self.behavior is TimeSeriesData.Behavior.evaluate_expression:
if not (self.expression): if not (self.expression):
raise ValueError(self.name + ': specify evaluation expression') raise ValueError(self.name + ": specify evaluation expression")
else: else:
raise ValueError(self.name + ': trigger behavior not supported') raise ValueError(self.name + ": trigger behavior not supported")
def __repr__(self): def __repr__(self):
ts_cond_str = "TimeSeriesCondition: " + self.name ts_cond_str = "TimeSeriesCondition: " + self.name
ts_cond_str += (" statistics: " + str(self.keys)) ts_cond_str += " statistics: " + str(self.keys)
ts_cond_str += (" behavior: " + self.behavior.name) ts_cond_str += " behavior: " + self.behavior.name
if self.behavior is TimeSeriesData.Behavior.bursty: if self.behavior is TimeSeriesData.Behavior.bursty:
ts_cond_str += (" rate_threshold: " + str(self.rate_threshold)) ts_cond_str += " rate_threshold: " + str(self.rate_threshold)
ts_cond_str += (" window_sec: " + str(self.window_sec)) ts_cond_str += " window_sec: " + str(self.window_sec)
if self.behavior is TimeSeriesData.Behavior.evaluate_expression: if self.behavior is TimeSeriesData.Behavior.evaluate_expression:
ts_cond_str += (" expression: " + self.expression) ts_cond_str += " expression: " + self.expression
if hasattr(self, 'aggregation_op'): if hasattr(self, "aggregation_op"):
ts_cond_str += (" aggregation_op: " + self.aggregation_op.name) ts_cond_str += " aggregation_op: " + self.aggregation_op.name
if self.trigger: if self.trigger:
ts_cond_str += (" trigger: " + str(self.trigger)) ts_cond_str += " trigger: " + str(self.trigger)
return ts_cond_str return ts_cond_str
@ -446,7 +428,7 @@ class RulesSpec:
def load_rules_from_spec(self): def load_rules_from_spec(self):
self.initialise_fields() self.initialise_fields()
with open(self.file_path, 'r') as db_rules: with open(self.file_path, "r") as db_rules:
curr_section = None curr_section = None
for line in db_rules: for line in db_rules:
line = IniParser.remove_trailing_comment(line) line = IniParser.remove_trailing_comment(line)
@ -472,12 +454,12 @@ class RulesSpec:
if curr_section is IniParser.Element.rule: if curr_section is IniParser.Element.rule:
new_rule.set_parameter(key, value) new_rule.set_parameter(key, value)
elif curr_section is IniParser.Element.cond: elif curr_section is IniParser.Element.cond:
if key == 'source': if key == "source":
if value == 'LOG': if value == "LOG":
new_cond = LogCondition.create(new_cond) new_cond = LogCondition.create(new_cond)
elif value == 'OPTIONS': elif value == "OPTIONS":
new_cond = OptionCondition.create(new_cond) new_cond = OptionCondition.create(new_cond)
elif value == 'TIME_SERIES': elif value == "TIME_SERIES":
new_cond = TimeSeriesCondition.create(new_cond) new_cond = TimeSeriesCondition.create(new_cond)
else: else:
new_cond.set_parameter(key, value) new_cond.set_parameter(key, value)
@ -515,14 +497,14 @@ class RulesSpec:
def print_rules(self, rules): def print_rules(self, rules):
for rule in rules: for rule in rules:
print('\nRule: ' + rule.name) print("\nRule: " + rule.name)
for cond_name in rule.conditions: for cond_name in rule.conditions:
print(repr(self.conditions_dict[cond_name])) print(repr(self.conditions_dict[cond_name]))
for sugg_name in rule.suggestions: for sugg_name in rule.suggestions:
print(repr(self.suggestions_dict[sugg_name])) print(repr(self.suggestions_dict[sugg_name]))
if rule.trigger_entities: if rule.trigger_entities:
print('scope: entities:') print("scope: entities:")
print(rule.trigger_entities) print(rule.trigger_entities)
if rule.trigger_column_families: if rule.trigger_column_families:
print('scope: col_fam:') print("scope: col_fam:")
print(rule.trigger_column_families) print(rule.trigger_column_families)

@ -3,11 +3,12 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from advisor.rule_parser import RulesSpec import argparse
from advisor.db_log_parser import DatabaseLogs, DataSource from advisor.db_log_parser import DatabaseLogs, DataSource
from advisor.db_options_parser import DatabaseOptions from advisor.db_options_parser import DatabaseOptions
from advisor.db_stats_fetcher import LogStatsParser, OdsStatsFetcher from advisor.db_stats_fetcher import LogStatsParser, OdsStatsFetcher
import argparse from advisor.rule_parser import RulesSpec
def main(args): def main(args):
@ -18,9 +19,7 @@ def main(args):
# initialize the DatabaseOptions object # initialize the DatabaseOptions object
db_options = DatabaseOptions(args.rocksdb_options) db_options = DatabaseOptions(args.rocksdb_options)
# Create DatabaseLogs object # Create DatabaseLogs object
db_logs = DatabaseLogs( db_logs = DatabaseLogs(args.log_files_path_prefix, db_options.get_column_families())
args.log_files_path_prefix, db_options.get_column_families()
)
# Create the Log STATS object # Create the Log STATS object
db_log_stats = LogStatsParser( db_log_stats = LogStatsParser(
args.log_files_path_prefix, args.stats_dump_period_sec args.log_files_path_prefix, args.stats_dump_period_sec
@ -28,62 +27,72 @@ def main(args):
data_sources = { data_sources = {
DataSource.Type.DB_OPTIONS: [db_options], DataSource.Type.DB_OPTIONS: [db_options],
DataSource.Type.LOG: [db_logs], DataSource.Type.LOG: [db_logs],
DataSource.Type.TIME_SERIES: [db_log_stats] DataSource.Type.TIME_SERIES: [db_log_stats],
} }
if args.ods_client: if args.ods_client:
data_sources[DataSource.Type.TIME_SERIES].append(OdsStatsFetcher( data_sources[DataSource.Type.TIME_SERIES].append(
args.ods_client, OdsStatsFetcher(
args.ods_entity, args.ods_client,
args.ods_tstart, args.ods_entity,
args.ods_tend, args.ods_tstart,
args.ods_key_prefix args.ods_tend,
)) args.ods_key_prefix,
)
)
triggered_rules = rule_spec_parser.get_triggered_rules( triggered_rules = rule_spec_parser.get_triggered_rules(
data_sources, db_options.get_column_families() data_sources, db_options.get_column_families()
) )
rule_spec_parser.print_rules(triggered_rules) rule_spec_parser.print_rules(triggered_rules)
if __name__ == '__main__': if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Use this script to get\ parser = argparse.ArgumentParser(
suggestions for improving Rocksdb performance.') description="Use this script to get\
parser.add_argument( suggestions for improving Rocksdb performance."
'--rules_spec', required=True, type=str,
help='path of the file containing the expert-specified Rules'
) )
parser.add_argument( parser.add_argument(
'--rocksdb_options', required=True, type=str, "--rules_spec",
help='path of the starting Rocksdb OPTIONS file' required=True,
type=str,
help="path of the file containing the expert-specified Rules",
) )
parser.add_argument( parser.add_argument(
'--log_files_path_prefix', required=True, type=str, "--rocksdb_options",
help='path prefix of the Rocksdb LOG files' required=True,
type=str,
help="path of the starting Rocksdb OPTIONS file",
) )
parser.add_argument( parser.add_argument(
'--stats_dump_period_sec', required=True, type=int, "--log_files_path_prefix",
help='the frequency (in seconds) at which STATISTICS are printed to ' + required=True,
'the Rocksdb LOG file' type=str,
help="path prefix of the Rocksdb LOG files",
) )
# ODS arguments
parser.add_argument( parser.add_argument(
'--ods_client', type=str, help='the ODS client binary' "--stats_dump_period_sec",
required=True,
type=int,
help="the frequency (in seconds) at which STATISTICS are printed to "
+ "the Rocksdb LOG file",
) )
# ODS arguments
parser.add_argument("--ods_client", type=str, help="the ODS client binary")
parser.add_argument( parser.add_argument(
'--ods_entity', type=str, "--ods_entity",
help='the servers for which the ODS stats need to be fetched' type=str,
help="the servers for which the ODS stats need to be fetched",
) )
parser.add_argument( parser.add_argument(
'--ods_key_prefix', type=str, "--ods_key_prefix",
help='the prefix that needs to be attached to the keys of time ' + type=str,
'series to be fetched from ODS' help="the prefix that needs to be attached to the keys of time "
+ "series to be fetched from ODS",
) )
parser.add_argument( parser.add_argument(
'--ods_tstart', type=int, "--ods_tstart", type=int, help="start time of timeseries to be fetched from ODS"
help='start time of timeseries to be fetched from ODS'
) )
parser.add_argument( parser.add_argument(
'--ods_tend', type=int, "--ods_tend", type=int, help="end time of timeseries to be fetched from ODS"
help='end time of timeseries to be fetched from ODS'
) )
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)

@ -3,24 +3,25 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from advisor.db_bench_runner import DBBenchRunner
from advisor.db_log_parser import NO_COL_FAMILY, DataSource
from advisor.db_options_parser import DatabaseOptions
import os import os
import unittest import unittest
from advisor.db_bench_runner import DBBenchRunner
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
class TestDBBenchRunnerMethods(unittest.TestCase): class TestDBBenchRunnerMethods(unittest.TestCase):
def setUp(self): def setUp(self):
self.pos_args = [ self.pos_args = [
'./../../db_bench', "./../../db_bench",
'overwrite', "overwrite",
'use_existing_db=true', "use_existing_db=true",
'duration=10' "duration=10",
] ]
self.bench_runner = DBBenchRunner(self.pos_args) self.bench_runner = DBBenchRunner(self.pos_args)
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
self.db_options = DatabaseOptions(options_path) self.db_options = DatabaseOptions(options_path)
def test_setup(self): def test_setup(self):
@ -31,71 +32,70 @@ class TestDBBenchRunnerMethods(unittest.TestCase):
) )
def test_get_info_log_file_name(self): def test_get_info_log_file_name(self):
log_file_name = DBBenchRunner.get_info_log_file_name( log_file_name = DBBenchRunner.get_info_log_file_name(None, "random_path")
None, 'random_path' self.assertEqual(log_file_name, "LOG")
)
self.assertEqual(log_file_name, 'LOG')
log_file_name = DBBenchRunner.get_info_log_file_name( log_file_name = DBBenchRunner.get_info_log_file_name(
'/dev/shm/', '/tmp/rocksdbtest-155919/dbbench/' "/dev/shm/", "/tmp/rocksdbtest-155919/dbbench/"
) )
self.assertEqual(log_file_name, 'tmp_rocksdbtest-155919_dbbench_LOG') self.assertEqual(log_file_name, "tmp_rocksdbtest-155919_dbbench_LOG")
def test_get_opt_args_str(self): def test_get_opt_args_str(self):
misc_opt_dict = {'bloom_bits': 2, 'empty_opt': None, 'rate_limiter': 3} misc_opt_dict = {"bloom_bits": 2, "empty_opt": None, "rate_limiter": 3}
optional_args_str = DBBenchRunner.get_opt_args_str(misc_opt_dict) optional_args_str = DBBenchRunner.get_opt_args_str(misc_opt_dict)
self.assertEqual(optional_args_str, ' --bloom_bits=2 --rate_limiter=3') self.assertEqual(optional_args_str, " --bloom_bits=2 --rate_limiter=3")
def test_get_log_options(self): def test_get_log_options(self):
db_path = '/tmp/rocksdb-155919/dbbench' db_path = "/tmp/rocksdb-155919/dbbench"
# when db_log_dir is present in the db_options # when db_log_dir is present in the db_options
update_dict = { update_dict = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: '/dev/shm'}, "DBOptions.db_log_dir": {NO_COL_FAMILY: "/dev/shm"},
'DBOptions.stats_dump_period_sec': {NO_COL_FAMILY: '20'} "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: "20"},
} }
self.db_options.update_options(update_dict) self.db_options.update_options(update_dict)
log_file_prefix, stats_freq = self.bench_runner.get_log_options( log_file_prefix, stats_freq = self.bench_runner.get_log_options(
self.db_options, db_path self.db_options, db_path
) )
self.assertEqual( self.assertEqual(log_file_prefix, "/dev/shm/tmp_rocksdb-155919_dbbench_LOG")
log_file_prefix, '/dev/shm/tmp_rocksdb-155919_dbbench_LOG'
)
self.assertEqual(stats_freq, 20) self.assertEqual(stats_freq, 20)
update_dict = { update_dict = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: None}, "DBOptions.db_log_dir": {NO_COL_FAMILY: None},
'DBOptions.stats_dump_period_sec': {NO_COL_FAMILY: '30'} "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: "30"},
} }
self.db_options.update_options(update_dict) self.db_options.update_options(update_dict)
log_file_prefix, stats_freq = self.bench_runner.get_log_options( log_file_prefix, stats_freq = self.bench_runner.get_log_options(
self.db_options, db_path self.db_options, db_path
) )
self.assertEqual(log_file_prefix, '/tmp/rocksdb-155919/dbbench/LOG') self.assertEqual(log_file_prefix, "/tmp/rocksdb-155919/dbbench/LOG")
self.assertEqual(stats_freq, 30) self.assertEqual(stats_freq, 30)
def test_build_experiment_command(self): def test_build_experiment_command(self):
# add some misc_options to db_options # add some misc_options to db_options
update_dict = { update_dict = {
'bloom_bits': {NO_COL_FAMILY: 2}, "bloom_bits": {NO_COL_FAMILY: 2},
'rate_limiter_bytes_per_sec': {NO_COL_FAMILY: 128000000} "rate_limiter_bytes_per_sec": {NO_COL_FAMILY: 128000000},
} }
self.db_options.update_options(update_dict) self.db_options.update_options(update_dict)
db_path = '/dev/shm' db_path = "/dev/shm"
experiment_command = self.bench_runner._build_experiment_command( experiment_command = self.bench_runner._build_experiment_command(
self.db_options, db_path self.db_options, db_path
) )
opt_args_str = DBBenchRunner.get_opt_args_str( opt_args_str = DBBenchRunner.get_opt_args_str(
self.db_options.get_misc_options() self.db_options.get_misc_options()
) )
opt_args_str += ( opt_args_str += " --options_file=" + self.db_options.generate_options_config(
' --options_file=' + "12345"
self.db_options.generate_options_config('12345')
) )
for arg in self.pos_args[2:]: for arg in self.pos_args[2:]:
opt_args_str += (' --' + arg) opt_args_str += " --" + arg
expected_command = ( expected_command = (
self.pos_args[0] + ' --benchmarks=' + self.pos_args[1] + self.pos_args[0]
' --statistics --perf_level=3 --db=' + db_path + opt_args_str + " --benchmarks="
+ self.pos_args[1]
+ " --statistics --perf_level=3 --db="
+ db_path
+ opt_args_str
) )
self.assertEqual(experiment_command, expected_command) self.assertEqual(experiment_command, expected_command)
@ -104,44 +104,38 @@ class TestDBBenchRunner(unittest.TestCase):
def setUp(self): def setUp(self):
# Note: the db_bench binary should be present in the rocksdb/ directory # Note: the db_bench binary should be present in the rocksdb/ directory
self.pos_args = [ self.pos_args = [
'./../../db_bench', "./../../db_bench",
'overwrite', "overwrite",
'use_existing_db=true', "use_existing_db=true",
'duration=20' "duration=20",
] ]
self.bench_runner = DBBenchRunner(self.pos_args) self.bench_runner = DBBenchRunner(self.pos_args)
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
self.db_options = DatabaseOptions(options_path) self.db_options = DatabaseOptions(options_path)
def test_experiment_output(self): def test_experiment_output(self):
update_dict = {'bloom_bits': {NO_COL_FAMILY: 2}} update_dict = {"bloom_bits": {NO_COL_FAMILY: 2}}
self.db_options.update_options(update_dict) self.db_options.update_options(update_dict)
db_path = '/dev/shm' db_path = "/dev/shm"
data_sources, throughput = self.bench_runner.run_experiment( data_sources, throughput = self.bench_runner.run_experiment(
self.db_options, db_path self.db_options, db_path
) )
self.assertEqual( self.assertEqual(
data_sources[DataSource.Type.DB_OPTIONS][0].type, data_sources[DataSource.Type.DB_OPTIONS][0].type, DataSource.Type.DB_OPTIONS
DataSource.Type.DB_OPTIONS
)
self.assertEqual(
data_sources[DataSource.Type.LOG][0].type,
DataSource.Type.LOG
) )
self.assertEqual(data_sources[DataSource.Type.LOG][0].type, DataSource.Type.LOG)
self.assertEqual(len(data_sources[DataSource.Type.TIME_SERIES]), 2) self.assertEqual(len(data_sources[DataSource.Type.TIME_SERIES]), 2)
self.assertEqual( self.assertEqual(
data_sources[DataSource.Type.TIME_SERIES][0].type, data_sources[DataSource.Type.TIME_SERIES][0].type,
DataSource.Type.TIME_SERIES DataSource.Type.TIME_SERIES,
) )
self.assertEqual( self.assertEqual(
data_sources[DataSource.Type.TIME_SERIES][1].type, data_sources[DataSource.Type.TIME_SERIES][1].type,
DataSource.Type.TIME_SERIES DataSource.Type.TIME_SERIES,
)
self.assertEqual(
data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec, 0
) )
self.assertEqual(data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec, 0)
if __name__ == '__main__': if __name__ == "__main__":
unittest.main() unittest.main()

@ -3,52 +3,49 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY
from advisor.rule_parser import Condition, LogCondition
import os import os
import unittest import unittest
from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY
from advisor.rule_parser import Condition, LogCondition
class TestLog(unittest.TestCase): class TestLog(unittest.TestCase):
def setUp(self): def setUp(self):
self.column_families = ['default', 'col_fam_A'] self.column_families = ["default", "col_fam_A"]
def test_get_column_family(self): def test_get_column_family(self):
test_log = ( test_log = (
"2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " + "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] "
"[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK" + "[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK"
) )
db_log = Log(test_log, self.column_families) db_log = Log(test_log, self.column_families)
self.assertEqual('col_fam_A', db_log.get_column_family()) self.assertEqual("col_fam_A", db_log.get_column_family())
test_log = ( test_log = (
"2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " + "2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] "
"[JOB 44] Level-0 flush table #84: 1890780 bytes OK" + "[JOB 44] Level-0 flush table #84: 1890780 bytes OK"
) )
db_log = Log(test_log, self.column_families) db_log = Log(test_log, self.column_families)
db_log.append_message('[default] some remaining part of log') db_log.append_message("[default] some remaining part of log")
self.assertEqual(NO_COL_FAMILY, db_log.get_column_family()) self.assertEqual(NO_COL_FAMILY, db_log.get_column_family())
def test_get_methods(self): def test_get_methods(self):
hr_time = "2018/05/25-14:30:25.491635" hr_time = "2018/05/25-14:30:25.491635"
context = "7f82ba72e700" context = "7f82ba72e700"
message = ( message = (
"[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table " + "[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table "
"#23: started" + "#23: started"
) )
test_log = hr_time + " " + context + " " + message test_log = hr_time + " " + context + " " + message
db_log = Log(test_log, self.column_families) db_log = Log(test_log, self.column_families)
self.assertEqual(db_log.get_message(), message) self.assertEqual(db_log.get_message(), message)
remaining_message = "[col_fam_A] some more logs" remaining_message = "[col_fam_A] some more logs"
db_log.append_message(remaining_message) db_log.append_message(remaining_message)
self.assertEqual( self.assertEqual(db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635")
db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635"
)
self.assertEqual(db_log.get_context(), "7f82ba72e700") self.assertEqual(db_log.get_context(), "7f82ba72e700")
self.assertEqual(db_log.get_timestamp(), 1527258625) self.assertEqual(db_log.get_timestamp(), 1527258625)
self.assertEqual( self.assertEqual(db_log.get_message(), str(message + "\n" + remaining_message))
db_log.get_message(), str(message + '\n' + remaining_message)
)
def test_is_new_log(self): def test_is_new_log(self):
new_log = "2018/05/25-14:34:21.047233 context random new log" new_log = "2018/05/25-14:34:21.047233 context random new log"
@ -60,44 +57,40 @@ class TestLog(unittest.TestCase):
class TestDatabaseLogs(unittest.TestCase): class TestDatabaseLogs(unittest.TestCase):
def test_check_and_trigger_conditions(self): def test_check_and_trigger_conditions(self):
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
logs_path_prefix = os.path.join(this_path, 'input_files/LOG-0') logs_path_prefix = os.path.join(this_path, "input_files/LOG-0")
column_families = ['default', 'col-fam-A', 'col-fam-B'] column_families = ["default", "col-fam-A", "col-fam-B"]
db_logs = DatabaseLogs(logs_path_prefix, column_families) db_logs = DatabaseLogs(logs_path_prefix, column_families)
# matches, has 2 col_fams # matches, has 2 col_fams
condition1 = LogCondition.create(Condition('cond-A')) condition1 = LogCondition.create(Condition("cond-A"))
condition1.set_parameter('regex', 'random log message') condition1.set_parameter("regex", "random log message")
# matches, multiple lines message # matches, multiple lines message
condition2 = LogCondition.create(Condition('cond-B')) condition2 = LogCondition.create(Condition("cond-B"))
condition2.set_parameter('regex', 'continuing on next line') condition2.set_parameter("regex", "continuing on next line")
# does not match # does not match
condition3 = LogCondition.create(Condition('cond-C')) condition3 = LogCondition.create(Condition("cond-C"))
condition3.set_parameter('regex', 'this should match no log') condition3.set_parameter("regex", "this should match no log")
db_logs.check_and_trigger_conditions( db_logs.check_and_trigger_conditions([condition1, condition2, condition3])
[condition1, condition2, condition3]
)
cond1_trigger = condition1.get_trigger() cond1_trigger = condition1.get_trigger()
self.assertEqual(2, len(cond1_trigger.keys())) self.assertEqual(2, len(cond1_trigger.keys()))
self.assertSetEqual( self.assertSetEqual({"col-fam-A", NO_COL_FAMILY}, set(cond1_trigger.keys()))
{'col-fam-A', NO_COL_FAMILY}, set(cond1_trigger.keys()) self.assertEqual(2, len(cond1_trigger["col-fam-A"]))
)
self.assertEqual(2, len(cond1_trigger['col-fam-A']))
messages = [ messages = [
"[db/db_impl.cc:563] [col-fam-A] random log message for testing", "[db/db_impl.cc:563] [col-fam-A] random log message for testing",
"[db/db_impl.cc:653] [col-fam-A] another random log message" "[db/db_impl.cc:653] [col-fam-A] another random log message",
] ]
self.assertIn(cond1_trigger['col-fam-A'][0].get_message(), messages) self.assertIn(cond1_trigger["col-fam-A"][0].get_message(), messages)
self.assertIn(cond1_trigger['col-fam-A'][1].get_message(), messages) self.assertIn(cond1_trigger["col-fam-A"][1].get_message(), messages)
self.assertEqual(1, len(cond1_trigger[NO_COL_FAMILY])) self.assertEqual(1, len(cond1_trigger[NO_COL_FAMILY]))
self.assertEqual( self.assertEqual(
cond1_trigger[NO_COL_FAMILY][0].get_message(), cond1_trigger[NO_COL_FAMILY][0].get_message(),
"[db/db_impl.cc:331] [unknown] random log message no column family" "[db/db_impl.cc:331] [unknown] random log message no column family",
) )
cond2_trigger = condition2.get_trigger() cond2_trigger = condition2.get_trigger()
self.assertEqual(['col-fam-B'], list(cond2_trigger.keys())) self.assertEqual(["col-fam-B"], list(cond2_trigger.keys()))
self.assertEqual(1, len(cond2_trigger['col-fam-B'])) self.assertEqual(1, len(cond2_trigger["col-fam-B"]))
self.assertEqual( self.assertEqual(
cond2_trigger['col-fam-B'][0].get_message(), cond2_trigger["col-fam-B"][0].get_message(),
"[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n" + "[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n"
"remaining part of the log" + "remaining part of the log",
) )
self.assertIsNone(condition3.get_trigger()) self.assertIsNone(condition3.get_trigger())

@ -3,105 +3,107 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
import os
import unittest
from advisor.db_log_parser import NO_COL_FAMILY from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import Condition, OptionCondition from advisor.rule_parser import Condition, OptionCondition
import os
import unittest
class TestDatabaseOptions(unittest.TestCase): class TestDatabaseOptions(unittest.TestCase):
def setUp(self): def setUp(self):
self.this_path = os.path.abspath(os.path.dirname(__file__)) self.this_path = os.path.abspath(os.path.dirname(__file__))
self.og_options = os.path.join( self.og_options = os.path.join(self.this_path, "input_files/OPTIONS-000005")
self.this_path, 'input_files/OPTIONS-000005' misc_options = ["bloom_bits = 4", "rate_limiter_bytes_per_sec = 1024000"]
)
misc_options = [
'bloom_bits = 4', 'rate_limiter_bytes_per_sec = 1024000'
]
# create the options object # create the options object
self.db_options = DatabaseOptions(self.og_options, misc_options) self.db_options = DatabaseOptions(self.og_options, misc_options)
# perform clean-up before running tests # perform clean-up before running tests
self.generated_options = os.path.join( self.generated_options = os.path.join(
self.this_path, '../temp/OPTIONS_testing.tmp' self.this_path, "../temp/OPTIONS_testing.tmp"
) )
if os.path.isfile(self.generated_options): if os.path.isfile(self.generated_options):
os.remove(self.generated_options) os.remove(self.generated_options)
def test_get_options_diff(self): def test_get_options_diff(self):
old_opt = { old_opt = {
'DBOptions.stats_dump_freq_sec': {NO_COL_FAMILY: '20'}, "DBOptions.stats_dump_freq_sec": {NO_COL_FAMILY: "20"},
'CFOptions.write_buffer_size': { "CFOptions.write_buffer_size": {
'default': '1024000', "default": "1024000",
'col_fam_A': '128000', "col_fam_A": "128000",
'col_fam_B': '128000000' "col_fam_B": "128000000",
}, },
'DBOptions.use_fsync': {NO_COL_FAMILY: 'true'}, "DBOptions.use_fsync": {NO_COL_FAMILY: "true"},
'DBOptions.max_log_file_size': {NO_COL_FAMILY: '128000000'} "DBOptions.max_log_file_size": {NO_COL_FAMILY: "128000000"},
} }
new_opt = { new_opt = {
'bloom_bits': {NO_COL_FAMILY: '4'}, "bloom_bits": {NO_COL_FAMILY: "4"},
'CFOptions.write_buffer_size': { "CFOptions.write_buffer_size": {
'default': '128000000', "default": "128000000",
'col_fam_A': '128000', "col_fam_A": "128000",
'col_fam_C': '128000000' "col_fam_C": "128000000",
}, },
'DBOptions.use_fsync': {NO_COL_FAMILY: 'true'}, "DBOptions.use_fsync": {NO_COL_FAMILY: "true"},
'DBOptions.max_log_file_size': {NO_COL_FAMILY: '0'} "DBOptions.max_log_file_size": {NO_COL_FAMILY: "0"},
} }
diff = DatabaseOptions.get_options_diff(old_opt, new_opt) diff = DatabaseOptions.get_options_diff(old_opt, new_opt)
expected_diff = { expected_diff = {
'DBOptions.stats_dump_freq_sec': {NO_COL_FAMILY: ('20', None)}, "DBOptions.stats_dump_freq_sec": {NO_COL_FAMILY: ("20", None)},
'bloom_bits': {NO_COL_FAMILY: (None, '4')}, "bloom_bits": {NO_COL_FAMILY: (None, "4")},
'CFOptions.write_buffer_size': { "CFOptions.write_buffer_size": {
'default': ('1024000', '128000000'), "default": ("1024000", "128000000"),
'col_fam_B': ('128000000', None), "col_fam_B": ("128000000", None),
'col_fam_C': (None, '128000000') "col_fam_C": (None, "128000000"),
}, },
'DBOptions.max_log_file_size': {NO_COL_FAMILY: ('128000000', '0')} "DBOptions.max_log_file_size": {NO_COL_FAMILY: ("128000000", "0")},
} }
self.assertDictEqual(diff, expected_diff) self.assertDictEqual(diff, expected_diff)
def test_is_misc_option(self): def test_is_misc_option(self):
self.assertTrue(DatabaseOptions.is_misc_option('bloom_bits')) self.assertTrue(DatabaseOptions.is_misc_option("bloom_bits"))
self.assertFalse( self.assertFalse(
DatabaseOptions.is_misc_option('DBOptions.stats_dump_freq_sec') DatabaseOptions.is_misc_option("DBOptions.stats_dump_freq_sec")
) )
def test_set_up(self): def test_set_up(self):
options = self.db_options.get_all_options() options = self.db_options.get_all_options()
self.assertEqual(22, len(options.keys())) self.assertEqual(22, len(options.keys()))
expected_misc_options = { expected_misc_options = {
'bloom_bits': '4', 'rate_limiter_bytes_per_sec': '1024000' "bloom_bits": "4",
"rate_limiter_bytes_per_sec": "1024000",
} }
self.assertDictEqual( self.assertDictEqual(expected_misc_options, self.db_options.get_misc_options())
expected_misc_options, self.db_options.get_misc_options()
)
self.assertListEqual( self.assertListEqual(
['default', 'col_fam_A'], self.db_options.get_column_families() ["default", "col_fam_A"], self.db_options.get_column_families()
) )
def test_get_options(self): def test_get_options(self):
opt_to_get = [ opt_to_get = [
'DBOptions.manual_wal_flush', 'DBOptions.db_write_buffer_size', "DBOptions.manual_wal_flush",
'bloom_bits', 'CFOptions.compaction_filter_factory', "DBOptions.db_write_buffer_size",
'CFOptions.num_levels', 'rate_limiter_bytes_per_sec', "bloom_bits",
'TableOptions.BlockBasedTable.block_align', 'random_option' "CFOptions.compaction_filter_factory",
"CFOptions.num_levels",
"rate_limiter_bytes_per_sec",
"TableOptions.BlockBasedTable.block_align",
"random_option",
] ]
options = self.db_options.get_options(opt_to_get) options = self.db_options.get_options(opt_to_get)
expected_options = { expected_options = {
'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'false'}, "DBOptions.manual_wal_flush": {NO_COL_FAMILY: "false"},
'DBOptions.db_write_buffer_size': {NO_COL_FAMILY: '0'}, "DBOptions.db_write_buffer_size": {NO_COL_FAMILY: "0"},
'bloom_bits': {NO_COL_FAMILY: '4'}, "bloom_bits": {NO_COL_FAMILY: "4"},
'CFOptions.compaction_filter_factory': { "CFOptions.compaction_filter_factory": {
'default': 'nullptr', 'col_fam_A': 'nullptr' "default": "nullptr",
"col_fam_A": "nullptr",
},
"CFOptions.num_levels": {"default": "7", "col_fam_A": "5"},
"rate_limiter_bytes_per_sec": {NO_COL_FAMILY: "1024000"},
"TableOptions.BlockBasedTable.block_align": {
"default": "false",
"col_fam_A": "true",
}, },
'CFOptions.num_levels': {'default': '7', 'col_fam_A': '5'},
'rate_limiter_bytes_per_sec': {NO_COL_FAMILY: '1024000'},
'TableOptions.BlockBasedTable.block_align': {
'default': 'false', 'col_fam_A': 'true'
}
} }
self.assertDictEqual(expected_options, options) self.assertDictEqual(expected_options, options)
@ -109,108 +111,104 @@ class TestDatabaseOptions(unittest.TestCase):
# add new, update old, set old # add new, update old, set old
# before updating # before updating
expected_old_opts = { expected_old_opts = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: None}, "DBOptions.db_log_dir": {NO_COL_FAMILY: None},
'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'false'}, "DBOptions.manual_wal_flush": {NO_COL_FAMILY: "false"},
'bloom_bits': {NO_COL_FAMILY: '4'}, "bloom_bits": {NO_COL_FAMILY: "4"},
'CFOptions.num_levels': {'default': '7', 'col_fam_A': '5'}, "CFOptions.num_levels": {"default": "7", "col_fam_A": "5"},
'TableOptions.BlockBasedTable.block_restart_interval': { "TableOptions.BlockBasedTable.block_restart_interval": {"col_fam_A": "16"},
'col_fam_A': '16'
}
} }
get_opts = list(expected_old_opts.keys()) get_opts = list(expected_old_opts.keys())
options = self.db_options.get_options(get_opts) options = self.db_options.get_options(get_opts)
self.assertEqual(expected_old_opts, options) self.assertEqual(expected_old_opts, options)
# after updating options # after updating options
update_opts = { update_opts = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: '/dev/shm'}, "DBOptions.db_log_dir": {NO_COL_FAMILY: "/dev/shm"},
'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'true'}, "DBOptions.manual_wal_flush": {NO_COL_FAMILY: "true"},
'bloom_bits': {NO_COL_FAMILY: '2'}, "bloom_bits": {NO_COL_FAMILY: "2"},
'CFOptions.num_levels': {'col_fam_A': '7'}, "CFOptions.num_levels": {"col_fam_A": "7"},
'TableOptions.BlockBasedTable.block_restart_interval': { "TableOptions.BlockBasedTable.block_restart_interval": {"default": "32"},
'default': '32' "random_misc_option": {NO_COL_FAMILY: "something"},
},
'random_misc_option': {NO_COL_FAMILY: 'something'}
} }
self.db_options.update_options(update_opts) self.db_options.update_options(update_opts)
update_opts['CFOptions.num_levels']['default'] = '7' update_opts["CFOptions.num_levels"]["default"] = "7"
update_opts['TableOptions.BlockBasedTable.block_restart_interval'] = { update_opts["TableOptions.BlockBasedTable.block_restart_interval"] = {
'default': '32', 'col_fam_A': '16' "default": "32",
"col_fam_A": "16",
} }
get_opts.append('random_misc_option') get_opts.append("random_misc_option")
options = self.db_options.get_options(get_opts) options = self.db_options.get_options(get_opts)
self.assertDictEqual(update_opts, options) self.assertDictEqual(update_opts, options)
expected_misc_options = { expected_misc_options = {
'bloom_bits': '2', "bloom_bits": "2",
'rate_limiter_bytes_per_sec': '1024000', "rate_limiter_bytes_per_sec": "1024000",
'random_misc_option': 'something' "random_misc_option": "something",
} }
self.assertDictEqual( self.assertDictEqual(expected_misc_options, self.db_options.get_misc_options())
expected_misc_options, self.db_options.get_misc_options()
)
def test_generate_options_config(self): def test_generate_options_config(self):
# make sure file does not exist from before # make sure file does not exist from before
self.assertFalse(os.path.isfile(self.generated_options)) self.assertFalse(os.path.isfile(self.generated_options))
self.db_options.generate_options_config('testing') self.db_options.generate_options_config("testing")
self.assertTrue(os.path.isfile(self.generated_options)) self.assertTrue(os.path.isfile(self.generated_options))
def test_check_and_trigger_conditions(self): def test_check_and_trigger_conditions(self):
# options only from CFOptions # options only from CFOptions
# setup the OptionCondition objects to check and trigger # setup the OptionCondition objects to check and trigger
update_dict = { update_dict = {
'CFOptions.level0_file_num_compaction_trigger': {'col_fam_A': '4'}, "CFOptions.level0_file_num_compaction_trigger": {"col_fam_A": "4"},
'CFOptions.max_bytes_for_level_base': {'col_fam_A': '10'} "CFOptions.max_bytes_for_level_base": {"col_fam_A": "10"},
} }
self.db_options.update_options(update_dict) self.db_options.update_options(update_dict)
cond1 = Condition('opt-cond-1') cond1 = Condition("opt-cond-1")
cond1 = OptionCondition.create(cond1) cond1 = OptionCondition.create(cond1)
cond1.set_parameter( cond1.set_parameter(
'options', [ "options",
'CFOptions.level0_file_num_compaction_trigger', [
'TableOptions.BlockBasedTable.block_restart_interval', "CFOptions.level0_file_num_compaction_trigger",
'CFOptions.max_bytes_for_level_base' "TableOptions.BlockBasedTable.block_restart_interval",
] "CFOptions.max_bytes_for_level_base",
],
) )
cond1.set_parameter( cond1.set_parameter(
'evaluate', "evaluate", "int(options[0])*int(options[1])-int(options[2])>=0"
'int(options[0])*int(options[1])-int(options[2])>=0'
) )
# only DBOptions # only DBOptions
cond2 = Condition('opt-cond-2') cond2 = Condition("opt-cond-2")
cond2 = OptionCondition.create(cond2) cond2 = OptionCondition.create(cond2)
cond2.set_parameter( cond2.set_parameter(
'options', [ "options",
'DBOptions.db_write_buffer_size', [
'bloom_bits', "DBOptions.db_write_buffer_size",
'rate_limiter_bytes_per_sec' "bloom_bits",
] "rate_limiter_bytes_per_sec",
],
) )
cond2.set_parameter( cond2.set_parameter(
'evaluate', "evaluate", "(int(options[2]) * int(options[1]) * int(options[0]))==0"
'(int(options[2]) * int(options[1]) * int(options[0]))==0'
) )
# mix of CFOptions and DBOptions # mix of CFOptions and DBOptions
cond3 = Condition('opt-cond-3') cond3 = Condition("opt-cond-3")
cond3 = OptionCondition.create(cond3) cond3 = OptionCondition.create(cond3)
cond3.set_parameter( cond3.set_parameter(
'options', [ "options",
'DBOptions.db_write_buffer_size', # 0 [
'CFOptions.num_levels', # 5, 7 "DBOptions.db_write_buffer_size", # 0
'bloom_bits' # 4 "CFOptions.num_levels", # 5, 7
] "bloom_bits", # 4
],
) )
cond3.set_parameter( cond3.set_parameter(
'evaluate', 'int(options[2])*int(options[0])+int(options[1])>6' "evaluate", "int(options[2])*int(options[0])+int(options[1])>6"
) )
self.db_options.check_and_trigger_conditions([cond1, cond2, cond3]) self.db_options.check_and_trigger_conditions([cond1, cond2, cond3])
cond1_trigger = {'col_fam_A': ['4', '16', '10']} cond1_trigger = {"col_fam_A": ["4", "16", "10"]}
self.assertDictEqual(cond1_trigger, cond1.get_trigger()) self.assertDictEqual(cond1_trigger, cond1.get_trigger())
cond2_trigger = {NO_COL_FAMILY: ['0', '4', '1024000']} cond2_trigger = {NO_COL_FAMILY: ["0", "4", "1024000"]}
self.assertDictEqual(cond2_trigger, cond2.get_trigger()) self.assertDictEqual(cond2_trigger, cond2.get_trigger())
cond3_trigger = {'default': ['0', '7', '4']} cond3_trigger = {"default": ["0", "7", "4"]}
self.assertDictEqual(cond3_trigger, cond3.get_trigger()) self.assertDictEqual(cond3_trigger, cond3.get_trigger())
if __name__ == '__main__': if __name__ == "__main__":
unittest.main() unittest.main()

@ -3,49 +3,46 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
from advisor.db_stats_fetcher import LogStatsParser, DatabasePerfContext
from advisor.db_timeseries_parser import NO_ENTITY
from advisor.rule_parser import Condition, TimeSeriesCondition
import os import os
import time import time
import unittest import unittest
from unittest.mock import MagicMock from unittest.mock import MagicMock
from advisor.db_stats_fetcher import DatabasePerfContext, LogStatsParser
from advisor.db_timeseries_parser import NO_ENTITY
from advisor.rule_parser import Condition, TimeSeriesCondition
class TestLogStatsParser(unittest.TestCase): class TestLogStatsParser(unittest.TestCase):
def setUp(self): def setUp(self):
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
stats_file = os.path.join( stats_file = os.path.join(this_path, "input_files/log_stats_parser_keys_ts")
this_path, 'input_files/log_stats_parser_keys_ts'
)
# populate the keys_ts dictionary of LogStatsParser # populate the keys_ts dictionary of LogStatsParser
self.stats_dict = {NO_ENTITY: {}} self.stats_dict = {NO_ENTITY: {}}
with open(stats_file, 'r') as fp: with open(stats_file, "r") as fp:
for line in fp: for line in fp:
stat_name = line.split(':')[0].strip() stat_name = line.split(":")[0].strip()
self.stats_dict[NO_ENTITY][stat_name] = {} self.stats_dict[NO_ENTITY][stat_name] = {}
token_list = line.split(':')[1].strip().split(',') token_list = line.split(":")[1].strip().split(",")
for token in token_list: for token in token_list:
timestamp = int(token.split()[0]) timestamp = int(token.split()[0])
value = float(token.split()[1]) value = float(token.split()[1])
self.stats_dict[NO_ENTITY][stat_name][timestamp] = value self.stats_dict[NO_ENTITY][stat_name][timestamp] = value
self.log_stats_parser = LogStatsParser('dummy_log_file', 20) self.log_stats_parser = LogStatsParser("dummy_log_file", 20)
self.log_stats_parser.keys_ts = self.stats_dict self.log_stats_parser.keys_ts = self.stats_dict
def test_check_and_trigger_conditions_bursty(self): def test_check_and_trigger_conditions_bursty(self):
# mock fetch_timeseries() because 'keys_ts' has been pre-populated # mock fetch_timeseries() because 'keys_ts' has been pre-populated
self.log_stats_parser.fetch_timeseries = MagicMock() self.log_stats_parser.fetch_timeseries = MagicMock()
# condition: bursty # condition: bursty
cond1 = Condition('cond-1') cond1 = Condition("cond-1")
cond1 = TimeSeriesCondition.create(cond1) cond1 = TimeSeriesCondition.create(cond1)
cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50') cond1.set_parameter("keys", "rocksdb.db.get.micros.p50")
cond1.set_parameter('behavior', 'bursty') cond1.set_parameter("behavior", "bursty")
cond1.set_parameter('window_sec', 40) cond1.set_parameter("window_sec", 40)
cond1.set_parameter('rate_threshold', 0) cond1.set_parameter("rate_threshold", 0)
self.log_stats_parser.check_and_trigger_conditions([cond1]) self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_cond_trigger = { expected_cond_trigger = {NO_ENTITY: {1530896440: 0.9767546362322214}}
NO_ENTITY: {1530896440: 0.9767546362322214}
}
self.assertDictEqual(expected_cond_trigger, cond1.get_trigger()) self.assertDictEqual(expected_cond_trigger, cond1.get_trigger())
# ensure that fetch_timeseries() was called once # ensure that fetch_timeseries() was called once
self.log_stats_parser.fetch_timeseries.assert_called_once() self.log_stats_parser.fetch_timeseries.assert_called_once()
@ -54,23 +51,20 @@ class TestLogStatsParser(unittest.TestCase):
# mock fetch_timeseries() because 'keys_ts' has been pre-populated # mock fetch_timeseries() because 'keys_ts' has been pre-populated
self.log_stats_parser.fetch_timeseries = MagicMock() self.log_stats_parser.fetch_timeseries = MagicMock()
# condition: evaluate_expression # condition: evaluate_expression
cond1 = Condition('cond-1') cond1 = Condition("cond-1")
cond1 = TimeSeriesCondition.create(cond1) cond1 = TimeSeriesCondition.create(cond1)
cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50') cond1.set_parameter("keys", "rocksdb.db.get.micros.p50")
cond1.set_parameter('behavior', 'evaluate_expression') cond1.set_parameter("behavior", "evaluate_expression")
keys = [ keys = ["rocksdb.manifest.file.sync.micros.p99", "rocksdb.db.get.micros.p50"]
'rocksdb.manifest.file.sync.micros.p99', cond1.set_parameter("keys", keys)
'rocksdb.db.get.micros.p50' cond1.set_parameter("aggregation_op", "latest")
]
cond1.set_parameter('keys', keys)
cond1.set_parameter('aggregation_op', 'latest')
# condition evaluates to FALSE # condition evaluates to FALSE
cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)>200') cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)>200")
self.log_stats_parser.check_and_trigger_conditions([cond1]) self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]} expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]}
self.assertIsNone(cond1.get_trigger()) self.assertIsNone(cond1.get_trigger())
# condition evaluates to TRUE # condition evaluates to TRUE
cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)<200') cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)<200")
self.log_stats_parser.check_and_trigger_conditions([cond1]) self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]} expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]}
self.assertDictEqual(expected_cond_trigger, cond1.get_trigger()) self.assertDictEqual(expected_cond_trigger, cond1.get_trigger())
@ -81,23 +75,22 @@ class TestLogStatsParser(unittest.TestCase):
# mock fetch_timeseries() because 'keys_ts' has been pre-populated # mock fetch_timeseries() because 'keys_ts' has been pre-populated
self.log_stats_parser.fetch_timeseries = MagicMock() self.log_stats_parser.fetch_timeseries = MagicMock()
# condition: evaluate_expression # condition: evaluate_expression
cond1 = Condition('cond-1') cond1 = Condition("cond-1")
cond1 = TimeSeriesCondition.create(cond1) cond1 = TimeSeriesCondition.create(cond1)
cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50') cond1.set_parameter("keys", "rocksdb.db.get.micros.p50")
cond1.set_parameter('behavior', 'evaluate_expression') cond1.set_parameter("behavior", "evaluate_expression")
keys = [ keys = ["rocksdb.manifest.file.sync.micros.p99", "rocksdb.db.get.micros.p50"]
'rocksdb.manifest.file.sync.micros.p99', cond1.set_parameter("keys", keys)
'rocksdb.db.get.micros.p50' cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)>500")
]
cond1.set_parameter('keys', keys)
cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)>500')
self.log_stats_parser.check_and_trigger_conditions([cond1]) self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_trigger = {NO_ENTITY: { expected_trigger = {
1530896414: [9938.0, 16.31508], NO_ENTITY: {
1530896440: [9938.0, 16.346602], 1530896414: [9938.0, 16.31508],
1530896466: [9938.0, 16.284669], 1530896440: [9938.0, 16.346602],
1530896492: [9938.0, 16.16005] 1530896466: [9938.0, 16.284669],
}} 1530896492: [9938.0, 16.16005],
}
}
self.assertDictEqual(expected_trigger, cond1.get_trigger()) self.assertDictEqual(expected_trigger, cond1.get_trigger())
self.log_stats_parser.fetch_timeseries.assert_called_once() self.log_stats_parser.fetch_timeseries.assert_called_once()
@ -114,13 +107,15 @@ class TestDatabasePerfContext(unittest.TestCase):
perf_ts[key] = {} perf_ts[key] = {}
start_val = perf_dict[key] start_val = perf_dict[key]
for ix in range(5): for ix in range(5):
perf_ts[key][timestamp+(ix*10)] = start_val + (2 * ix * ix) perf_ts[key][timestamp + (ix * 10)] = start_val + (2 * ix * ix)
db_perf_context = DatabasePerfContext(perf_ts, 10, True) db_perf_context = DatabasePerfContext(perf_ts, 10, True)
timestamps = [timestamp+(ix*10) for ix in range(1, 5, 1)] timestamps = [timestamp + (ix * 10) for ix in range(1, 5, 1)]
values = [val for val in range(2, 15, 4)] values = [val for val in range(2, 15, 4)]
inner_dict = {timestamps[ix]: values[ix] for ix in range(4)} inner_dict = {timestamps[ix]: values[ix] for ix in range(4)}
expected_keys_ts = {NO_ENTITY: { expected_keys_ts = {
'user_key_comparison_count': inner_dict, NO_ENTITY: {
'block_cache_hit_count': inner_dict "user_key_comparison_count": inner_dict,
}} "block_cache_hit_count": inner_dict,
}
}
self.assertDictEqual(expected_keys_ts, db_perf_context.keys_ts) self.assertDictEqual(expected_keys_ts, db_perf_context.keys_ts)

@ -5,36 +5,32 @@
import os import os
import unittest import unittest
from advisor.rule_parser import RulesSpec
from advisor.db_log_parser import DatabaseLogs, DataSource from advisor.db_log_parser import DatabaseLogs, DataSource
from advisor.db_options_parser import DatabaseOptions from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import RulesSpec
RuleToSuggestions = { RuleToSuggestions = {
"stall-too-many-memtables": [ "stall-too-many-memtables": ["inc-bg-flush", "inc-write-buffer"],
'inc-bg-flush',
'inc-write-buffer'
],
"stall-too-many-L0": [ "stall-too-many-L0": [
'inc-max-subcompactions', "inc-max-subcompactions",
'inc-max-bg-compactions', "inc-max-bg-compactions",
'inc-write-buffer-size', "inc-write-buffer-size",
'dec-max-bytes-for-level-base', "dec-max-bytes-for-level-base",
'inc-l0-slowdown-writes-trigger' "inc-l0-slowdown-writes-trigger",
], ],
"stop-too-many-L0": [ "stop-too-many-L0": [
'inc-max-bg-compactions', "inc-max-bg-compactions",
'inc-write-buffer-size', "inc-write-buffer-size",
'inc-l0-stop-writes-trigger' "inc-l0-stop-writes-trigger",
], ],
"stall-too-many-compaction-bytes": [ "stall-too-many-compaction-bytes": [
'inc-max-bg-compactions', "inc-max-bg-compactions",
'inc-write-buffer-size', "inc-write-buffer-size",
'inc-hard-pending-compaction-bytes-limit', "inc-hard-pending-compaction-bytes-limit",
'inc-soft-pending-compaction-bytes-limit' "inc-soft-pending-compaction-bytes-limit",
], ],
"level0-level1-ratio": [ "level0-level1-ratio": ["l0-l1-ratio-health-check"],
'l0-l1-ratio-health-check'
]
} }
@ -42,19 +38,19 @@ class TestAllRulesTriggered(unittest.TestCase):
def setUp(self): def setUp(self):
# load the Rules # load the Rules
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
ini_path = os.path.join(this_path, 'input_files/triggered_rules.ini') ini_path = os.path.join(this_path, "input_files/triggered_rules.ini")
self.db_rules = RulesSpec(ini_path) self.db_rules = RulesSpec(ini_path)
self.db_rules.load_rules_from_spec() self.db_rules.load_rules_from_spec()
self.db_rules.perform_section_checks() self.db_rules.perform_section_checks()
# load the data sources: LOG and OPTIONS # load the data sources: LOG and OPTIONS
log_path = os.path.join(this_path, 'input_files/LOG-0') log_path = os.path.join(this_path, "input_files/LOG-0")
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
db_options_parser = DatabaseOptions(options_path) db_options_parser = DatabaseOptions(options_path)
self.column_families = db_options_parser.get_column_families() self.column_families = db_options_parser.get_column_families()
db_logs_parser = DatabaseLogs(log_path, self.column_families) db_logs_parser = DatabaseLogs(log_path, self.column_families)
self.data_sources = { self.data_sources = {
DataSource.Type.DB_OPTIONS: [db_options_parser], DataSource.Type.DB_OPTIONS: [db_options_parser],
DataSource.Type.LOG: [db_logs_parser] DataSource.Type.LOG: [db_logs_parser],
} }
def test_triggered_conditions(self): def test_triggered_conditions(self):
@ -65,8 +61,7 @@ class TestAllRulesTriggered(unittest.TestCase):
self.assertFalse(cond.is_triggered(), repr(cond)) self.assertFalse(cond.is_triggered(), repr(cond))
for rule in rules_dict.values(): for rule in rules_dict.values():
self.assertFalse( self.assertFalse(
rule.is_triggered(conditions_dict, self.column_families), rule.is_triggered(conditions_dict, self.column_families), repr(rule)
repr(rule)
) )
# # Trigger the conditions as per the data sources. # # Trigger the conditions as per the data sources.
@ -99,19 +94,19 @@ class TestConditionsConjunctions(unittest.TestCase):
def setUp(self): def setUp(self):
# load the Rules # load the Rules
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
ini_path = os.path.join(this_path, 'input_files/test_rules.ini') ini_path = os.path.join(this_path, "input_files/test_rules.ini")
self.db_rules = RulesSpec(ini_path) self.db_rules = RulesSpec(ini_path)
self.db_rules.load_rules_from_spec() self.db_rules.load_rules_from_spec()
self.db_rules.perform_section_checks() self.db_rules.perform_section_checks()
# load the data sources: LOG and OPTIONS # load the data sources: LOG and OPTIONS
log_path = os.path.join(this_path, 'input_files/LOG-1') log_path = os.path.join(this_path, "input_files/LOG-1")
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005') options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
db_options_parser = DatabaseOptions(options_path) db_options_parser = DatabaseOptions(options_path)
self.column_families = db_options_parser.get_column_families() self.column_families = db_options_parser.get_column_families()
db_logs_parser = DatabaseLogs(log_path, self.column_families) db_logs_parser = DatabaseLogs(log_path, self.column_families)
self.data_sources = { self.data_sources = {
DataSource.Type.DB_OPTIONS: [db_options_parser], DataSource.Type.DB_OPTIONS: [db_options_parser],
DataSource.Type.LOG: [db_logs_parser] DataSource.Type.LOG: [db_logs_parser],
} }
def test_condition_conjunctions(self): def test_condition_conjunctions(self):
@ -122,46 +117,43 @@ class TestConditionsConjunctions(unittest.TestCase):
self.assertFalse(cond.is_triggered(), repr(cond)) self.assertFalse(cond.is_triggered(), repr(cond))
for rule in rules_dict.values(): for rule in rules_dict.values():
self.assertFalse( self.assertFalse(
rule.is_triggered(conditions_dict, self.column_families), rule.is_triggered(conditions_dict, self.column_families), repr(rule)
repr(rule)
) )
# Trigger the conditions as per the data sources. # Trigger the conditions as per the data sources.
self.db_rules.trigger_conditions(self.data_sources) self.db_rules.trigger_conditions(self.data_sources)
# Check for the conditions # Check for the conditions
conds_triggered = ['log-1-true', 'log-2-true', 'log-3-true'] conds_triggered = ["log-1-true", "log-2-true", "log-3-true"]
conds_not_triggered = ['log-4-false', 'options-1-false'] conds_not_triggered = ["log-4-false", "options-1-false"]
for cond in conds_triggered: for cond in conds_triggered:
self.assertTrue(conditions_dict[cond].is_triggered(), repr(cond)) self.assertTrue(conditions_dict[cond].is_triggered(), repr(cond))
for cond in conds_not_triggered: for cond in conds_not_triggered:
self.assertFalse(conditions_dict[cond].is_triggered(), repr(cond)) self.assertFalse(conditions_dict[cond].is_triggered(), repr(cond))
# Check for the rules # Check for the rules
rules_triggered = ['multiple-conds-true'] rules_triggered = ["multiple-conds-true"]
rules_not_triggered = [ rules_not_triggered = [
'single-condition-false', "single-condition-false",
'multiple-conds-one-false', "multiple-conds-one-false",
'multiple-conds-all-false' "multiple-conds-all-false",
] ]
for rule_name in rules_triggered: for rule_name in rules_triggered:
rule = rules_dict[rule_name] rule = rules_dict[rule_name]
self.assertTrue( self.assertTrue(
rule.is_triggered(conditions_dict, self.column_families), rule.is_triggered(conditions_dict, self.column_families), repr(rule)
repr(rule)
) )
for rule_name in rules_not_triggered: for rule_name in rules_not_triggered:
rule = rules_dict[rule_name] rule = rules_dict[rule_name]
self.assertFalse( self.assertFalse(
rule.is_triggered(conditions_dict, self.column_families), rule.is_triggered(conditions_dict, self.column_families), repr(rule)
repr(rule)
) )
class TestSanityChecker(unittest.TestCase): class TestSanityChecker(unittest.TestCase):
def setUp(self): def setUp(self):
this_path = os.path.abspath(os.path.dirname(__file__)) this_path = os.path.abspath(os.path.dirname(__file__))
ini_path = os.path.join(this_path, 'input_files/rules_err1.ini') ini_path = os.path.join(this_path, "input_files/rules_err1.ini")
db_rules = RulesSpec(ini_path) db_rules = RulesSpec(ini_path)
db_rules.load_rules_from_spec() db_rules.load_rules_from_spec()
self.rules_dict = db_rules.get_rules_dict() self.rules_dict = db_rules.get_rules_dict()
@ -169,39 +161,39 @@ class TestSanityChecker(unittest.TestCase):
self.suggestions_dict = db_rules.get_suggestions_dict() self.suggestions_dict = db_rules.get_suggestions_dict()
def test_rule_missing_suggestions(self): def test_rule_missing_suggestions(self):
regex = '.*rule must have at least one suggestion.*' regex = ".*rule must have at least one suggestion.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.rules_dict['missing-suggestions'].perform_checks() self.rules_dict["missing-suggestions"].perform_checks()
def test_rule_missing_conditions(self): def test_rule_missing_conditions(self):
regex = '.*rule must have at least one condition.*' regex = ".*rule must have at least one condition.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.rules_dict['missing-conditions'].perform_checks() self.rules_dict["missing-conditions"].perform_checks()
def test_condition_missing_regex(self): def test_condition_missing_regex(self):
regex = '.*provide regex for log condition.*' regex = ".*provide regex for log condition.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.conditions_dict['missing-regex'].perform_checks() self.conditions_dict["missing-regex"].perform_checks()
def test_condition_missing_options(self): def test_condition_missing_options(self):
regex = '.*options missing in condition.*' regex = ".*options missing in condition.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.conditions_dict['missing-options'].perform_checks() self.conditions_dict["missing-options"].perform_checks()
def test_condition_missing_expression(self): def test_condition_missing_expression(self):
regex = '.*expression missing in condition.*' regex = ".*expression missing in condition.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.conditions_dict['missing-expression'].perform_checks() self.conditions_dict["missing-expression"].perform_checks()
def test_suggestion_missing_option(self): def test_suggestion_missing_option(self):
regex = '.*provide option or description.*' regex = ".*provide option or description.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.suggestions_dict['missing-option'].perform_checks() self.suggestions_dict["missing-option"].perform_checks()
def test_suggestion_missing_description(self): def test_suggestion_missing_description(self):
regex = '.*provide option or description.*' regex = ".*provide option or description.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
self.suggestions_dict['missing-description'].perform_checks() self.suggestions_dict["missing-description"].perform_checks()
class TestParsingErrors(unittest.TestCase): class TestParsingErrors(unittest.TestCase):
@ -209,26 +201,26 @@ class TestParsingErrors(unittest.TestCase):
self.this_path = os.path.abspath(os.path.dirname(__file__)) self.this_path = os.path.abspath(os.path.dirname(__file__))
def test_condition_missing_source(self): def test_condition_missing_source(self):
ini_path = os.path.join(self.this_path, 'input_files/rules_err2.ini') ini_path = os.path.join(self.this_path, "input_files/rules_err2.ini")
db_rules = RulesSpec(ini_path) db_rules = RulesSpec(ini_path)
regex = '.*provide source for condition.*' regex = ".*provide source for condition.*"
with self.assertRaisesRegex(NotImplementedError, regex): with self.assertRaisesRegex(NotImplementedError, regex):
db_rules.load_rules_from_spec() db_rules.load_rules_from_spec()
def test_suggestion_missing_action(self): def test_suggestion_missing_action(self):
ini_path = os.path.join(self.this_path, 'input_files/rules_err3.ini') ini_path = os.path.join(self.this_path, "input_files/rules_err3.ini")
db_rules = RulesSpec(ini_path) db_rules = RulesSpec(ini_path)
regex = '.*provide action for option.*' regex = ".*provide action for option.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
db_rules.load_rules_from_spec() db_rules.load_rules_from_spec()
def test_section_no_name(self): def test_section_no_name(self):
ini_path = os.path.join(self.this_path, 'input_files/rules_err4.ini') ini_path = os.path.join(self.this_path, "input_files/rules_err4.ini")
db_rules = RulesSpec(ini_path) db_rules = RulesSpec(ini_path)
regex = 'Parsing error: needed section header:.*' regex = "Parsing error: needed section header:.*"
with self.assertRaisesRegex(ValueError, regex): with self.assertRaisesRegex(ValueError, regex):
db_rules.load_rules_from_spec() db_rules.load_rules_from_spec()
if __name__ == '__main__': if __name__ == "__main__":
unittest.main() unittest.main()

@ -4,57 +4,59 @@
# COPYING file in the root directory) and Apache 2.0 License # COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory). # (found in the LICENSE.Apache file in the root directory).
'''Run benchmark_compare.sh on the most recent build, for CI """Run benchmark_compare.sh on the most recent build, for CI
''' """
import argparse import argparse
import glob import glob
import logging
import os import os
import re import re
import shutil import shutil
import subprocess import subprocess
import sys import sys
import logging
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
class Config: class Config:
def __init__(self, args): def __init__(self, args):
self.version_file = './include/rocksdb/version.h' self.version_file = "./include/rocksdb/version.h"
self.data_dir = os.path.expanduser(f"{args.db_dir}") self.data_dir = os.path.expanduser(f"{args.db_dir}")
self.results_dir = os.path.expanduser(f"{args.output_dir}") self.results_dir = os.path.expanduser(f"{args.output_dir}")
self.benchmark_script = f"{os.getcwd()}/tools/benchmark_compare.sh" self.benchmark_script = f"{os.getcwd()}/tools/benchmark_compare.sh"
self.benchmark_cwd = f"{os.getcwd()}/tools" self.benchmark_cwd = f"{os.getcwd()}/tools"
benchmark_env_keys = ['LD_LIBRARY_PATH', benchmark_env_keys = [
'NUM_KEYS', "LD_LIBRARY_PATH",
'KEY_SIZE', "NUM_KEYS",
'VALUE_SIZE', "KEY_SIZE",
'CACHE_SIZE_MB', "VALUE_SIZE",
'DURATION_RW', "CACHE_SIZE_MB",
'DURATION_RO', "DURATION_RW",
'MB_WRITE_PER_SEC', "DURATION_RO",
'NUM_THREADS', "MB_WRITE_PER_SEC",
'COMPRESSION_TYPE', "NUM_THREADS",
'MIN_LEVEL_TO_COMPRESS', "COMPRESSION_TYPE",
'WRITE_BUFFER_SIZE_MB', "MIN_LEVEL_TO_COMPRESS",
'TARGET_FILE_SIZE_BASE_MB', "WRITE_BUFFER_SIZE_MB",
'MAX_BYTES_FOR_LEVEL_BASE_MB', "TARGET_FILE_SIZE_BASE_MB",
'MAX_BACKGROUND_JOBS', "MAX_BYTES_FOR_LEVEL_BASE_MB",
'CACHE_INDEX_AND_FILTER_BLOCKS', "MAX_BACKGROUND_JOBS",
'USE_O_DIRECT', "CACHE_INDEX_AND_FILTER_BLOCKS",
'STATS_INTERVAL_SECONDS', "USE_O_DIRECT",
'SUBCOMPACTIONS', "STATS_INTERVAL_SECONDS",
'COMPACTION_STYLE', "SUBCOMPACTIONS",
'CI_TESTS_ONLY'] "COMPACTION_STYLE",
"CI_TESTS_ONLY",
]
def read_version(config): def read_version(config):
majorRegex = re.compile(r'#define ROCKSDB_MAJOR\s([0-9]+)') majorRegex = re.compile(r"#define ROCKSDB_MAJOR\s([0-9]+)")
minorRegex = re.compile(r'#define ROCKSDB_MINOR\s([0-9]+)') minorRegex = re.compile(r"#define ROCKSDB_MINOR\s([0-9]+)")
patchRegex = re.compile(r'#define ROCKSDB_PATCH\s([0-9]+)') patchRegex = re.compile(r"#define ROCKSDB_PATCH\s([0-9]+)")
with open(config.version_file, 'r') as reader: with open(config.version_file, "r") as reader:
major = None major = None
minor = None minor = None
patch = None patch = None
@ -77,8 +79,7 @@ def read_version(config):
def prepare(version_str, config): def prepare(version_str, config):
old_files = glob.glob(f"{config.results_dir}/{version_str}/**", old_files = glob.glob(f"{config.results_dir}/{version_str}/**", recursive=True)
recursive=True)
for f in old_files: for f in old_files:
if os.path.isfile(f): if os.path.isfile(f):
logging.debug(f"remove file {f}") logging.debug(f"remove file {f}")
@ -96,8 +97,10 @@ def prepare(version_str, config):
def results(version_str, config): def results(version_str, config):
# Copy the report TSV file back to the top level of results # Copy the report TSV file back to the top level of results
shutil.copyfile(f"{config.results_dir}/{version_str}/report.tsv", shutil.copyfile(
f"{config.results_dir}/report.tsv") f"{config.results_dir}/{version_str}/report.tsv",
f"{config.results_dir}/report.tsv",
)
def cleanup(version_str, config): def cleanup(version_str, config):
@ -116,32 +119,41 @@ def get_benchmark_env():
def main(): def main():
'''Tool for running benchmark_compare.sh on the most recent build, for CI """Tool for running benchmark_compare.sh on the most recent build, for CI
This tool will This tool will
(1) Work out the current version of RocksDB (1) Work out the current version of RocksDB
(2) Run benchmark_compare with that version alone (2) Run benchmark_compare with that version alone
''' """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='benchmark_compare.sh Python wrapper for CI.') description="benchmark_compare.sh Python wrapper for CI."
)
# --tsvfile is the name of the file to read results from # --tsvfile is the name of the file to read results from
# --esdocument is the ElasticSearch document to push these results into # --esdocument is the ElasticSearch document to push these results into
# #
parser.add_argument('--db_dir', default='~/tmp/rocksdb-benchmark-datadir', parser.add_argument(
help='Database directory hierarchy to use') "--db_dir",
parser.add_argument('--output_dir', default='~/tmp/benchmark-results', default="~/tmp/rocksdb-benchmark-datadir",
help='Benchmark output goes here') help="Database directory hierarchy to use",
parser.add_argument('--num_keys', default='10000', )
help='Number of database keys to use in benchmark test(s) (determines size of test job)') parser.add_argument(
"--output_dir",
default="~/tmp/benchmark-results",
help="Benchmark output goes here",
)
parser.add_argument(
"--num_keys",
default="10000",
help="Number of database keys to use in benchmark test(s) (determines size of test job)",
)
args = parser.parse_args() args = parser.parse_args()
config = Config(args) config = Config(args)
version = read_version(config) version = read_version(config)
if version is None: if version is None:
raise Exception( raise Exception(f"Could not read RocksDB version from {config.version_file}")
f"Could not read RocksDB version from {config.version_file}")
version_str = f"{version[0]}.{version[1]}.{version[2]}" version_str = f"{version[0]}.{version[1]}.{version[2]}"
logging.info(f"Run benchmark_ci with RocksDB version {version_str}") logging.info(f"Run benchmark_ci with RocksDB version {version_str}")
@ -149,9 +161,13 @@ def main():
try: try:
env = get_benchmark_env() env = get_benchmark_env()
env.append(('NUM_KEYS', args.num_keys)) env.append(("NUM_KEYS", args.num_keys))
cmd = [config.benchmark_script, cmd = [
config.data_dir, config.results_dir, version_str] config.benchmark_script,
config.data_dir,
config.results_dir,
version_str,
]
logging.info(f"Run {cmd} env={env} cwd={config.benchmark_cwd}") logging.info(f"Run {cmd} env={env} cwd={config.benchmark_cwd}")
subprocess.run(cmd, env=dict(env), cwd=config.benchmark_cwd) subprocess.run(cmd, env=dict(env), cwd=config.benchmark_cwd)
@ -162,5 +178,5 @@ def main():
return 0 return 0
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

@ -8,9 +8,12 @@ import sys
from block_cache_pysim import ( from block_cache_pysim import (
ARCCache, ARCCache,
CacheEntry, CacheEntry,
create_cache,
GDSizeCache, GDSizeCache,
HashTable, HashTable,
HyperbolicPolicy, HyperbolicPolicy,
kMicrosInSecond,
kSampleSize,
LFUPolicy, LFUPolicy,
LinUCBCache, LinUCBCache,
LRUCache, LRUCache,
@ -18,13 +21,10 @@ from block_cache_pysim import (
MRUPolicy, MRUPolicy,
OPTCache, OPTCache,
OPTCacheEntry, OPTCacheEntry,
run,
ThompsonSamplingCache, ThompsonSamplingCache,
TraceCache, TraceCache,
TraceRecord, TraceRecord,
create_cache,
kMicrosInSecond,
kSampleSize,
run,
) )

@ -13,6 +13,7 @@ import random
import sys import sys
import matplotlib import matplotlib
matplotlib.use("Agg") matplotlib.use("Agg")
import matplotlib.backends.backend_pdf import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt import matplotlib.pyplot as plt

@ -15,8 +15,8 @@ for base in ["buckifier", "build_tools", "coverage", "tools"]:
filenames += glob.glob(base + "/" + suff + ".py") filenames += glob.glob(base + "/" + suff + ".py")
for filename in filenames: for filename in filenames:
source = open(filename, 'r').read() + '\n' source = open(filename, "r").read() + "\n"
# Parses and syntax checks the file, throwing on error. (No pyc written.) # Parses and syntax checks the file, throwing on error. (No pyc written.)
_ = compile(source, filename, 'exec') _ = compile(source, filename, "exec")
print("No syntax errors in {0} .py files".format(len(filenames))) print("No syntax errors in {0} .py files".format(len(filenames)))

@ -2,14 +2,15 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import os import os
import sys
import time
import random import random
import tempfile
import subprocess
import shutil import shutil
import argparse import subprocess
import sys
import tempfile
import time
# params overwrite priority: # params overwrite priority:
# for default: # for default:
@ -37,8 +38,9 @@ default_params = {
"batch_protection_bytes_per_key": lambda: random.choice([0, 8]), "batch_protection_bytes_per_key": lambda: random.choice([0, 8]),
"memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]), "memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
"block_size": 16384, "block_size": 16384,
"bloom_bits": lambda: random.choice([random.randint(0,19), "bloom_bits": lambda: random.choice(
random.lognormvariate(2.3, 1.3)]), [random.randint(0, 19), random.lognormvariate(2.3, 1.3)]
),
"cache_index_and_filter_blocks": lambda: random.randint(0, 1), "cache_index_and_filter_blocks": lambda: random.randint(0, 1),
"cache_size": 8388608, "cache_size": 8388608,
"charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]), "charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]),
@ -47,12 +49,14 @@ default_params = {
"charge_file_metadata": lambda: random.choice([0, 1]), "charge_file_metadata": lambda: random.choice([0, 1]),
"checkpoint_one_in": 1000000, "checkpoint_one_in": 1000000,
"compression_type": lambda: random.choice( "compression_type": lambda: random.choice(
["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]), ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]
"bottommost_compression_type": lambda: ),
"disable" if random.randint(0, 1) == 0 else "bottommost_compression_type": lambda: "disable"
random.choice( if random.randint(0, 1) == 0
["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]), else random.choice(["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
"checksum_type" : lambda: random.choice(["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]), "checksum_type": lambda: random.choice(
["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]
),
"compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1), "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
"compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1), "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
# Disabled compression_parallel_threads as the feature is not stable # Disabled compression_parallel_threads as the feature is not stable
@ -93,12 +97,12 @@ default_params = {
# the random seed, so the same keys are chosen by every run for disallowing # the random seed, so the same keys are chosen by every run for disallowing
# overwrites. # overwrites.
"nooverwritepercent": 1, "nooverwritepercent": 1,
"open_files": lambda : random.choice([-1, -1, 100, 500000]), "open_files": lambda: random.choice([-1, -1, 100, 500000]),
"optimize_filters_for_memory": lambda: random.randint(0, 1), "optimize_filters_for_memory": lambda: random.randint(0, 1),
"partition_filters": lambda: random.randint(0, 1), "partition_filters": lambda: random.randint(0, 1),
"partition_pinning": lambda: random.randint(0, 3), "partition_pinning": lambda: random.randint(0, 3),
"pause_background_one_in": 1000000, "pause_background_one_in": 1000000,
"prefix_size" : lambda: random.choice([-1, 1, 5, 7, 8]), "prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]),
"prefixpercent": 5, "prefixpercent": 5,
"progress_reports": 0, "progress_reports": 0,
"readpercent": 45, "readpercent": 45,
@ -117,7 +121,7 @@ default_params = {
"use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
"mock_direct_io": False, "mock_direct_io": False,
"cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]), "cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]),
# fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false. # fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false.
"use_full_merge_v1": lambda: random.randint(0, 1), "use_full_merge_v1": lambda: random.randint(0, 1),
"use_merge": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1),
# 999 -> use Bloom API # 999 -> use Bloom API
@ -128,34 +132,36 @@ default_params = {
"writepercent": 35, "writepercent": 35,
"format_version": lambda: random.choice([2, 3, 4, 5, 5]), "format_version": lambda: random.choice([2, 3, 4, 5, 5]),
"index_block_restart_interval": lambda: random.choice(range(1, 16)), "index_block_restart_interval": lambda: random.choice(range(1, 16)),
"use_multiget" : lambda: random.randint(0, 1), "use_multiget": lambda: random.randint(0, 1),
"periodic_compaction_seconds" : "periodic_compaction_seconds": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]), "compaction_ttl": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
"compaction_ttl" : lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
# Test small max_manifest_file_size in a smaller chance, as most of the # Test small max_manifest_file_size in a smaller chance, as most of the
# time we want manifest history to be preserved to help debug # time we want manifest history to be preserved to help debug
"max_manifest_file_size" : lambda : random.choice( "max_manifest_file_size": lambda: random.choice(
[t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]), [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]
),
# Sync mode might make test runs slower so running it in a smaller chance # Sync mode might make test runs slower so running it in a smaller chance
"sync" : lambda : random.choice( "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]),
[1 if t == 0 else 0 for t in range(0, 20)]),
"bytes_per_sync": lambda: random.choice([0, 262144]), "bytes_per_sync": lambda: random.choice([0, 262144]),
"wal_bytes_per_sync": lambda: random.choice([0, 524288]), "wal_bytes_per_sync": lambda: random.choice([0, 524288]),
# Disable compaction_readahead_size because the test is not passing. # Disable compaction_readahead_size because the test is not passing.
#"compaction_readahead_size" : lambda : random.choice( # "compaction_readahead_size" : lambda : random.choice(
# [0, 0, 1024 * 1024]), # [0, 0, 1024 * 1024]),
"db_write_buffer_size" : lambda: random.choice( "db_write_buffer_size": lambda: random.choice(
[0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]), [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]
"avoid_unnecessary_blocking_io" : random.randint(0, 1), ),
"write_dbid_to_manifest" : random.randint(0, 1), "avoid_unnecessary_blocking_io": random.randint(0, 1),
"avoid_flush_during_recovery" : lambda: random.choice( "write_dbid_to_manifest": random.randint(0, 1),
[1 if t == 0 else 0 for t in range(0, 8)]), "avoid_flush_during_recovery": lambda: random.choice(
"max_write_batch_group_size_bytes" : lambda: random.choice( [1 if t == 0 else 0 for t in range(0, 8)]
[16, 64, 1024 * 1024, 16 * 1024 * 1024]), ),
"level_compaction_dynamic_level_bytes" : True, "max_write_batch_group_size_bytes": lambda: random.choice(
[16, 64, 1024 * 1024, 16 * 1024 * 1024]
),
"level_compaction_dynamic_level_bytes": True,
"verify_checksum_one_in": 1000000, "verify_checksum_one_in": 1000000,
"verify_db_one_in": 100000, "verify_db_one_in": 100000,
"continuous_verification_interval" : 0, "continuous_verification_interval": 0,
"max_key_len": 3, "max_key_len": 3,
"key_len_percent_dist": "1,30,69", "key_len_percent_dist": "1,30,69",
"read_fault_one_in": lambda: random.choice([0, 32, 1000]), "read_fault_one_in": lambda: random.choice([0, 32, 1000]),
@ -166,10 +172,11 @@ default_params = {
"get_property_one_in": 1000000, "get_property_one_in": 1000000,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
"max_write_buffer_size_to_maintain": lambda: random.choice( "max_write_buffer_size_to_maintain": lambda: random.choice(
[0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]), [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]
),
"user_timestamp_size": 0, "user_timestamp_size": 0,
"secondary_cache_fault_one_in" : lambda: random.choice([0, 0, 32]), "secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]),
"prepopulate_block_cache" : lambda: random.choice([0, 1]), "prepopulate_block_cache": lambda: random.choice([0, 1]),
"memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]), "memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]),
"memtable_whole_key_filtering": lambda: random.randint(0, 1), "memtable_whole_key_filtering": lambda: random.randint(0, 1),
"detect_filter_construct_corruption": lambda: random.choice([0, 1]), "detect_filter_construct_corruption": lambda: random.choice([0, 1]),
@ -177,9 +184,13 @@ default_params = {
"async_io": lambda: random.choice([0, 1]), "async_io": lambda: random.choice([0, 1]),
"wal_compression": lambda: random.choice(["none", "zstd"]), "wal_compression": lambda: random.choice(["none", "zstd"]),
"verify_sst_unique_id_in_manifest": 1, # always do unique_id verification "verify_sst_unique_id_in_manifest": 1, # always do unique_id verification
"secondary_cache_uri": lambda: random.choice( "secondary_cache_uri": lambda: random.choice(
["", "compressed_secondary_cache://capacity=8388608", [
"compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true"]), "",
"compressed_secondary_cache://capacity=8388608",
"compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true",
]
),
"allow_data_in_errors": True, "allow_data_in_errors": True,
"readahead_size": lambda: random.choice([0, 16384, 524288]), "readahead_size": lambda: random.choice([0, 16384, 524288]),
"initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]), "initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
@ -187,11 +198,12 @@ default_params = {
"num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]), "num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]),
} }
_TEST_DIR_ENV_VAR = 'TEST_TMPDIR' _TEST_DIR_ENV_VAR = "TEST_TMPDIR"
_DEBUG_LEVEL_ENV_VAR = 'DEBUG_LEVEL' _DEBUG_LEVEL_ENV_VAR = "DEBUG_LEVEL"
stress_cmd = "./db_stress" stress_cmd = "./db_stress"
def is_release_mode(): def is_release_mode():
return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0" return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0"
@ -207,7 +219,10 @@ def get_dbname(test_name):
os.mkdir(dbname) os.mkdir(dbname)
return dbname return dbname
expected_values_dir = None expected_values_dir = None
def setup_expected_values_dir(): def setup_expected_values_dir():
global expected_values_dir global expected_values_dir
if expected_values_dir is not None: if expected_values_dir is not None:
@ -215,8 +230,7 @@ def setup_expected_values_dir():
expected_dir_prefix = "rocksdb_crashtest_expected_" expected_dir_prefix = "rocksdb_crashtest_expected_"
test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR) test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
if test_tmpdir is None or test_tmpdir == "": if test_tmpdir is None or test_tmpdir == "":
expected_values_dir = tempfile.mkdtemp( expected_values_dir = tempfile.mkdtemp(prefix=expected_dir_prefix)
prefix=expected_dir_prefix)
else: else:
# if tmpdir is specified, store the expected_values_dir under that dir # if tmpdir is specified, store the expected_values_dir under that dir
expected_values_dir = test_tmpdir + "/rocksdb_crashtest_expected" expected_values_dir = test_tmpdir + "/rocksdb_crashtest_expected"
@ -225,7 +239,10 @@ def setup_expected_values_dir():
os.mkdir(expected_values_dir) os.mkdir(expected_values_dir)
return expected_values_dir return expected_values_dir
multiops_txn_key_spaces_file = None multiops_txn_key_spaces_file = None
def setup_multiops_txn_key_spaces_file(): def setup_multiops_txn_key_spaces_file():
global multiops_txn_key_spaces_file global multiops_txn_key_spaces_file
if multiops_txn_key_spaces_file is not None: if multiops_txn_key_spaces_file is not None:
@ -233,13 +250,15 @@ def setup_multiops_txn_key_spaces_file():
key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces" key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces"
test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR) test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
if test_tmpdir is None or test_tmpdir == "": if test_tmpdir is None or test_tmpdir == "":
multiops_txn_key_spaces_file = tempfile.mkstemp( multiops_txn_key_spaces_file = tempfile.mkstemp(prefix=key_spaces_file_prefix)[
prefix=key_spaces_file_prefix)[1] 1
]
else: else:
if not os.path.exists(test_tmpdir): if not os.path.exists(test_tmpdir):
os.mkdir(test_tmpdir) os.mkdir(test_tmpdir)
multiops_txn_key_spaces_file = tempfile.mkstemp( multiops_txn_key_spaces_file = tempfile.mkstemp(
prefix=key_spaces_file_prefix, dir=test_tmpdir)[1] prefix=key_spaces_file_prefix, dir=test_tmpdir
)[1]
return multiops_txn_key_spaces_file return multiops_txn_key_spaces_file
@ -291,7 +310,7 @@ simple_default_params = {
"write_buffer_size": 32 * 1024 * 1024, "write_buffer_size": 32 * 1024 * 1024,
"level_compaction_dynamic_level_bytes": False, "level_compaction_dynamic_level_bytes": False,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
"verify_iterator_with_expected_state_one_in": 5 # this locks a range of keys "verify_iterator_with_expected_state_one_in": 5, # this locks a range of keys
} }
blackbox_simple_default_params = { blackbox_simple_default_params = {
@ -317,7 +336,7 @@ cf_consistency_params = {
} }
txn_params = { txn_params = {
"use_txn" : 1, "use_txn": 1,
# Avoid lambda to set it once for the entire test # Avoid lambda to set it once for the entire test
"txn_write_policy": random.randint(0, 2), "txn_write_policy": random.randint(0, 2),
"unordered_write": random.randint(0, 1), "unordered_write": random.randint(0, 1),
@ -347,10 +366,14 @@ blob_params = {
"blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]), "blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]),
"blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]), "blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]),
"enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3), "enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3),
"blob_garbage_collection_age_cutoff": lambda: random.choice([0.0, 0.25, 0.5, 0.75, 1.0]), "blob_garbage_collection_age_cutoff": lambda: random.choice(
[0.0, 0.25, 0.5, 0.75, 1.0]
),
"blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]), "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
"blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]), "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
"blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]), "blob_file_starting_level": lambda: random.choice(
[0] * 4 + [1] * 3 + [2] * 2 + [3]
),
"use_blob_cache": lambda: random.randint(0, 1), "use_blob_cache": lambda: random.randint(0, 1),
"use_shared_block_and_blob_cache": lambda: random.randint(0, 1), "use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
"blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]), "blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
@ -418,7 +441,7 @@ multiops_txn_default_params = {
# compactions. # compactions.
"flush_one_in": 1000, "flush_one_in": 1000,
"key_spaces_path": setup_multiops_txn_key_spaces_file(), "key_spaces_path": setup_multiops_txn_key_spaces_file(),
"rollback_one_in": 4, "rollback_one_in": 4,
# Re-enable once we have a compaction for MultiOpsTxnStressTest # Re-enable once we have a compaction for MultiOpsTxnStressTest
"enable_compaction_filter": 0, "enable_compaction_filter": 0,
"create_timestamped_snapshot_one_in": 50, "create_timestamped_snapshot_one_in": 50,
@ -446,11 +469,11 @@ multiops_wp_txn_params = {
"create_timestamped_snapshot_one_in": 0, "create_timestamped_snapshot_one_in": 0,
} }
def finalize_and_sanitize(src_params): def finalize_and_sanitize(src_params):
dest_params = dict([(k, v() if callable(v) else v) dest_params = {k : v() if callable(v) else v for (k, v) in src_params.items()}
for (k, v) in src_params.items()])
if is_release_mode(): if is_release_mode():
dest_params['read_fault_one_in'] = 0 dest_params["read_fault_one_in"] = 0
if dest_params.get("compression_max_dict_bytes") == 0: if dest_params.get("compression_max_dict_bytes") == 0:
dest_params["compression_zstd_max_train_bytes"] = 0 dest_params["compression_zstd_max_train_bytes"] = 0
dest_params["compression_max_dict_buffer_bytes"] = 0 dest_params["compression_max_dict_buffer_bytes"] = 0
@ -466,13 +489,15 @@ def finalize_and_sanitize(src_params):
# used by `IngestExternalFile()`, causing it to fail with mmap # used by `IngestExternalFile()`, causing it to fail with mmap
# reads. Remove this once it is fixed. # reads. Remove this once it is fixed.
dest_params["ingest_external_file_one_in"] = 0 dest_params["ingest_external_file_one_in"] = 0
if (dest_params["use_direct_io_for_flush_and_compaction"] == 1 if (
or dest_params["use_direct_reads"] == 1) and \ dest_params["use_direct_io_for_flush_and_compaction"] == 1
not is_direct_io_supported(dest_params["db"]): or dest_params["use_direct_reads"] == 1
) and not is_direct_io_supported(dest_params["db"]):
if is_release_mode(): if is_release_mode():
print("{} does not support direct IO. Disabling use_direct_reads and " print(
"use_direct_io_for_flush_and_compaction.\n".format( "{} does not support direct IO. Disabling use_direct_reads and "
dest_params["db"])) "use_direct_io_for_flush_and_compaction.\n".format(dest_params["db"])
)
dest_params["use_direct_reads"] = 0 dest_params["use_direct_reads"] = 0
dest_params["use_direct_io_for_flush_and_compaction"] = 0 dest_params["use_direct_io_for_flush_and_compaction"] = 0
else: else:
@ -480,18 +505,22 @@ def finalize_and_sanitize(src_params):
# Multi-key operations are not currently compatible with transactions or # Multi-key operations are not currently compatible with transactions or
# timestamp. # timestamp.
if (dest_params.get("test_batches_snapshots") == 1 or if (
dest_params.get("use_txn") == 1 or dest_params.get("test_batches_snapshots") == 1
dest_params.get("user_timestamp_size") > 0): or dest_params.get("use_txn") == 1
or dest_params.get("user_timestamp_size") > 0
):
dest_params["delpercent"] += dest_params["delrangepercent"] dest_params["delpercent"] += dest_params["delrangepercent"]
dest_params["delrangepercent"] = 0 dest_params["delrangepercent"] = 0
dest_params["ingest_external_file_one_in"] = 0 dest_params["ingest_external_file_one_in"] = 0
# Correctness testing with unsync data loss is not currently compatible # Correctness testing with unsync data loss is not currently compatible
# with transactions # with transactions
if (dest_params.get("use_txn") == 1): if dest_params.get("use_txn") == 1:
dest_params["sync_fault_injection"] = 0 dest_params["sync_fault_injection"] = 0
if (dest_params.get("disable_wal") == 1 or if (
dest_params.get("sync_fault_injection") == 1): dest_params.get("disable_wal") == 1
or dest_params.get("sync_fault_injection") == 1
):
# File ingestion does not guarantee prefix-recoverability when unsynced # File ingestion does not guarantee prefix-recoverability when unsynced
# data can be lost. Ingesting a file syncs data immediately that is # data can be lost. Ingesting a file syncs data immediately that is
# newer than unsynced memtable data that can be lost on restart. # newer than unsynced memtable data that can be lost on restart.
@ -544,8 +573,10 @@ def finalize_and_sanitize(src_params):
dest_params["readpercent"] += dest_params.get("prefixpercent", 20) dest_params["readpercent"] += dest_params.get("prefixpercent", 20)
dest_params["prefixpercent"] = 0 dest_params["prefixpercent"] = 0
dest_params["test_batches_snapshots"] = 0 dest_params["test_batches_snapshots"] = 0
if (dest_params.get("prefix_size") == -1 and if (
dest_params.get("memtable_whole_key_filtering") == 0): dest_params.get("prefix_size") == -1
and dest_params.get("memtable_whole_key_filtering") == 0
):
dest_params["memtable_prefix_bloom_size_ratio"] = 0 dest_params["memtable_prefix_bloom_size_ratio"] = 0
if dest_params.get("two_write_queues") == 1: if dest_params.get("two_write_queues") == 1:
dest_params["enable_pipelined_write"] = 0 dest_params["enable_pipelined_write"] = 0
@ -566,19 +597,20 @@ def finalize_and_sanitize(src_params):
return dest_params return dest_params
def gen_cmd_params(args): def gen_cmd_params(args):
params = {} params = {}
params.update(default_params) params.update(default_params)
if args.test_type == 'blackbox': if args.test_type == "blackbox":
params.update(blackbox_default_params) params.update(blackbox_default_params)
if args.test_type == 'whitebox': if args.test_type == "whitebox":
params.update(whitebox_default_params) params.update(whitebox_default_params)
if args.simple: if args.simple:
params.update(simple_default_params) params.update(simple_default_params)
if args.test_type == 'blackbox': if args.test_type == "blackbox":
params.update(blackbox_simple_default_params) params.update(blackbox_simple_default_params)
if args.test_type == 'whitebox': if args.test_type == "whitebox":
params.update(whitebox_simple_default_params) params.update(whitebox_simple_default_params)
if args.cf_consistency: if args.cf_consistency:
params.update(cf_consistency_params) params.update(cf_consistency_params)
@ -590,9 +622,9 @@ def gen_cmd_params(args):
params.update(ts_params) params.update(ts_params)
if args.test_multiops_txn: if args.test_multiops_txn:
params.update(multiops_txn_default_params) params.update(multiops_txn_default_params)
if args.write_policy == 'write_committed': if args.write_policy == "write_committed":
params.update(multiops_wc_txn_params) params.update(multiops_wc_txn_params)
elif args.write_policy == 'write_prepared': elif args.write_policy == "write_prepared":
params.update(multiops_wp_txn_params) params.update(multiops_wp_txn_params)
if args.test_tiered_storage: if args.test_tiered_storage:
params.update(tiered_params) params.update(tiered_params)
@ -600,9 +632,12 @@ def gen_cmd_params(args):
# Best-effort recovery, user defined timestamp, tiered storage are currently # Best-effort recovery, user defined timestamp, tiered storage are currently
# incompatible with BlobDB. Test BE recovery if specified on the command # incompatible with BlobDB. Test BE recovery if specified on the command
# line; otherwise, apply BlobDB related overrides with a 10% chance. # line; otherwise, apply BlobDB related overrides with a 10% chance.
if (not args.test_best_efforts_recovery and if (
not args.enable_ts and not args.test_tiered_storage and not args.test_best_efforts_recovery
random.choice([0] * 9 + [1]) == 1): and not args.enable_ts
and not args.test_tiered_storage
and random.choice([0] * 9 + [1]) == 1
):
params.update(blob_params) params.update(blob_params)
for k, v in vars(args).items(): for k, v in vars(args).items():
@ -613,68 +648,87 @@ def gen_cmd_params(args):
def gen_cmd(params, unknown_params): def gen_cmd(params, unknown_params):
finalzied_params = finalize_and_sanitize(params) finalzied_params = finalize_and_sanitize(params)
cmd = [stress_cmd] + [ cmd = (
'--{0}={1}'.format(k, v) [stress_cmd]
for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)] + [
if k not in set(['test_type', 'simple', 'duration', 'interval', "--{0}={1}".format(k, v)
'random_kill_odd', 'cf_consistency', 'txn', for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)]
'test_best_efforts_recovery', 'enable_ts', if k
'test_multiops_txn', 'write_policy', 'stress_cmd', not in {
'test_tiered_storage']) "test_type",
and v is not None] + unknown_params "simple",
"duration",
"interval",
"random_kill_odd",
"cf_consistency",
"txn",
"test_best_efforts_recovery",
"enable_ts",
"test_multiops_txn",
"write_policy",
"stress_cmd",
"test_tiered_storage",
}
and v is not None
]
+ unknown_params
)
return cmd return cmd
def execute_cmd(cmd, timeout): def execute_cmd(cmd, timeout):
child = subprocess.Popen(cmd, stderr=subprocess.PIPE, child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
stdout=subprocess.PIPE) print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd)))
print("Running db_stress with pid=%d: %s\n\n"
% (child.pid, ' '.join(cmd)))
try: try:
outs, errs = child.communicate(timeout=timeout) outs, errs = child.communicate(timeout=timeout)
hit_timeout = False hit_timeout = False
print("WARNING: db_stress ended before kill: exitcode=%d\n" print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode)
% child.returncode)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
hit_timeout = True hit_timeout = True
child.kill() child.kill()
print("KILLED %d\n" % child.pid) print("KILLED %d\n" % child.pid)
outs, errs = child.communicate() outs, errs = child.communicate()
return hit_timeout, child.returncode, outs.decode('utf-8'), errs.decode('utf-8') return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8")
# This script runs and kills db_stress multiple times. It checks consistency # This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB. # in case of unsafe crashes in RocksDB.
def blackbox_crash_main(args, unknown_args): def blackbox_crash_main(args, unknown_args):
cmd_params = gen_cmd_params(args) cmd_params = gen_cmd_params(args)
dbname = get_dbname('blackbox') dbname = get_dbname("blackbox")
exit_time = time.time() + cmd_params['duration'] exit_time = time.time() + cmd_params["duration"]
print("Running blackbox-crash-test with \n" print(
+ "interval_between_crash=" + str(cmd_params['interval']) + "\n" "Running blackbox-crash-test with \n"
+ "total-duration=" + str(cmd_params['duration']) + "\n") + "interval_between_crash="
+ str(cmd_params["interval"])
+ "\n"
+ "total-duration="
+ str(cmd_params["duration"])
+ "\n"
)
while time.time() < exit_time: while time.time() < exit_time:
cmd = gen_cmd(dict( cmd = gen_cmd(
list(cmd_params.items()) dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
+ list({'db': dbname}.items())), unknown_args) )
hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params['interval']) hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["interval"])
if not hit_timeout: if not hit_timeout:
print('Exit Before Killing') print("Exit Before Killing")
print('stdout:') print("stdout:")
print(outs) print(outs)
print('stderr:') print("stderr:")
print(errs) print(errs)
sys.exit(2) sys.exit(2)
for line in errs.split('\n'): for line in errs.split("\n"):
if line != '' and not line.startswith('WARNING'): if line != "" and not line.startswith("WARNING"):
print('stderr has error message:') print("stderr has error message:")
print('***' + line + '***') print("***" + line + "***")
time.sleep(1) # time to stabilize before the next run time.sleep(1) # time to stabilize before the next run
@ -688,89 +742,109 @@ def blackbox_crash_main(args, unknown_args):
# kill_random_test that causes rocksdb to crash at various points in code. # kill_random_test that causes rocksdb to crash at various points in code.
def whitebox_crash_main(args, unknown_args): def whitebox_crash_main(args, unknown_args):
cmd_params = gen_cmd_params(args) cmd_params = gen_cmd_params(args)
dbname = get_dbname('whitebox') dbname = get_dbname("whitebox")
cur_time = time.time() cur_time = time.time()
exit_time = cur_time + cmd_params['duration'] exit_time = cur_time + cmd_params["duration"]
half_time = cur_time + cmd_params['duration'] // 2 half_time = cur_time + cmd_params["duration"] // 2
print("Running whitebox-crash-test with \n" print(
+ "total-duration=" + str(cmd_params['duration']) + "\n") "Running whitebox-crash-test with \n"
+ "total-duration="
+ str(cmd_params["duration"])
+ "\n"
)
total_check_mode = 4 total_check_mode = 4
check_mode = 0 check_mode = 0
kill_random_test = cmd_params['random_kill_odd'] kill_random_test = cmd_params["random_kill_odd"]
kill_mode = 0 kill_mode = 0
while time.time() < exit_time: while time.time() < exit_time:
if check_mode == 0: if check_mode == 0:
additional_opts = { additional_opts = {
# use large ops per thread since we will kill it anyway # use large ops per thread since we will kill it anyway
"ops_per_thread": 100 * cmd_params['ops_per_thread'], "ops_per_thread": 100
* cmd_params["ops_per_thread"],
} }
# run with kill_random_test, with three modes. # run with kill_random_test, with three modes.
# Mode 0 covers all kill points. Mode 1 covers fewer kill points but # Mode 0 covers all kill points. Mode 1 covers fewer kill points but
# increases the chance of triggering them. Mode 2 covers even less # increases the chance of triggering them. Mode 2 covers even less
# frequent kill points and further increases the triggering chance. # frequent kill points and further increases the triggering chance.
if kill_mode == 0: if kill_mode == 0:
additional_opts.update({ additional_opts.update(
"kill_random_test": kill_random_test, {
}) "kill_random_test": kill_random_test,
}
)
elif kill_mode == 1: elif kill_mode == 1:
if cmd_params.get('disable_wal', 0) == 1: if cmd_params.get("disable_wal", 0) == 1:
my_kill_odd = kill_random_test // 50 + 1 my_kill_odd = kill_random_test // 50 + 1
else: else:
my_kill_odd = kill_random_test // 10 + 1 my_kill_odd = kill_random_test // 10 + 1
additional_opts.update({ additional_opts.update(
"kill_random_test": my_kill_odd, {
"kill_exclude_prefixes": "WritableFileWriter::Append," "kill_random_test": my_kill_odd,
+ "WritableFileWriter::WriteBuffered", "kill_exclude_prefixes": "WritableFileWriter::Append,"
}) + "WritableFileWriter::WriteBuffered",
}
)
elif kill_mode == 2: elif kill_mode == 2:
# TODO: May need to adjust random odds if kill_random_test # TODO: May need to adjust random odds if kill_random_test
# is too small. # is too small.
additional_opts.update({ additional_opts.update(
"kill_random_test": (kill_random_test // 5000 + 1), {
"kill_exclude_prefixes": "WritableFileWriter::Append," "kill_random_test": (kill_random_test // 5000 + 1),
"WritableFileWriter::WriteBuffered," "kill_exclude_prefixes": "WritableFileWriter::Append,"
"PosixMmapFile::Allocate,WritableFileWriter::Flush", "WritableFileWriter::WriteBuffered,"
}) "PosixMmapFile::Allocate,WritableFileWriter::Flush",
}
)
# Run kill mode 0, 1 and 2 by turn. # Run kill mode 0, 1 and 2 by turn.
kill_mode = (kill_mode + 1) % 3 kill_mode = (kill_mode + 1) % 3
elif check_mode == 1: elif check_mode == 1:
# normal run with universal compaction mode # normal run with universal compaction mode
additional_opts = { additional_opts = {
"kill_random_test": None, "kill_random_test": None,
"ops_per_thread": cmd_params['ops_per_thread'], "ops_per_thread": cmd_params["ops_per_thread"],
"compaction_style": 1, "compaction_style": 1,
} }
# Single level universal has a lot of special logic. Ensure we cover # Single level universal has a lot of special logic. Ensure we cover
# it sometimes. # it sometimes.
if random.randint(0, 1) == 1: if random.randint(0, 1) == 1:
additional_opts.update({ additional_opts.update(
"num_levels": 1, {
}) "num_levels": 1,
}
)
elif check_mode == 2: elif check_mode == 2:
# normal run with FIFO compaction mode # normal run with FIFO compaction mode
# ops_per_thread is divided by 5 because FIFO compaction # ops_per_thread is divided by 5 because FIFO compaction
# style is quite a bit slower on reads with lot of files # style is quite a bit slower on reads with lot of files
additional_opts = { additional_opts = {
"kill_random_test": None, "kill_random_test": None,
"ops_per_thread": cmd_params['ops_per_thread'] // 5, "ops_per_thread": cmd_params["ops_per_thread"] // 5,
"compaction_style": 2, "compaction_style": 2,
} }
else: else:
# normal run # normal run
additional_opts = { additional_opts = {
"kill_random_test": None, "kill_random_test": None,
"ops_per_thread": cmd_params['ops_per_thread'], "ops_per_thread": cmd_params["ops_per_thread"],
} }
cmd = gen_cmd(dict(list(cmd_params.items()) cmd = gen_cmd(
+ list(additional_opts.items()) dict(
+ list({'db': dbname}.items())), unknown_args) list(cmd_params.items())
+ list(additional_opts.items())
+ list({"db": dbname}.items())
),
unknown_args,
)
print("Running:" + ' '.join(cmd) + "\n") # noqa: E999 T25377293 Grandfathered in print(
"Running:" + " ".join(cmd) + "\n"
) # noqa: E999 T25377293 Grandfathered in
# If the running time is 15 minutes over the run time, explicit kill and # If the running time is 15 minutes over the run time, explicit kill and
# exit even if white box kill didn't hit. This is to guarantee run time # exit even if white box kill didn't hit. This is to guarantee run time
@ -779,9 +853,11 @@ def whitebox_crash_main(args, unknown_args):
# TODO detect a hanging condition. The job might run too long as RocksDB # TODO detect a hanging condition. The job might run too long as RocksDB
# hits a hanging bug. # hits a hanging bug.
hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd( hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd(
cmd, exit_time - time.time() + 900) cmd, exit_time - time.time() + 900
msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format( )
check_mode, additional_opts['kill_random_test'], retncode)) msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format(
check_mode, additional_opts["kill_random_test"], retncode
)
print(msg) print(msg)
print(stdoutdata) print(stdoutdata)
@ -792,10 +868,10 @@ def whitebox_crash_main(args, unknown_args):
break break
expected = False expected = False
if additional_opts['kill_random_test'] is None and (retncode == 0): if additional_opts["kill_random_test"] is None and (retncode == 0):
# we expect zero retncode if no kill option # we expect zero retncode if no kill option
expected = True expected = True
elif additional_opts['kill_random_test'] is not None and retncode <= 0: elif additional_opts["kill_random_test"] is not None and retncode <= 0:
# When kill option is given, the test MIGHT kill itself. # When kill option is given, the test MIGHT kill itself.
# If it does, negative retncode is expected. Otherwise 0. # If it does, negative retncode is expected. Otherwise 0.
expected = True expected = True
@ -805,15 +881,13 @@ def whitebox_crash_main(args, unknown_args):
sys.exit(1) sys.exit(1)
stderrdata = stderrdata.lower() stderrdata = stderrdata.lower()
errorcount = (stderrdata.count('error') - errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times")
stderrdata.count('got errors 0 times')) print("#times error occurred in output is " + str(errorcount) + "\n")
print("#times error occurred in output is " + str(errorcount) +
"\n")
if (errorcount > 0): if errorcount > 0:
print("TEST FAILED. Output has 'error'!!!\n") print("TEST FAILED. Output has 'error'!!!\n")
sys.exit(2) sys.exit(2)
if (stderrdata.find('fail') >= 0): if stderrdata.find("fail") >= 0:
print("TEST FAILED. Output has 'fail'!!!\n") print("TEST FAILED. Output has 'fail'!!!\n")
sys.exit(2) sys.exit(2)
@ -824,7 +898,7 @@ def whitebox_crash_main(args, unknown_args):
# success # success
shutil.rmtree(dbname, True) shutil.rmtree(dbname, True)
os.mkdir(dbname) os.mkdir(dbname)
cmd_params.pop('expected_values_dir', None) cmd_params.pop("expected_values_dir", None)
check_mode = (check_mode + 1) % total_check_mode check_mode = (check_mode + 1) % total_check_mode
time.sleep(1) # time to stabilize after a kill time.sleep(1) # time to stabilize after a kill
@ -833,34 +907,38 @@ def whitebox_crash_main(args, unknown_args):
def main(): def main():
global stress_cmd global stress_cmd
parser = argparse.ArgumentParser(description="This script runs and kills \ parser = argparse.ArgumentParser(
db_stress multiple times") description="This script runs and kills \
db_stress multiple times"
)
parser.add_argument("test_type", choices=["blackbox", "whitebox"]) parser.add_argument("test_type", choices=["blackbox", "whitebox"])
parser.add_argument("--simple", action="store_true") parser.add_argument("--simple", action="store_true")
parser.add_argument("--cf_consistency", action='store_true') parser.add_argument("--cf_consistency", action="store_true")
parser.add_argument("--txn", action='store_true') parser.add_argument("--txn", action="store_true")
parser.add_argument("--test_best_efforts_recovery", action='store_true') parser.add_argument("--test_best_efforts_recovery", action="store_true")
parser.add_argument("--enable_ts", action='store_true') parser.add_argument("--enable_ts", action="store_true")
parser.add_argument("--test_multiops_txn", action='store_true') parser.add_argument("--test_multiops_txn", action="store_true")
parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"]) parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
parser.add_argument("--stress_cmd") parser.add_argument("--stress_cmd")
parser.add_argument("--test_tiered_storage", action='store_true') parser.add_argument("--test_tiered_storage", action="store_true")
all_params = dict(list(default_params.items()) all_params = dict(
+ list(blackbox_default_params.items()) list(default_params.items())
+ list(whitebox_default_params.items()) + list(blackbox_default_params.items())
+ list(simple_default_params.items()) + list(whitebox_default_params.items())
+ list(blackbox_simple_default_params.items()) + list(simple_default_params.items())
+ list(whitebox_simple_default_params.items()) + list(blackbox_simple_default_params.items())
+ list(blob_params.items()) + list(whitebox_simple_default_params.items())
+ list(ts_params.items()) + list(blob_params.items())
+ list(multiops_txn_default_params.items()) + list(ts_params.items())
+ list(multiops_wc_txn_params.items()) + list(multiops_txn_default_params.items())
+ list(multiops_wp_txn_params.items()) + list(multiops_wc_txn_params.items())
+ list(best_efforts_recovery_params.items()) + list(multiops_wp_txn_params.items())
+ list(cf_consistency_params.items()) + list(best_efforts_recovery_params.items())
+ list(tiered_params.items()) + list(cf_consistency_params.items())
+ list(txn_params.items())) + list(tiered_params.items())
+ list(txn_params.items())
)
for k, v in all_params.items(): for k, v in all_params.items():
parser.add_argument("--" + k, type=type(v() if callable(v) else v)) parser.add_argument("--" + k, type=type(v() if callable(v) else v))
@ -869,15 +947,17 @@ def main():
test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR) test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
if test_tmpdir is not None and not os.path.isdir(test_tmpdir): if test_tmpdir is not None and not os.path.isdir(test_tmpdir):
print('%s env var is set to a non-existent directory: %s' % print(
(_TEST_DIR_ENV_VAR, test_tmpdir)) "%s env var is set to a non-existent directory: %s"
% (_TEST_DIR_ENV_VAR, test_tmpdir)
)
sys.exit(1) sys.exit(1)
if args.stress_cmd: if args.stress_cmd:
stress_cmd = args.stress_cmd stress_cmd = args.stress_cmd
if args.test_type == 'blackbox': if args.test_type == "blackbox":
blackbox_crash_main(args, unknown_args) blackbox_crash_main(args, unknown_args)
if args.test_type == 'whitebox': if args.test_type == "whitebox":
whitebox_crash_main(args, unknown_args) whitebox_crash_main(args, unknown_args)
# Only delete the `expected_values_dir` if test passes # Only delete the `expected_values_dir` if test passes
if expected_values_dir is not None: if expected_values_dir is not None:
@ -886,5 +966,5 @@ def main():
os.remove(multiops_txn_key_spaces_file) os.remove(multiops_txn_key_spaces_file)
if __name__ == '__main__': if __name__ == "__main__":
main() main()

@ -2,65 +2,72 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob import glob
import os
import os.path import os.path
import re
import shutil import shutil
import subprocess import subprocess
import tempfile
import time import time
import unittest import unittest
import tempfile
import re
def my_check_output(*popenargs, **kwargs): def my_check_output(*popenargs, **kwargs):
""" """
If we had python 2.7, we should simply use subprocess.check_output. If we had python 2.7, we should simply use subprocess.check_output.
This is a stop-gap solution for python 2.6 This is a stop-gap solution for python 2.6
""" """
if 'stdout' in kwargs: if "stdout" in kwargs:
raise ValueError('stdout argument not allowed, it will be overridden.') raise ValueError("stdout argument not allowed, it will be overridden.")
process = subprocess.Popen(stderr=subprocess.PIPE, stdout=subprocess.PIPE, process = subprocess.Popen(
*popenargs, **kwargs) stderr=subprocess.PIPE, stdout=subprocess.PIPE, *popenargs, **kwargs
)
output, unused_err = process.communicate() output, unused_err = process.communicate()
retcode = process.poll() retcode = process.poll()
if retcode: if retcode:
cmd = kwargs.get("args") cmd = kwargs.get("args")
if cmd is None: if cmd is None:
cmd = popenargs[0] cmd = popenargs[0]
raise Exception("Exit code is not 0. It is %d. Command: %s" % raise Exception("Exit code is not 0. It is %d. Command: %s" % (retcode, cmd))
(retcode, cmd)) return output.decode("utf-8")
return output.decode('utf-8')
def run_err_null(cmd): def run_err_null(cmd):
return os.system(cmd + " 2>/dev/null ") return os.system(cmd + " 2>/dev/null ")
class LDBTestCase(unittest.TestCase): class LDBTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_") self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_")
self.DB_NAME = "testdb" self.DB_NAME = "testdb"
def tearDown(self): def tearDown(self):
assert(self.TMP_DIR.strip() != "/" assert (
and self.TMP_DIR.strip() != "/tmp" self.TMP_DIR.strip() != "/"
and self.TMP_DIR.strip() != "/tmp/") #Just some paranoia and self.TMP_DIR.strip() != "/tmp"
and self.TMP_DIR.strip() != "/tmp/"
) # Just some paranoia
shutil.rmtree(self.TMP_DIR) shutil.rmtree(self.TMP_DIR)
def dbParam(self, dbName): def dbParam(self, dbName):
return "--db=%s" % os.path.join(self.TMP_DIR, dbName) return "--db=%s" % os.path.join(self.TMP_DIR, dbName)
def assertRunOKFull(self, params, expectedOutput, unexpected=False, def assertRunOKFull(
isPattern=False): self, params, expectedOutput, unexpected=False, isPattern=False
):
""" """
All command-line params must be specified. All command-line params must be specified.
Allows full flexibility in testing; for example: missing db param. Allows full flexibility in testing; for example: missing db param.
""" """
output = my_check_output("./ldb %s |grep -v \"Created bg thread\"" % output = my_check_output(
params, shell=True) './ldb %s |grep -v "Created bg thread"' % params, shell=True
)
if not unexpected: if not unexpected:
if isPattern: if isPattern:
self.assertNotEqual(expectedOutput.search(output.strip()), self.assertNotEqual(expectedOutput.search(output.strip()), None)
None)
else: else:
self.assertEqual(output.strip(), expectedOutput.strip()) self.assertEqual(output.strip(), expectedOutput.strip())
else: else:
@ -76,20 +83,25 @@ class LDBTestCase(unittest.TestCase):
""" """
try: try:
my_check_output("./ldb %s >/dev/null 2>&1 |grep -v \"Created bg \ my_check_output(
thread\"" % params, shell=True) './ldb %s >/dev/null 2>&1 |grep -v "Created bg \
thread"'
% params,
shell=True,
)
except Exception: except Exception:
return return
self.fail( self.fail(
"Exception should have been raised for command with params: %s" % "Exception should have been raised for command with params: %s" % params
params) )
def assertRunOK(self, params, expectedOutput, unexpected=False): def assertRunOK(self, params, expectedOutput, unexpected=False):
""" """
Uses the default test db. Uses the default test db.
""" """
self.assertRunOKFull("%s %s" % (self.dbParam(self.DB_NAME), params), self.assertRunOKFull(
expectedOutput, unexpected) "%s %s" % (self.dbParam(self.DB_NAME), params), expectedOutput, unexpected
)
def assertRunFAIL(self, params): def assertRunFAIL(self, params):
""" """
@ -118,16 +130,17 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("scan --to=x2", "x1 : y1") self.assertRunOK("scan --to=x2", "x1 : y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1") self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=2", self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 : y1\nx2 : y2")
"x1 : y1\nx2 : y2")
self.assertRunOK("scan --from=x1 --to=z --max_keys=3", self.assertRunOK(
"x1 : y1\nx2 : y2\nx3 : y3") "scan --from=x1 --to=z --max_keys=3", "x1 : y1\nx2 : y2\nx3 : y3"
self.assertRunOK("scan --from=x1 --to=z --max_keys=4", )
"x1 : y1\nx2 : y2\nx3 : y3") self.assertRunOK(
"scan --from=x1 --to=z --max_keys=4", "x1 : y1\nx2 : y2\nx3 : y3"
)
self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1") self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1")
self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3") self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3")
self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL
self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo") self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3")
@ -148,18 +161,18 @@ class LDBTestCase(unittest.TestCase):
return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params)) return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params))
def writeExternSst(self, params, inputDumpFile, outputSst): def writeExternSst(self, params, inputDumpFile, outputSst):
return 0 == run_err_null("cat %s | ./ldb write_extern_sst %s %s" return 0 == run_err_null(
% (inputDumpFile, outputSst, params)) "cat %s | ./ldb write_extern_sst %s %s" % (inputDumpFile, outputSst, params)
)
def ingestExternSst(self, params, inputSst): def ingestExternSst(self, params, inputSst):
return 0 == run_err_null("./ldb ingest_extern_sst %s %s" return 0 == run_err_null("./ldb ingest_extern_sst %s %s" % (inputSst, params))
% (inputSst, params))
def testStringBatchPut(self): def testStringBatchPut(self):
print("Running testStringBatchPut...") print("Running testStringBatchPut...")
self.assertRunOK("batchput x1 y1 --create_if_missing", "OK") self.assertRunOK("batchput x1 y1 --create_if_missing", "OK")
self.assertRunOK("scan", "x1 : y1") self.assertRunOK("scan", "x1 : y1")
self.assertRunOK("batchput x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") self.assertRunOK('batchput x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz")
self.assertRunFAIL("batchput") self.assertRunFAIL("batchput")
self.assertRunFAIL("batchput k1") self.assertRunFAIL("batchput k1")
@ -171,7 +184,9 @@ class LDBTestCase(unittest.TestCase):
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK("scan", "x1 : y1") self.assertRunOK("scan", "x1 : y1")
self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") self.assertRunOK(
'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK"
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz")
blob_files = self.getBlobFiles(dbPath) blob_files = self.getBlobFiles(dbPath)
@ -195,13 +210,18 @@ class LDBTestCase(unittest.TestCase):
print("Running testBlobStartingLevel...") print("Running testBlobStartingLevel...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1", "OK") self.assertRunOK(
"put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1",
"OK",
)
self.assertRunOK("get x1", "y1") self.assertRunOK("get x1", "y1")
blob_files = self.getBlobFiles(dbPath) blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) == 0) self.assertTrue(len(blob_files) == 0)
self.assertRunOK("put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK") self.assertRunOK(
"put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK"
)
self.assertRunOK("get x1", "y1") self.assertRunOK("get x1", "y1")
self.assertRunOK("get x2", "y2") self.assertRunOK("get x2", "y2")
self.assertRunFAIL("get x3") self.assertRunFAIL("get x3")
@ -213,19 +233,37 @@ class LDBTestCase(unittest.TestCase):
print("Running testCountDelimDump...") print("Running testCountDelimDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK") self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK") self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK("dump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") self.assertRunOK(
self.assertRunOK("dump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") "dump --count_delim",
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK(
'dump --count_delim="."',
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK") self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK("dump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8") self.assertRunOK(
'dump --count_delim=","',
"x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8",
)
def testCountDelimIDump(self): def testCountDelimIDump(self):
print("Running testCountDelimIDump...") print("Running testCountDelimIDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK") self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK") self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK("idump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") self.assertRunOK(
self.assertRunOK("idump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8") "idump --count_delim",
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK(
'idump --count_delim="."',
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK") self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK("idump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8") self.assertRunOK(
'idump --count_delim=","',
"x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8",
)
def testInvalidCmdLines(self): def testInvalidCmdLines(self):
print("Running testInvalidCmdLines...") print("Running testInvalidCmdLines...")
@ -253,12 +291,13 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("get --key_hex 0x6132", "b2") self.assertRunOK("get --key_hex 0x6132", "b2")
self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232") self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232")
self.assertRunOK("get --value_hex a2", "0x6232") self.assertRunOK("get --value_hex a2", "0x6232")
self.assertRunOK("scan --key_hex --value_hex", self.assertRunOK(
"0x6131 : 0x6231\n0x6132 : 0x6232") "scan --key_hex --value_hex", "0x6131 : 0x6231\n0x6132 : 0x6232"
self.assertRunOK("scan --hex --from=0x6131 --to=0x6133", )
"0x6131 : 0x6231\n0x6132 : 0x6232") self.assertRunOK(
self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "scan --hex --from=0x6131 --to=0x6133", "0x6131 : 0x6231\n0x6132 : 0x6232"
"0x6131 : 0x6231") )
self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 : 0x6231")
self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2") self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2")
self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232") self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232")
self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK") self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK")
@ -272,8 +311,7 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK") self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK")
self.assertRunOK("scan --hex", "0x6131 : 0x6231", True) self.assertRunOK("scan --hex", "0x6131 : 0x6231", True)
self.assertRunOK("dump --ttl ", "a1 ==> b1", True) self.assertRunOK("dump --ttl ", "a1 ==> b1", True)
self.assertRunOK("dump --hex --ttl ", self.assertRunOK("dump --hex --ttl ", "0x6131 ==> 0x6231\nKeys in range: 1")
"0x6131 ==> 0x6231\nKeys in range: 1")
self.assertRunOK("scan --hex --ttl", "0x6131 : 0x6231") self.assertRunOK("scan --hex --ttl", "0x6131 : 0x6231")
self.assertRunOK("get --value_hex a1", "0x6231", True) self.assertRunOK("get --value_hex a1", "0x6231", True)
self.assertRunOK("get --ttl a1", "b1") self.assertRunOK("get --ttl a1", "b1")
@ -295,8 +333,7 @@ class LDBTestCase(unittest.TestCase):
def testDumpLoad(self): def testDumpLoad(self):
print("Running testDumpLoad...") print("Running testDumpLoad...")
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK")
"OK")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
@ -304,98 +341,125 @@ class LDBTestCase(unittest.TestCase):
dumpFilePath = os.path.join(self.TMP_DIR, "dump1") dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1")
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb( self.assertTrue(
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
self.assertRunOKFull("scan --db=%s" % loadedDbPath, )
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump and load in hex # Dump and load in hex
dumpFilePath = os.path.join(self.TMP_DIR, "dump2") dumpFilePath = os.path.join(self.TMP_DIR, "dump2")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2")
self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath)) self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb( self.assertTrue(
"--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath)) self.loadDb(
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") )
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump only a portion of the key range # Dump only a portion of the key range
dumpFilePath = os.path.join(self.TMP_DIR, "dump3") dumpFilePath = os.path.join(self.TMP_DIR, "dump3")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3")
self.assertTrue(self.dumpDb( self.assertTrue(
"--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath)) self.dumpDb("--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath)
self.assertTrue(self.loadDb( )
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2") self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2")
# Dump upto max_keys rows # Dump upto max_keys rows
dumpFilePath = os.path.join(self.TMP_DIR, "dump4") dumpFilePath = os.path.join(self.TMP_DIR, "dump4")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4")
self.assertTrue(self.dumpDb( self.assertTrue(self.dumpDb("--db=%s --max_keys=3" % origDbPath, dumpFilePath))
"--db=%s --max_keys=3" % origDbPath, dumpFilePath)) self.assertTrue(
self.assertTrue(self.loadDb( self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) )
self.assertRunOKFull("scan --db=%s" % loadedDbPath, self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3")
"x1 : y1\nx2 : y2\nx3 : y3")
# Load into an existing db, create_if_missing is not specified # Load into an existing db, create_if_missing is not specified
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath)) self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath, self.assertRunOKFull(
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump and load with WAL disabled # Dump and load with WAL disabled
dumpFilePath = os.path.join(self.TMP_DIR, "dump5") dumpFilePath = os.path.join(self.TMP_DIR, "dump5")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5")
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb( self.assertTrue(
"--db=%s --disable_wal --create_if_missing" % loadedDbPath, self.loadDb(
dumpFilePath)) "--db=%s --disable_wal --create_if_missing" % loadedDbPath, dumpFilePath
self.assertRunOKFull("scan --db=%s" % loadedDbPath, )
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") )
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump and load with lots of extra params specified # Dump and load with lots of extra params specified
extraParams = " ".join(["--bloom_bits=14", "--block_size=1024", extraParams = " ".join(
"--auto_compaction=true", [
"--write_buffer_size=4194304", "--bloom_bits=14",
"--file_size=2097152"]) "--block_size=1024",
"--auto_compaction=true",
"--write_buffer_size=4194304",
"--file_size=2097152",
]
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump6") dumpFilePath = os.path.join(self.TMP_DIR, "dump6")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6")
self.assertTrue(self.dumpDb( self.assertTrue(
"--db=%s %s" % (origDbPath, extraParams), dumpFilePath)) self.dumpDb("--db=%s %s" % (origDbPath, extraParams), dumpFilePath)
self.assertTrue(self.loadDb( )
"--db=%s %s --create_if_missing" % (loadedDbPath, extraParams), self.assertTrue(
dumpFilePath)) self.loadDb(
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "--db=%s %s --create_if_missing" % (loadedDbPath, extraParams),
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") dumpFilePath,
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump with count_only # Dump with count_only
dumpFilePath = os.path.join(self.TMP_DIR, "dump7") dumpFilePath = os.path.join(self.TMP_DIR, "dump7")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7")
self.assertTrue(self.dumpDb( self.assertTrue(self.dumpDb("--db=%s --count_only" % origDbPath, dumpFilePath))
"--db=%s --count_only" % origDbPath, dumpFilePath)) self.assertTrue(
self.assertTrue(self.loadDb( self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)) )
# DB should have atleast one value for scan to work # DB should have atleast one value for scan to work
self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK") self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK")
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1") self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1")
# Dump command fails because of typo in params # Dump command fails because of typo in params
dumpFilePath = os.path.join(self.TMP_DIR, "dump8") dumpFilePath = os.path.join(self.TMP_DIR, "dump8")
self.assertFalse(self.dumpDb( self.assertFalse(
"--db=%s --create_if_missing" % origDbPath, dumpFilePath)) self.dumpDb("--db=%s --create_if_missing" % origDbPath, dumpFilePath)
)
# Dump and load with BlobDB enabled # Dump and load with BlobDB enabled
blobParams = " ".join(["--enable_blob_files", "--min_blob_size=1", blobParams = " ".join(
"--blob_file_size=2097152"]) ["--enable_blob_files", "--min_blob_size=1", "--blob_file_size=2097152"]
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump9") dumpFilePath = os.path.join(self.TMP_DIR, "dump9")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump9") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump9")
self.assertTrue(self.dumpDb( self.assertTrue(self.dumpDb("--db=%s" % (origDbPath), dumpFilePath))
"--db=%s" % (origDbPath), dumpFilePath)) self.assertTrue(
self.assertTrue(self.loadDb( self.loadDb(
"--db=%s %s --create_if_missing --disable_wal" % (loadedDbPath, blobParams), "--db=%s %s --create_if_missing --disable_wal"
dumpFilePath)) % (loadedDbPath, blobParams),
self.assertRunOKFull("scan --db=%s" % loadedDbPath, dumpFilePath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") )
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
blob_files = self.getBlobFiles(loadedDbPath) blob_files = self.getBlobFiles(loadedDbPath)
self.assertTrue(len(blob_files) >= 1) self.assertTrue(len(blob_files) >= 1)
@ -404,12 +468,14 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("put a val --create_if_missing", "OK") self.assertRunOK("put a val --create_if_missing", "OK")
self.assertRunOK("put b val", "OK") self.assertRunOK("put b val", "OK")
self.assertRunOK( self.assertRunOK(
"idump", "'a' seq:1, type:1 => val\n" "idump",
"'b' seq:2, type:1 => val\nInternal keys in range: 2") "'a' seq:1, type:1 => val\n"
"'b' seq:2, type:1 => val\nInternal keys in range: 2",
)
self.assertRunOK( self.assertRunOK(
"idump --input_key_hex --from=%s --to=%s" % (hex(ord('a')), "idump --input_key_hex --from=%s --to=%s" % (hex(ord("a")), hex(ord("b"))),
hex(ord('b'))), "'a' seq:1, type:1 => val\nInternal keys in range: 1",
"'a' seq:1, type:1 => val\nInternal keys in range: 1") )
def testIDumpDecodeBlobIndex(self): def testIDumpDecodeBlobIndex(self):
print("Running testIDumpDecodeBlobIndex...") print("Running testIDumpDecodeBlobIndex...")
@ -420,45 +486,55 @@ class LDBTestCase(unittest.TestCase):
regex = ".*\[blob ref\].*" regex = ".*\[blob ref\].*"
expected_pattern = re.compile(regex) expected_pattern = re.compile(regex)
cmd = "idump %s --decode_blob_index" cmd = "idump %s --decode_blob_index"
self.assertRunOKFull((cmd) self.assertRunOKFull(
% (self.dbParam(self.DB_NAME)), (cmd) % (self.dbParam(self.DB_NAME)),
expected_pattern, unexpected=False, expected_pattern,
isPattern=True) unexpected=False,
isPattern=True,
)
def testMiscAdminTask(self): def testMiscAdminTask(self):
print("Running testMiscAdminTask...") print("Running testMiscAdminTask...")
# These tests need to be improved; for example with asserts about # These tests need to be improved; for example with asserts about
# whether compaction or level reduction actually took place. # whether compaction or level reduction actually took place.
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK")
"OK")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertTrue(0 == run_err_null( self.assertTrue(0 == run_err_null("./ldb compact --db=%s" % origDbPath))
"./ldb compact --db=%s" % origDbPath))
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null( self.assertTrue(
"./ldb reduce_levels --db=%s --new_levels=2" % origDbPath)) 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=2" % origDbPath)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null( self.assertTrue(
"./ldb reduce_levels --db=%s --new_levels=3" % origDbPath)) 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=3" % origDbPath)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null( self.assertTrue(
"./ldb compact --db=%s --from=x1 --to=x3" % origDbPath)) 0 == run_err_null("./ldb compact --db=%s --from=x1 --to=x3" % origDbPath)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null( self.assertTrue(
"./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" 0
% origDbPath)) == run_err_null(
"./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % origDbPath
)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
#TODO(dilip): Not sure what should be passed to WAL.Currently corrupted. # TODO(dilip): Not sure what should be passed to WAL.Currently corrupted.
self.assertTrue(0 == run_err_null( self.assertTrue(
"./ldb dump_wal --db=%s --walfile=%s --header" % ( 0
origDbPath, os.path.join(origDbPath, "LOG")))) == run_err_null(
"./ldb dump_wal --db=%s --walfile=%s --header"
% (origDbPath, os.path.join(origDbPath, "LOG"))
)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
def testCheckConsistency(self): def testCheckConsistency(self):
@ -470,8 +546,9 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("get x1", "y1") self.assertRunOK("get x1", "y1")
self.assertRunOK("checkconsistency", "OK") self.assertRunOK("checkconsistency", "OK")
sstFilePath = my_check_output("ls %s" % os.path.join(dbPath, "*.sst"), sstFilePath = my_check_output(
shell=True) "ls %s" % os.path.join(dbPath, "*.sst"), shell=True
)
# Modify the file # Modify the file
my_check_output("echo 'evil' > %s" % sstFilePath, shell=True) my_check_output("echo 'evil' > %s" % sstFilePath, shell=True)
@ -482,8 +559,7 @@ class LDBTestCase(unittest.TestCase):
self.assertRunFAIL("checkconsistency") self.assertRunFAIL("checkconsistency")
def dumpLiveFiles(self, params, dumpFile): def dumpLiveFiles(self, params, dumpFile):
return 0 == run_err_null("./ldb dump_live_files %s > %s" % ( return 0 == run_err_null("./ldb dump_live_files %s > %s" % (params, dumpFile))
params, dumpFile))
def testDumpLiveFiles(self): def testDumpLiveFiles(self):
print("Running testDumpLiveFiles...") print("Running testDumpLiveFiles...")
@ -506,7 +582,12 @@ class LDBTestCase(unittest.TestCase):
dbPath += "/" dbPath += "/"
# Call the dump_live_files function with the edited dbPath name. # Call the dump_live_files function with the edited dbPath name.
self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, dumpFilePath)) self.assertTrue(
self.dumpLiveFiles(
"--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath,
dumpFilePath,
)
)
# Investigate the output # Investigate the output
with open(dumpFilePath, "r") as tmp: with open(dumpFilePath, "r") as tmp:
@ -517,14 +598,14 @@ class LDBTestCase(unittest.TestCase):
self.assertTrue(len(sstFileList) >= 1) self.assertTrue(len(sstFileList) >= 1)
for sstFilename in sstFileList: for sstFilename in sstFileList:
filenumber = re.findall(r"\d+.sst", sstFilename)[0] filenumber = re.findall(r"\d+.sst", sstFilename)[0]
self.assertEqual(sstFilename, dbPath+filenumber) self.assertEqual(sstFilename, dbPath + filenumber)
# Check that all the Blob filenames have a correct full path (no multiple '/'). # Check that all the Blob filenames have a correct full path (no multiple '/').
blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data) blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data)
self.assertTrue(len(blobFileList) >= 1) self.assertTrue(len(blobFileList) >= 1)
for blobFilename in blobFileList: for blobFilename in blobFileList:
filenumber = re.findall(r"\d+.blob", blobFilename)[0] filenumber = re.findall(r"\d+.blob", blobFilename)[0]
self.assertEqual(blobFilename, dbPath+filenumber) self.assertEqual(blobFilename, dbPath + filenumber)
# Check that all the manifest filenames # Check that all the manifest filenames
# have a correct full path (no multiple '/'). # have a correct full path (no multiple '/').
@ -532,15 +613,16 @@ class LDBTestCase(unittest.TestCase):
self.assertTrue(len(manifestFileList) >= 1) self.assertTrue(len(manifestFileList) >= 1)
for manifestFilename in manifestFileList: for manifestFilename in manifestFileList:
filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0] filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0]
self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber) self.assertEqual(manifestFilename, dbPath + "MANIFEST-" + filenumber)
# Check that the blob file index is decoded. # Check that the blob file index is decoded.
decodedBlobIndex = re.findall(r"\[blob ref\]", data) decodedBlobIndex = re.findall(r"\[blob ref\]", data)
self.assertTrue(len(decodedBlobIndex) >= 1) self.assertTrue(len(decodedBlobIndex) >= 1)
def listLiveFilesMetadata(self, params, dumpFile): def listLiveFilesMetadata(self, params, dumpFile):
return 0 == run_err_null("./ldb list_live_files_metadata %s > %s" % ( return 0 == run_err_null(
params, dumpFile)) "./ldb list_live_files_metadata %s > %s" % (params, dumpFile)
)
def testListLiveFilesMetadata(self): def testListLiveFilesMetadata(self):
print("Running testListLiveFilesMetadata...") print("Running testListLiveFilesMetadata...")
@ -554,23 +636,27 @@ class LDBTestCase(unittest.TestCase):
dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1") dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1)) self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1))
dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2") dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2")
self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath2)) self.assertTrue(
self.listLiveFilesMetadata(
"--sort_by_filename --db=%s" % dbPath, dumpFilePath2
)
)
# Collect SST filename and level from dump_live_files # Collect SST filename and level from dump_live_files
with open(dumpFilePath1, "r") as tmp: with open(dumpFilePath1, "r") as tmp:
data = tmp.read() data = tmp.read()
filename1 = re.findall(r".*\d+\.sst",data)[0] filename1 = re.findall(r".*\d+\.sst", data)[0]
level1 = re.findall(r"level:\d+",data)[0].split(':')[1] level1 = re.findall(r"level:\d+", data)[0].split(":")[1]
# Collect SST filename and level from list_live_files_metadata # Collect SST filename and level from list_live_files_metadata
with open(dumpFilePath2, "r") as tmp: with open(dumpFilePath2, "r") as tmp:
data = tmp.read() data = tmp.read()
filename2 = re.findall(r".*\d+\.sst",data)[0] filename2 = re.findall(r".*\d+\.sst", data)[0]
level2 = re.findall(r"level \d+",data)[0].split(' ')[1] level2 = re.findall(r"level \d+", data)[0].split(" ")[1]
# Assert equality between filenames and levels. # Assert equality between filenames and levels.
self.assertEqual(filename1,filename2) self.assertEqual(filename1, filename2)
self.assertEqual(level1,level2) self.assertEqual(level1, level2)
# Create multiple column families and compare the output # Create multiple column families and compare the output
# of list_live_files_metadata with dump_live_files once again. # of list_live_files_metadata with dump_live_files once again.
@ -586,7 +672,11 @@ class LDBTestCase(unittest.TestCase):
dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3") dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3)) self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3))
dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4") dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4")
self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath4)) self.assertTrue(
self.listLiveFilesMetadata(
"--sort_by_filename --db=%s" % dbPath, dumpFilePath4
)
)
# dump_live_files: # dump_live_files:
# parse the output and create a map: # parse the output and create a map:
@ -601,7 +691,7 @@ class LDBTestCase(unittest.TestCase):
# re.findall should not reorder the data. # re.findall should not reorder the data.
# Therefore namesAndLevels[i] matches the data from cfs[i]. # Therefore namesAndLevels[i] matches the data from cfs[i].
for count, nameAndLevel in enumerate(namesAndLevels): for count, nameAndLevel in enumerate(namesAndLevels):
sstFilename = re.findall(r"\d+.sst",nameAndLevel)[0] sstFilename = re.findall(r"\d+.sst", nameAndLevel)[0]
sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0] sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0]
cf = cfs[count] cf = cfs[count]
referenceMap[sstFilename] = [sstLevel, cf] referenceMap[sstFilename] = [sstLevel, cf]
@ -618,13 +708,13 @@ class LDBTestCase(unittest.TestCase):
sstLines = re.findall(r".*\d+.sst.*", data) sstLines = re.findall(r".*\d+.sst.*", data)
for line in sstLines: for line in sstLines:
sstFilename = re.findall(r"\d+.sst", line)[0] sstFilename = re.findall(r"\d+.sst", line)[0]
sstLevel = re.findall(r"(?<=level )\d+",line)[0] sstLevel = re.findall(r"(?<=level )\d+", line)[0]
cf = re.findall(r"(?<=column family \')\w+(?=\')",line)[0] cf = re.findall(r"(?<=column family \')\w+(?=\')", line)[0]
testMap[sstFilename] = [sstLevel, cf] testMap[sstFilename] = [sstLevel, cf]
# Compare the map obtained from dump_live_files and the map # Compare the map obtained from dump_live_files and the map
# obtained from list_live_files_metadata. Everything should match. # obtained from list_live_files_metadata. Everything should match.
self.assertEqual(referenceMap,testMap) self.assertEqual(referenceMap, testMap)
def getManifests(self, directory): def getManifests(self, directory):
return glob.glob(directory + "/MANIFEST-*") return glob.glob(directory + "/MANIFEST-*")
@ -657,25 +747,30 @@ class LDBTestCase(unittest.TestCase):
manifest_files = self.getManifests(dbPath) manifest_files = self.getManifests(dbPath)
self.assertTrue(len(manifest_files) == 1) self.assertTrue(len(manifest_files) == 1)
# Test with the default manifest file in dbPath. # Test with the default manifest file in dbPath.
self.assertRunOKFull(cmd % dbPath, expected_pattern, self.assertRunOKFull(
unexpected=False, isPattern=True) cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
)
self.copyManifests(manifest_files[0], manifest_files[0] + "1") self.copyManifests(manifest_files[0], manifest_files[0] + "1")
manifest_files = self.getManifests(dbPath) manifest_files = self.getManifests(dbPath)
self.assertTrue(len(manifest_files) == 2) self.assertTrue(len(manifest_files) == 2)
# Test with multiple manifest files in dbPath. # Test with multiple manifest files in dbPath.
self.assertRunFAILFull(cmd % dbPath) self.assertRunFAILFull(cmd % dbPath)
# Running it with the copy we just created should pass. # Running it with the copy we just created should pass.
self.assertRunOKFull((cmd + " --path=%s") self.assertRunOKFull(
% (dbPath, manifest_files[1]), (cmd + " --path=%s") % (dbPath, manifest_files[1]),
expected_pattern, unexpected=False, expected_pattern,
isPattern=True) unexpected=False,
isPattern=True,
)
# Make sure that using the dump with --path will result in identical # Make sure that using the dump with --path will result in identical
# output as just using manifest_dump. # output as just using manifest_dump.
cmd = "dump --path=%s" cmd = "dump --path=%s"
self.assertRunOKFull((cmd) self.assertRunOKFull(
% (manifest_files[1]), (cmd) % (manifest_files[1]),
expected_pattern, unexpected=False, expected_pattern,
isPattern=True) unexpected=False,
isPattern=True,
)
# Check if null characters doesn't infer with output format. # Check if null characters doesn't infer with output format.
self.assertRunOK("put a1 b1", "OK") self.assertRunOK("put a1 b1", "OK")
@ -696,11 +791,14 @@ class LDBTestCase(unittest.TestCase):
# Also note that 0x72=r and 0x4f=O, hence the regex \'r.{2}O\' # Also note that 0x72=r and 0x4f=O, hence the regex \'r.{2}O\'
# (we cannot use null character in the subprocess input either, # (we cannot use null character in the subprocess input either,
# so we have to use '.{2}') # so we have to use '.{2}')
cmd_verbose = "manifest_dump --verbose --db=%s | grep -aq $'\'r.{2}O\'' && echo 'matched' || echo 'not matched'" %dbPath cmd_verbose = (
"manifest_dump --verbose --db=%s | grep -aq $''r.{2}O'' && echo 'matched' || echo 'not matched'"
self.assertRunOKFull(cmd_verbose , expected_verbose_output, % dbPath
unexpected=False, isPattern=True) )
self.assertRunOKFull(
cmd_verbose, expected_verbose_output, unexpected=False, isPattern=True
)
def testGetProperty(self): def testGetProperty(self):
print("Running testGetProperty...") print("Running testGetProperty...")
@ -709,16 +807,15 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("put 2 2", "OK") self.assertRunOK("put 2 2", "OK")
# A "string" property # A "string" property
cmd = "--db=%s get_property rocksdb.estimate-num-keys" cmd = "--db=%s get_property rocksdb.estimate-num-keys"
self.assertRunOKFull(cmd % dbPath, self.assertRunOKFull(cmd % dbPath, "rocksdb.estimate-num-keys: 2")
"rocksdb.estimate-num-keys: 2")
# A "map" property # A "map" property
# FIXME: why doesn't this pick up two entries? # FIXME: why doesn't this pick up two entries?
cmd = "--db=%s get_property rocksdb.aggregated-table-properties" cmd = "--db=%s get_property rocksdb.aggregated-table-properties"
part = "rocksdb.aggregated-table-properties.num_entries: " part = "rocksdb.aggregated-table-properties.num_entries: "
expected_pattern = re.compile(part) expected_pattern = re.compile(part)
self.assertRunOKFull(cmd % dbPath, self.assertRunOKFull(
expected_pattern, unexpected=False, cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
isPattern=True) )
# An invalid property # An invalid property
cmd = "--db=%s get_property rocksdb.this-property-does-not-exist" cmd = "--db=%s get_property rocksdb.this-property-does-not-exist"
self.assertRunFAILFull(cmd % dbPath) self.assertRunFAILFull(cmd % dbPath)
@ -738,27 +835,27 @@ class LDBTestCase(unittest.TestCase):
sst_files = self.getSSTFiles(dbPath) sst_files = self.getSSTFiles(dbPath)
self.assertTrue(len(sst_files) >= 1) self.assertTrue(len(sst_files) >= 1)
cmd = "dump --path=%s --decode_blob_index" cmd = "dump --path=%s --decode_blob_index"
self.assertRunOKFull((cmd) self.assertRunOKFull(
% (sst_files[0]), (cmd) % (sst_files[0]), expected_pattern, unexpected=False, isPattern=True
expected_pattern, unexpected=False, )
isPattern=True)
def testBlobDump(self): def testBlobDump(self):
print("Running testBlobDump") print("Running testBlobDump")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") self.assertRunOK(
'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK"
)
# Pattern to expect from blob file dump. # Pattern to expect from blob file dump.
regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa
expected_pattern = re.compile(regex) expected_pattern = re.compile(regex)
blob_files = self.getBlobFiles(dbPath) blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1) self.assertTrue(len(blob_files) >= 1)
cmd = "dump --path=%s --dump_uncompressed_blobs" cmd = "dump --path=%s --dump_uncompressed_blobs"
self.assertRunOKFull((cmd) self.assertRunOKFull(
% (blob_files[0]), (cmd) % (blob_files[0]), expected_pattern, unexpected=False, isPattern=True
expected_pattern, unexpected=False, )
isPattern=True)
def testWALDump(self): def testWALDump(self):
print("Running testWALDump...") print("Running testWALDump...")
@ -775,15 +872,14 @@ class LDBTestCase(unittest.TestCase):
wal_files = self.getWALFiles(dbPath) wal_files = self.getWALFiles(dbPath)
self.assertTrue(len(wal_files) >= 1) self.assertTrue(len(wal_files) >= 1)
cmd = "dump --path=%s" cmd = "dump --path=%s"
self.assertRunOKFull((cmd) self.assertRunOKFull(
% (wal_files[0]), (cmd) % (wal_files[0]), expected_pattern, unexpected=False, isPattern=True
expected_pattern, unexpected=False, )
isPattern=True)
def testListColumnFamilies(self): def testListColumnFamilies(self):
print("Running testListColumnFamilies...") print("Running testListColumnFamilies...")
self.assertRunOK("put x1 y1 --create_if_missing", "OK") self.assertRunOK("put x1 y1 --create_if_missing", "OK")
cmd = "list_column_families | grep -v \"Column families\"" cmd = 'list_column_families | grep -v "Column families"'
# Test on valid dbPath. # Test on valid dbPath.
self.assertRunOK(cmd, "{default}") self.assertRunOK(cmd, "{default}")
# Test on empty path. # Test on empty path.
@ -791,34 +887,28 @@ class LDBTestCase(unittest.TestCase):
def testColumnFamilies(self): def testColumnFamilies(self):
print("Running testColumnFamilies...") print("Running testColumnFamilies...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) # noqa: F841 T25377293 Grandfathered in _ = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put cf1_1 1 --create_if_missing", "OK") self.assertRunOK("put cf1_1 1 --create_if_missing", "OK")
self.assertRunOK("put cf1_2 2 --create_if_missing", "OK") self.assertRunOK("put cf1_2 2 --create_if_missing", "OK")
self.assertRunOK("put cf1_3 3 --try_load_options", "OK") self.assertRunOK("put cf1_3 3 --try_load_options", "OK")
# Given non-default column family to single CF DB. # Given non-default column family to single CF DB.
self.assertRunFAIL("get cf1_1 --column_family=two") self.assertRunFAIL("get cf1_1 --column_family=two")
self.assertRunOK("create_column_family two", "OK") self.assertRunOK("create_column_family two", "OK")
self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", "OK")
"OK") self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", "OK")
self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two",
"OK")
self.assertRunOK("delete cf1_2", "OK") self.assertRunOK("delete cf1_2", "OK")
self.assertRunOK("create_column_family three", "OK") self.assertRunOK("create_column_family three", "OK")
self.assertRunOK("delete cf2_2 --column_family=two", "OK") self.assertRunOK("delete cf2_2 --column_family=two", "OK")
self.assertRunOK( self.assertRunOK("put cf3_1 3 --create_if_missing --column_family=three", "OK")
"put cf3_1 3 --create_if_missing --column_family=three",
"OK")
self.assertRunOK("get cf1_1 --column_family=default", "1") self.assertRunOK("get cf1_1 --column_family=default", "1")
self.assertRunOK("dump --column_family=two", self.assertRunOK("dump --column_family=two", "cf2_1 ==> 1\nKeys in range: 1")
"cf2_1 ==> 1\nKeys in range: 1") self.assertRunOK(
self.assertRunOK("dump --column_family=two --try_load_options", "dump --column_family=two --try_load_options",
"cf2_1 ==> 1\nKeys in range: 1") "cf2_1 ==> 1\nKeys in range: 1",
self.assertRunOK("dump", )
"cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2") self.assertRunOK("dump", "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2")
self.assertRunOK("get cf2_1 --column_family=two", self.assertRunOK("get cf2_1 --column_family=two", "1")
"1") self.assertRunOK("get cf3_1 --column_family=three", "3")
self.assertRunOK("get cf3_1 --column_family=three",
"3")
self.assertRunOK("drop_column_family three", "OK") self.assertRunOK("drop_column_family three", "OK")
# non-existing column family. # non-existing column family.
self.assertRunFAIL("get cf3_1 --column_family=four") self.assertRunFAIL("get cf3_1 --column_family=four")
@ -830,32 +920,36 @@ class LDBTestCase(unittest.TestCase):
# Dump, load, write external sst and ingest it in another db # Dump, load, write external sst and ingest it in another db
dbPath = os.path.join(self.TMP_DIR, "db1") dbPath = os.path.join(self.TMP_DIR, "db1")
self.assertRunOK( self.assertRunOK(
"batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" "batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" % dbPath,
% dbPath, "OK",
"OK") )
self.assertRunOK("scan --db=%s" % dbPath, self.assertRunOK("scan --db=%s" % dbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
dumpFilePath = os.path.join(self.TMP_DIR, "dump1") dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
with open(dumpFilePath, 'w') as f: with open(dumpFilePath, "w") as f:
f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40") f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40")
externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst") externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst")
self.assertTrue(self.writeExternSst("--create_if_missing --db=%s" self.assertTrue(
% dbPath, self.writeExternSst(
dumpFilePath, "--create_if_missing --db=%s" % dbPath, dumpFilePath, externSstPath
externSstPath)) )
)
# cannot ingest if allow_global_seqno is false # cannot ingest if allow_global_seqno is false
self.assertFalse( self.assertFalse(
self.ingestExternSst( self.ingestExternSst(
"--create_if_missing --allow_global_seqno=false --db=%s" "--create_if_missing --allow_global_seqno=false --db=%s" % dbPath,
% dbPath, externSstPath,
externSstPath)) )
)
self.assertTrue( self.assertTrue(
self.ingestExternSst( self.ingestExternSst(
"--create_if_missing --allow_global_seqno --db=%s" "--create_if_missing --allow_global_seqno --db=%s" % dbPath,
% dbPath, externSstPath,
externSstPath)) )
self.assertRunOKFull("scan --db=%s" % dbPath, )
"x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40") self.assertRunOKFull(
"scan --db=%s" % dbPath, "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40"
)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

@ -2,11 +2,12 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import subprocess
import argparse import argparse
import random import random
import time
import subprocess
import sys import sys
import time
def generate_runtimes(total_runtime): def generate_runtimes(total_runtime):
@ -24,31 +25,33 @@ def generate_runtimes(total_runtime):
def main(args): def main(args):
runtimes = generate_runtimes(int(args.runtime_sec)) runtimes = generate_runtimes(int(args.runtime_sec))
print("Going to execute write stress for " + str(runtimes)) # noqa: E999 T25377293 Grandfathered in print(
"Going to execute write stress for " + str(runtimes)
) # noqa: E999 T25377293 Grandfathered in
first_time = True first_time = True
for runtime in runtimes: for runtime in runtimes:
kill = random.choice([False, True]) kill = random.choice([False, True])
cmd = './write_stress --runtime_sec=' + \ cmd = "./write_stress --runtime_sec=" + ("-1" if kill else str(runtime))
("-1" if kill else str(runtime))
if len(args.db) > 0: if len(args.db) > 0:
cmd = cmd + ' --db=' + args.db cmd = cmd + " --db=" + args.db
if first_time: if first_time:
first_time = False first_time = False
else: else:
# use current db # use current db
cmd = cmd + ' --destroy_db=false' cmd = cmd + " --destroy_db=false"
if random.choice([False, True]): if random.choice([False, True]):
cmd = cmd + ' --delete_obsolete_files_with_fullscan=true' cmd = cmd + " --delete_obsolete_files_with_fullscan=true"
if random.choice([False, True]): if random.choice([False, True]):
cmd = cmd + ' --low_open_files_mode=true' cmd = cmd + " --low_open_files_mode=true"
print("Running write_stress for %d seconds (%s): %s" % print(
(runtime, ("kill-mode" if kill else "clean-shutdown-mode"), "Running write_stress for %d seconds (%s): %s"
cmd)) % (runtime, ("kill-mode" if kill else "clean-shutdown-mode"), cmd)
)
child = subprocess.Popen([cmd], shell=True) child = subprocess.Popen([cmd], shell=True)
killtime = time.time() + runtime killtime = time.time() + runtime
@ -58,19 +61,23 @@ def main(args):
if child.returncode == 0: if child.returncode == 0:
break break
else: else:
print("ERROR: write_stress died with exitcode=%d\n" print(
% child.returncode) "ERROR: write_stress died with exitcode=%d\n" % child.returncode
)
sys.exit(1) sys.exit(1)
if kill: if kill:
child.kill() child.kill()
# breathe # breathe
time.sleep(3) time.sleep(3)
if __name__ == '__main__':
if __name__ == "__main__":
random.seed(time.time()) random.seed(time.time())
parser = argparse.ArgumentParser(description="This script runs and kills \ parser = argparse.ArgumentParser(
write_stress multiple times") description="This script runs and kills \
parser.add_argument("--runtime_sec", default='1000') write_stress multiple times"
parser.add_argument("--db", default='') )
parser.add_argument("--runtime_sec", default="1000")
parser.add_argument("--db", default="")
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)

Loading…
Cancel
Save