Enable BLACK for internal_repo_rocksdb (#10710)

Summary:
Enable BLACK for internal_repo_rocksdb.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10710

Reviewed By: riversand963, zsol

Differential Revision: D39666245

Pulled By: gitbw95

fbshipit-source-id: ef364318d2bbba66e96f3211dd6a975174d52c21
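
For context: Black is Python's opinionated auto-formatter, and nearly every hunk below is one of its mechanical rewrites: single quotes normalized to double quotes, over-long call sites exploded one argument per line with a trailing comma, and slice spacing adjusted. A minimal sketch of reproducing such a rewrite through Black's public API, assuming the `black` package is installed (the exact internal lint invocation is not shown in this commit):

    # Format a source string with Black's public API; the Mode() defaults
    # (88-column lines, double-quote preference) match the hunks below.
    import black

    src = "x = {'a': 1,'b': 2}\n"
    print(black.format_str(src, mode=black.Mode()), end="")
    # Output: x = {"a": 1, "b": 2}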
31 changed files (number of changed lines in parentheses):

  1. buckifier/buckify_rocksdb.py (150)
  2. buckifier/targets_builder.py (163)
  3. buckifier/targets_cfg.py (9)
  4. buckifier/util.py (57)
  5. build_tools/amalgamate.py (87)
  6. build_tools/benchmark_log_tool.py (157)
  7. build_tools/error_filter.py (130)
  8. coverage/parse_gcov_output.py (37)
  9. tools/advisor/advisor/bench_runner.py (14)
 10. tools/advisor/advisor/config_optimizer_example.py (116)
 11. tools/advisor/advisor/db_bench_runner.py (110)
 12. tools/advisor/advisor/db_config_optimizer.py (133)
 13. tools/advisor/advisor/db_log_parser.py (37)
 14. tools/advisor/advisor/db_options_parser.py (120)
 15. tools/advisor/advisor/db_stats_fetcher.py (128)
 16. tools/advisor/advisor/db_timeseries_parser.py (39)
 17. tools/advisor/advisor/ini_parser.py (24)
 18. tools/advisor/advisor/rule_parser.py (200)
 19. tools/advisor/advisor/rule_parser_example.py (83)
 20. tools/advisor/test/test_db_bench_runner.py (100)
 21. tools/advisor/test/test_db_log_parser.py (75)
 22. tools/advisor/test/test_db_options_parser.py (210)
 23. tools/advisor/test/test_db_stats_fetcher.py (95)
 24. tools/advisor/test/test_rule_parser.py (120)
 25. tools/benchmark_ci.py (112)
 26. tools/block_cache_analyzer/block_cache_pysim_test.py (8)
 27. tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py (1)
 28. tools/check_all_python.py (4)
 29. tools/db_crashtest.py (450)
 30. tools/ldb_test.py (546)
 31. tools/write_stress_runner.py (45)
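
Three rewrite patterns account for most of these 31 files. The sketch below reproduces them by running Black on small inputs; the snippets are illustrative and assume `black` is installed, they are not lines taken from this diff:

    import black

    samples = [
        # 1. Quote normalization: single quotes become double quotes.
        "s = 'hello'\n",
        # 2. Slice spacing: a complex lower bound gets a space before ":".
        "rest = root[(len(repo_path) + 1):]\n",
        # 3. Magic trailing comma: calls over 88 columns are exploded.
        "f(argument_one, argument_two, argument_three, argument_four,"
        " argument_five, argument_six, argument_seven)\n",
    ]
    for src in samples:
        print(black.format_str(src, mode=black.Mode()))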

@@ -1,19 +1,18 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
try:
from builtins import str
except ImportError:
from __builtin__ import str
from targets_builder import TARGETSBuilder
import fnmatch
import json
import os
import fnmatch
import sys
from targets_builder import TARGETSBuilder
from util import ColorString
# This script generates TARGETS file for Buck.
@@ -44,13 +43,13 @@ def parse_src_mk(repo_path):
src_files = {}
for line in open(src_mk):
line = line.strip()
if len(line) == 0 or line[0] == '#':
if len(line) == 0 or line[0] == "#":
continue
if '=' in line:
current_src = line.split('=')[0].strip()
if "=" in line:
current_src = line.split("=")[0].strip()
src_files[current_src] = []
elif '.c' in line:
src_path = line.split('\\')[0].strip()
elif ".c" in line:
src_path = line.split("\\")[0].strip()
src_files[current_src].append(src_path)
return src_files
@@ -58,14 +57,16 @@ def parse_src_mk(repo_path):
# get all .cc / .c files
def get_cc_files(repo_path):
cc_files = []
for root, dirnames, filenames in os.walk(repo_path): # noqa: B007 T25377293 Grandfathered in
root = root[(len(repo_path) + 1):]
for root, _dirnames, filenames in os.walk(
repo_path
): # noqa: B007 T25377293 Grandfathered in
root = root[(len(repo_path) + 1) :]
if "java" in root:
# Skip java
continue
for filename in fnmatch.filter(filenames, '*.cc'):
for filename in fnmatch.filter(filenames, "*.cc"):
cc_files.append(os.path.join(root, filename))
for filename in fnmatch.filter(filenames, '*.c'):
for filename in fnmatch.filter(filenames, "*.c"):
cc_files.append(os.path.join(root, filename))
return cc_files
@@ -93,14 +94,10 @@ def get_non_parallel_tests(repo_path):
return s
# Parse extra dependencies passed by user from command line
def get_dependencies():
deps_map = {
'': {
'extra_deps': [],
'extra_compiler_flags': []
}
}
deps_map = {"": {"extra_deps": [], "extra_compiler_flags": []}}
if len(sys.argv) < 2:
return deps_map
@@ -111,6 +108,7 @@ def get_dependencies():
v = encode_dict(v)
rv[k] = v
return rv
extra_deps = json.loads(sys.argv[1], object_hook=encode_dict)
for target_alias, deps in extra_deps.items():
deps_map[target_alias] = deps
@@ -143,73 +141,73 @@ def generate_targets(repo_path, deps_map):
"rocksdb_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"],
deps=[
"//folly/container:f14_hash",
"//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task",
"//folly/synchronization:distributed_mutex"])
"//folly/synchronization:distributed_mutex",
],
)
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"],
deps=[
"//folly/container:f14_hash",
"//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task",
"//folly/synchronization:distributed_mutex"],
"//folly/synchronization:distributed_mutex",
],
headers=None,
extra_external_deps="",
link_whole=True)
link_whole=True,
)
# rocksdb_test_lib
TARGETS.add_library(
"rocksdb_test_lib",
src_mk.get("MOCK_LIB_SOURCES", []) +
src_mk.get("TEST_LIB_SOURCES", []) +
src_mk.get("EXP_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []),
src_mk.get("MOCK_LIB_SOURCES", [])
+ src_mk.get("TEST_LIB_SOURCES", [])
+ src_mk.get("EXP_LIB_SOURCES", [])
+ src_mk.get("ANALYZER_LIB_SOURCES", []),
[":rocksdb_lib"],
extra_test_libs=True
)
extra_test_libs=True,
)
# rocksdb_tools_lib
TARGETS.add_library(
"rocksdb_tools_lib",
src_mk.get("BENCH_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []) +
["test_util/testutil.cc"],
[":rocksdb_lib"])
src_mk.get("BENCH_LIB_SOURCES", [])
+ src_mk.get("ANALYZER_LIB_SOURCES", [])
+ ["test_util/testutil.cc"],
[":rocksdb_lib"],
)
# rocksdb_cache_bench_tools_lib
TARGETS.add_library(
"rocksdb_cache_bench_tools_lib",
src_mk.get("CACHE_BENCH_LIB_SOURCES", []),
[":rocksdb_lib"])
[":rocksdb_lib"],
)
# rocksdb_stress_lib
TARGETS.add_rocksdb_library(
"rocksdb_stress_lib",
src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', [])
+ ["test_util/testutil.cc"])
+ src_mk.get("STRESS_LIB_SOURCES", [])
+ ["test_util/testutil.cc"],
)
# db_stress binary
TARGETS.add_binary("db_stress",
["db_stress_tool/db_stress.cc"],
[":rocksdb_stress_lib"])
TARGETS.add_binary(
"db_stress", ["db_stress_tool/db_stress.cc"], [":rocksdb_stress_lib"]
)
# bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0]
TARGETS.add_binary(
name,
[src],
[],
extra_bench_libs=True
)
name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0]
TARGETS.add_binary(name, [src], [], extra_bench_libs=True)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
# Dictionary test executable name -> relative source file path
@@ -219,7 +217,7 @@ def generate_targets(repo_path, deps_map):
# are more than one .c test file, we need to extend
# TARGETS.add_c_test() to include other C tests too.
for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []):
if test_src != 'db/c_test.c':
if test_src != "db/c_test.c":
print("Don't know how to deal with " + test_src)
return False
TARGETS.add_c_test()
@@ -229,7 +227,7 @@ def generate_targets(repo_path, deps_map):
fast_fancy_bench_config_list = json.load(json_file)
for config_dict in fast_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
benchmarks = config_dict["benchmarks"]
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
@@ -237,13 +235,20 @@ def generate_targets(repo_path, deps_map):
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
TARGETS.add_fancy_bench_config(
config_dict["name"],
clean_benchmarks,
False,
config_dict["expected_runtime_one_iter"],
config_dict["sl_iterations"],
config_dict["regression_threshold"],
)
with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
slow_fancy_bench_config_list = json.load(json_file)
for config_dict in slow_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
benchmarks = config_dict["benchmarks"]
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
@@ -252,7 +257,14 @@ def generate_targets(repo_path, deps_map):
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
for config_dict in slow_fancy_bench_config_list:
TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
TARGETS.add_fancy_bench_config(
config_dict["name"] + "_slow",
clean_benchmarks,
True,
config_dict["expected_runtime_one_iter"],
config_dict["sl_iterations"],
config_dict["regression_threshold"],
)
# it is better servicelab experiments break
# than rocksdb github ci
except Exception:
@@ -261,7 +273,7 @@ def generate_targets(repo_path, deps_map):
TARGETS.add_test_header()
for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
test = test_src.split('.c')[0].strip().split('/')[-1].strip()
test = test_src.split(".c")[0].strip().split("/")[-1].strip()
test_source_map[test] = test_src
print("" + test + " " + test_src)
@@ -271,23 +283,29 @@ def generate_targets(repo_path, deps_map):
print(ColorString.warning("Failed to get test name for %s" % test_src))
continue
test_target_name = \
test if not target_alias else test + "_" + target_alias
test_target_name = test if not target_alias else test + "_" + target_alias
if test in _EXPORTED_TEST_LIBS:
test_library = "%s_lib" % test_target_name
TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True)
TARGETS.add_library(
test_library,
[test_src],
deps=[":rocksdb_test_lib"],
extra_test_libs=True,
)
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [':'+test_library]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
deps=json.dumps(deps["extra_deps"] + [":" + test_library]),
extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]),
)
else:
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
deps=json.dumps(deps["extra_deps"] + [":rocksdb_test_lib"]),
extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]),
)
print(ColorString.info("Generated TARGETS Summary:"))
print(ColorString.info("- %d libs" % TARGETS.total_lib))
@@ -300,8 +318,7 @@ def get_rocksdb_path():
# rocksdb = {script_dir}/..
script_dir = os.path.dirname(sys.argv[0])
script_dir = os.path.abspath(script_dir)
rocksdb_path = os.path.abspath(
os.path.join(script_dir, "../"))
rocksdb_path = os.path.abspath(os.path.join(script_dir, "../"))
return rocksdb_path
@@ -318,5 +335,6 @@ def main():
if not ok:
exit_with_error("Failed to generate TARGETS files")
if __name__ == "__main__":
main()

@@ -1,113 +1,150 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
try:
from builtins import object
from builtins import str
from builtins import object, str
except ImportError:
from __builtin__ import object
from __builtin__ import str
import targets_cfg
from __builtin__ import object, str
import pprint
import targets_cfg
def pretty_list(lst, indent=8):
if lst is None or len(lst) == 0:
return ""
if len(lst) == 1:
return "\"%s\"" % lst[0]
return '"%s"' % lst[0]
separator = "\",\n%s\"" % (" " * indent)
separator = '",\n%s"' % (" " * indent)
res = separator.join(sorted(lst))
res = "\n" + (" " * indent) + "\"" + res + "\",\n" + (" " * (indent - 4))
res = "\n" + (" " * indent) + '"' + res + '",\n' + (" " * (indent - 4))
return res
class TARGETSBuilder(object):
def __init__(self, path, extra_argv):
self.path = path
self.targets_file = open(path, 'wb')
header = targets_cfg.rocksdb_target_header_template.format(
extra_argv=extra_argv)
self.targets_file.write(header.encode("utf-8"))
extra_argv=extra_argv
)
with open(path, "wb") as targets_file:
targets_file.write(header.encode("utf-8"))
self.total_lib = 0
self.total_bin = 0
self.total_test = 0
self.tests_cfg = ""
def __del__(self):
self.targets_file.close()
def add_library(self, name, srcs, deps=None, headers=None,
extra_external_deps="", link_whole=False,
external_dependencies=None, extra_test_libs=False):
def add_library(
self,
name,
srcs,
deps=None,
headers=None,
extra_external_deps="",
link_whole=False,
external_dependencies=None,
extra_test_libs=False,
):
if headers is not None:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
deps=pretty_list(deps),
extra_external_deps=extra_external_deps,
link_whole=link_whole,
external_dependencies=pretty_list(external_dependencies),
extra_test_libs=extra_test_libs
).encode("utf-8"))
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
deps=pretty_list(deps),
extra_external_deps=extra_external_deps,
link_whole=link_whole,
external_dependencies=pretty_list(external_dependencies),
extra_test_libs=extra_test_libs,
).encode("utf-8")
)
self.total_lib = self.total_lib + 1
def add_rocksdb_library(self, name, srcs, headers=None,
external_dependencies=None):
def add_rocksdb_library(self, name, srcs, headers=None, external_dependencies=None):
if headers is not None:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.rocksdb_library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
external_dependencies=pretty_list(external_dependencies)
).encode("utf-8")
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.rocksdb_library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
external_dependencies=pretty_list(external_dependencies),
).encode("utf-8")
)
self.total_lib = self.total_lib + 1
def add_binary(self, name, srcs, deps=None, extra_preprocessor_flags=None,extra_bench_libs=False):
self.targets_file.write(targets_cfg.binary_template.format(
name=name,
srcs=pretty_list(srcs),
deps=pretty_list(deps),
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
extra_bench_libs=extra_bench_libs,
).encode("utf-8"))
def add_binary(
self,
name,
srcs,
deps=None,
extra_preprocessor_flags=None,
extra_bench_libs=False,
):
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.binary_template.format(
name=name,
srcs=pretty_list(srcs),
deps=pretty_list(deps),
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
extra_bench_libs=extra_bench_libs,
).encode("utf-8")
)
self.total_bin = self.total_bin + 1
def add_c_test(self):
self.targets_file.write(b"""
with open(self.path, "ab") as targets_file:
targets_file.write(
b"""
add_c_test_wrapper()
""")
"""
)
def add_test_header(self):
self.targets_file.write(b"""
with open(self.path, "ab") as targets_file:
targets_file.write(
b"""
# Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
""")
"""
)
def add_fancy_bench_config(self, name, bench_config, slow, expected_runtime, sl_iterations, regression_threshold):
self.targets_file.write(targets_cfg.fancy_bench_template.format(
def add_fancy_bench_config(
self,
name,
bench_config,
slow,
expected_runtime,
sl_iterations,
regression_threshold,
):
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.fancy_bench_template.format(
name=name,
bench_config=pprint.pformat(bench_config),
slow=slow,
expected_runtime=expected_runtime,
sl_iterations=sl_iterations,
regression_threshold=regression_threshold
).encode("utf-8"))
def register_test(self,
test_name,
src,
deps,
extra_compiler_flags):
regression_threshold=regression_threshold,
).encode("utf-8")
)
self.targets_file.write(targets_cfg.unittests_template.format(test_name=test_name,test_cc=str(src),deps=deps,
extra_compiler_flags=extra_compiler_flags).encode("utf-8"))
def register_test(self, test_name, src, deps, extra_compiler_flags):
with open(self.path, "ab") as targets_file:
targets_file.write(
targets_cfg.unittests_template.format(
test_name=test_name,
test_cc=str(src),
deps=deps,
extra_compiler_flags=extra_compiler_flags,
).encode("utf-8")
)
self.total_test = self.total_test + 1
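
Not everything in the targets_builder.py hunks is cosmetic: the builder previously held `self.targets_file` open for its whole lifetime and closed it in `__del__`, while the new code writes the header once in truncate mode and reopens the file in append mode for every subsequent write. A simplified sketch of the new pattern (class and method names are condensed here; this is not the actual TARGETSBuilder):

    class Builder:
        def __init__(self, path, header):
            self.path = path
            with open(path, "wb") as f:  # create/truncate once
                f.write(header.encode("utf-8"))

        def add(self, chunk):
            with open(self.path, "ab") as f:  # append, then close immediately
                f.write(chunk.encode("utf-8"))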

@@ -1,11 +1,7 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
rocksdb_target_header_template = \
"""# This file \100generated by:
rocksdb_target_header_template = """# This file \100generated by:
#$ python3 buckifier/buckify_rocksdb.py{extra_argv}
# --> DO NOT EDIT MANUALLY <--
# This file is a Facebook-specific integration for buck builds, so can
@@ -27,7 +23,6 @@ rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers})
"""
binary_template = """
cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs})
"""

@@ -2,37 +2,35 @@
"""
This module keeps commonly used components.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
try:
from builtins import object
except ImportError:
from __builtin__ import object
import os
import subprocess
import sys
import os
import time
class ColorString(object):
""" Generate colorful strings on terminal """
HEADER = '\033[95m'
BLUE = '\033[94m'
GREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
"""Generate colorful strings on terminal"""
HEADER = "\033[95m"
BLUE = "\033[94m"
GREEN = "\033[92m"
WARNING = "\033[93m"
FAIL = "\033[91m"
ENDC = "\033[0m"
@staticmethod
def _make_color_str(text, color):
# In Python2, default encoding for unicode string is ASCII
if sys.version_info.major <= 2:
return "".join(
[color, text.encode('utf-8'), ColorString.ENDC])
return "".join([color, text.encode("utf-8"), ColorString.ENDC])
# From Python3, default encoding for unicode string is UTF-8
return "".join(
[color, text, ColorString.ENDC])
return "".join([color, text, ColorString.ENDC])
@staticmethod
def ok(text):
@@ -68,37 +66,38 @@ class ColorString(object):
def run_shell_command(shell_cmd, cmd_dir=None):
""" Run a single shell command.
@returns a tuple of shell command return code, stdout, stderr """
"""Run a single shell command.
@returns a tuple of shell command return code, stdout, stderr"""
if cmd_dir is not None and not os.path.exists(cmd_dir):
run_shell_command("mkdir -p %s" % cmd_dir)
start = time.time()
print("\t>>> Running: " + shell_cmd)
p = subprocess.Popen(shell_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cmd_dir)
p = subprocess.Popen( # noqa
shell_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cmd_dir,
)
stdout, stderr = p.communicate()
end = time.time()
# Report time if we spent more than 5 minutes executing a command
execution_time = end - start
if execution_time > (60 * 5):
mins = (execution_time / 60)
secs = (execution_time % 60)
mins = execution_time / 60
secs = execution_time % 60
print("\t>time spent: %d minutes %d seconds" % (mins, secs))
return p.returncode, stdout, stderr
def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False):
""" Execute a sequence of shell commands, which is equivalent to
running `cmd1 && cmd2 && cmd3`
@returns boolean indication if all commands succeeds.
"""Execute a sequence of shell commands, which is equivalent to
running `cmd1 && cmd2 && cmd3`
@returns boolean indication if all commands succeeds.
"""
if cmd_dir:
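
As a usage note for the util.py hunks: `run_shell_command` returns a `(returncode, stdout, stderr)` tuple, with stdout and stderr as bytes from `Popen.communicate()`. A hedged example, assuming buckifier/ is on `sys.path`:

    from util import ColorString, run_shell_command

    code, out, err = run_shell_command("ls", cmd_dir="/tmp")
    if code == 0:
        print(ColorString.info(out.decode("utf-8")))
    else:
        print(ColorString.warning(err.decode("utf-8")))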

@@ -28,14 +28,15 @@
from __future__ import print_function
import argparse
from os import path
import re
import sys
from os import path
include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$')
included = set()
excluded = set()
def find_header(name, abs_path, include_paths):
samedir = path.join(path.dirname(abs_path), name)
if path.exists(samedir):
@@ -46,17 +47,31 @@ def find_header(name, abs_path, include_paths):
return include_path
return None
def expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths):
def expand_include(
include_path,
f,
abs_path,
source_out,
header_out,
include_paths,
public_include_paths,
):
if include_path in included:
return False
included.add(include_path)
with open(include_path) as f:
print('#line 1 "{}"'.format(include_path), file=source_out)
process_file(f, include_path, source_out, header_out, include_paths, public_include_paths)
process_file(
f, include_path, source_out, header_out, include_paths, public_include_paths
)
return True
def process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths):
def process_file(
f, abs_path, source_out, header_out, include_paths, public_include_paths
):
for (line, text) in enumerate(f):
m = include_re.match(text)
if m:
@@ -68,7 +83,15 @@ def process_file(f, abs_path, source_out, header_out, include_paths, public_incl
source_out.write(text)
expanded = False
else:
expanded = expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths)
expanded = expand_include(
include_path,
f,
abs_path,
source_out,
header_out,
include_paths,
public_include_paths,
)
else:
# now try public headers
include_path = find_header(filename, abs_path, public_include_paths)
@@ -78,23 +101,52 @@ def process_file(f, abs_path, source_out, header_out, include_paths, public_incl
if include_path in excluded:
source_out.write(text)
else:
expand_include(include_path, f, abs_path, header_out, None, public_include_paths, [])
expand_include(
include_path,
f,
abs_path,
header_out,
None,
public_include_paths,
[],
)
else:
sys.exit("unable to find {}, included in {} on line {}".format(filename, abs_path, line))
sys.exit(
"unable to find {}, included in {} on line {}".format(
filename, abs_path, line
)
)
if expanded:
print('#line {} "{}"'.format(line+1, abs_path), file=source_out)
print('#line {} "{}"'.format(line + 1, abs_path), file=source_out)
elif text != "#pragma once\n":
source_out.write(text)
def main():
parser = argparse.ArgumentParser(description="Transform a unity build into an amalgamation")
parser = argparse.ArgumentParser(
description="Transform a unity build into an amalgamation"
)
parser.add_argument("source", help="source file")
parser.add_argument("-I", action="append", dest="include_paths", help="include paths for private headers")
parser.add_argument("-i", action="append", dest="public_include_paths", help="include paths for public headers")
parser.add_argument("-x", action="append", dest="excluded", help="excluded header files")
parser.add_argument(
"-I",
action="append",
dest="include_paths",
help="include paths for private headers",
)
parser.add_argument(
"-i",
action="append",
dest="public_include_paths",
help="include paths for public headers",
)
parser.add_argument(
"-x", action="append", dest="excluded", help="excluded header files"
)
parser.add_argument("-o", dest="source_out", help="output C++ file", required=True)
parser.add_argument("-H", dest="header_out", help="output C++ header file", required=True)
parser.add_argument(
"-H", dest="header_out", help="output C++ header file", required=True
)
args = parser.parse_args()
include_paths = list(map(path.abspath, args.include_paths or []))
@@ -102,10 +154,15 @@ def main():
excluded.update(map(path.abspath, args.excluded or []))
filename = args.source
abs_path = path.abspath(filename)
with open(filename) as f, open(args.source_out, 'w') as source_out, open(args.header_out, 'w') as header_out:
with open(filename) as f, open(args.source_out, "w") as source_out, open(
args.header_out, "w"
) as header_out:
print('#line 1 "{}"'.format(filename), file=source_out)
print('#include "{}"'.format(header_out.name), file=source_out)
process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths)
process_file(
f, abs_path, source_out, header_out, include_paths, public_include_paths
)
if __name__ == "__main__":
main()
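
The amalgamate.py flags touched above (-I, -i, -x, -o, -H) compose into a single invocation. A hedged sketch via subprocess; the unity source name and paths are illustrative, not taken from this diff:

    import subprocess

    subprocess.run(
        [
            "python3", "build_tools/amalgamate.py", "unity.cc",
            "-I", ".", "-i", "./include",
            "-o", "rocksdb.cc", "-H", "rocksdb.h",
        ],
        check=True,
    )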

@@ -4,23 +4,27 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
'''Access the results of benchmark runs
"""Access the results of benchmark runs
Send these results on to OpenSearch graphing service
'''
"""
import argparse
import itertools
import logging
import os
import re
import sys
import requests
from dateutil import parser
import logging
logging.basicConfig(level=logging.DEBUG)
class Configuration:
opensearch_user = os.environ['ES_USER']
opensearch_pass = os.environ['ES_PASS']
opensearch_user = os.environ["ES_USER"]
opensearch_pass = os.environ["ES_PASS"]
class BenchmarkResultException(Exception):
def __init__(self, message, content):
@@ -30,45 +34,71 @@ class BenchmarkResultException(Exception):
class BenchmarkUtils:
expected_keys = ['ops_sec', 'mb_sec', 'lsm_sz', 'blob_sz', 'c_wgb', 'w_amp',
'c_mbps', 'c_wsecs', 'c_csecs', 'b_rgb', 'b_wgb', 'usec_op',
'p50', 'p99', 'p99.9', 'p99.99', 'pmax',
'uptime', 'stall%', 'Nstall', 'u_cpu', 's_cpu', 'rss', 'test', 'date', 'version', 'job_id']
expected_keys = [
"ops_sec",
"mb_sec",
"lsm_sz",
"blob_sz",
"c_wgb",
"w_amp",
"c_mbps",
"c_wsecs",
"c_csecs",
"b_rgb",
"b_wgb",
"usec_op",
"p50",
"p99",
"p99.9",
"p99.99",
"pmax",
"uptime",
"stall%",
"Nstall",
"u_cpu",
"s_cpu",
"rss",
"test",
"date",
"version",
"job_id",
]
def sanity_check(row):
if not 'test' in row:
if "test" not in row:
logging.debug(f"not 'test' in row: {row}")
return False
if row['test'] == '':
if row["test"] == "":
logging.debug(f"row['test'] == '': {row}")
return False
if not 'date' in row:
if "date" not in row:
logging.debug(f"not 'date' in row: {row}")
return False
if not 'ops_sec' in row:
if "ops_sec" not in row:
logging.debug(f"not 'ops_sec' in row: {row}")
return False
try:
v = int(row['ops_sec'])
_ = int(row["ops_sec"])
except (ValueError, TypeError):
logging.debug(f"int(row['ops_sec']): {row}")
return False
try:
(_, _) = parser.parse(row['date'], fuzzy_with_tokens=True)
(_, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
except (parser.ParserError):
logging.error(f"parser.parse((row['date']): not a valid format for date in row: {row}")
logging.error(
f"parser.parse((row['date']): not a valid format for date in row: {row}"
)
return False
return True
def conform_opensearch(row):
(dt, _) = parser.parse(row['date'], fuzzy_with_tokens=True)
(dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
# create a test_date field, which was previously what was expected
# repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month)
# e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
row['test_date'] = dt.isoformat()
row['date'] = dt.isoformat()
return dict((key.replace('.', '_'), value)
for (key, value) in row.items())
row["test_date"] = dt.isoformat()
row["date"] = dt.isoformat()
return {key.replace(".", "_"): value for key, value in row.items()}
class ResultParser:
@@ -80,24 +110,24 @@ class ResultParser:
def ignore(self, l_in: str):
if len(l_in) == 0:
return True
if l_in[0:1] == '#':
if l_in[0:1] == "#":
return True
return False
def line(self, l_in: str):
'''Parse a line into items
def line(self, line_in: str):
"""Parse a line into items
Being clever about separators
'''
l = l_in
"""
line = line_in
row = []
while l != '':
match_item = self.field.match(l)
while line != "":
match_item = self.field.match(line)
if match_item:
item = match_item.group(0)
row.append(item)
l = l[len(item):]
line = line[len(item) :]
else:
match_intra = self.intra.match(l)
match_intra = self.intra.match(line)
if match_intra:
intra = match_intra.group(0)
# Count the separators
@@ -107,26 +137,27 @@ class ResultParser:
sep_count = len(tabbed) - 1
if sep_count == 0:
sep_count = 1
for i in range(sep_count-1):
row.append('')
l = l[len(intra):]
for _ in range(sep_count - 1):
row.append("")
line = line[len(intra) :]
else:
raise BenchmarkResultException(
'Invalid TSV line', f"{l_in} at {l}")
raise BenchmarkResultException("Invalid TSV line", f"{line_in} at {line}")
return row
def parse(self, lines):
'''Parse something that iterates lines'''
"""Parse something that iterates lines"""
rows = [self.line(line) for line in lines if not self.ignore(line)]
header = rows[0]
width = len(header)
records = [{k: v for (k, v) in itertools.zip_longest(
header, row[:width])} for row in rows[1:]]
records = [
{k: v for (k, v) in itertools.zip_longest(header, row[:width])}
for row in rows[1:]
]
return records
def load_report_from_tsv(filename: str):
file = open(filename, 'r')
file = open(filename, "r")
contents = file.readlines()
file.close()
parser = ResultParser()
@@ -136,18 +167,27 @@ def load_report_from_tsv(filename: str):
def push_report_to_opensearch(report, esdocument):
sanitized = [BenchmarkUtils.conform_opensearch(row)
for row in report if BenchmarkUtils.sanity_check(row)]
logging.debug(f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch")
sanitized = [
BenchmarkUtils.conform_opensearch(row)
for row in report
if BenchmarkUtils.sanity_check(row)
]
logging.debug(
f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch"
)
for single_benchmark in sanitized:
logging.debug(f"upload benchmark: {single_benchmark}")
response = requests.post(
esdocument,
json=single_benchmark, auth=(os.environ['ES_USER'], os.environ['ES_PASS']))
json=single_benchmark,
auth=(os.environ["ES_USER"], os.environ["ES_PASS"]),
)
logging.debug(
f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}")
f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}"
)
response.raise_for_status()
def push_report_to_null(report):
for row in report:
@@ -156,32 +196,41 @@ def push_report_to_null(report):
conformed = BenchmarkUtils.conform_opensearch(row)
logging.debug(f"conformed row {conformed}")
def main():
'''Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
"""Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
This tool will
(1) Open a local tsv benchmark report file
(2) Upload to OpenSearch document, via https/JSON
'''
"""
parser = argparse.ArgumentParser(
description='CircleCI benchmark scraper.')
parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.")
# --tsvfile is the name of the file to read results from
# --esdocument is the ElasticSearch document to push these results into
#
parser.add_argument('--tsvfile', default='build_tools/circle_api_scraper_input.txt',
help='File from which to read tsv report')
parser.add_argument('--esdocument', help='ElasticSearch/OpenSearch document URL to upload report into')
parser.add_argument('--upload', choices=['opensearch', 'none'], default='opensearch')
parser.add_argument(
"--tsvfile",
default="build_tools/circle_api_scraper_input.txt",
help="File from which to read tsv report",
)
parser.add_argument(
"--esdocument",
help="ElasticSearch/OpenSearch document URL to upload report into",
)
parser.add_argument(
"--upload", choices=["opensearch", "none"], default="opensearch"
)
args = parser.parse_args()
logging.debug(f"Arguments: {args}")
reports = load_report_from_tsv(args.tsvfile)
if (args.upload == 'opensearch'):
if args.upload == "opensearch":
push_report_to_opensearch(reports, args.esdocument)
else:
push_report_to_null(reports)
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())

@@ -3,16 +3,13 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
'''Filter for error messages in test output:
"""Filter for error messages in test output:
- Receives merged stdout/stderr from test on stdin
- Finds patterns of known error messages for test name (first argument)
- Prints those error messages to stdout
'''
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
import re
import sys
@@ -20,23 +17,24 @@ import sys
class ErrorParserBase(object):
def parse_error(self, line):
'''Parses a line of test output. If it contains an error, returns a
"""Parses a line of test output. If it contains an error, returns a
formatted message describing the error; otherwise, returns None.
Subclasses must override this method.
'''
"""
raise NotImplementedError
class GTestErrorParser(ErrorParserBase):
'''A parser that remembers the last test that began running so it can print
"""A parser that remembers the last test that began running so it can print
that test's name upon detecting failure.
'''
_GTEST_NAME_PATTERN = re.compile(r'\[ RUN \] (\S+)$')
"""
_GTEST_NAME_PATTERN = re.compile(r"\[ RUN \] (\S+)$")
# format: '<filename or "unknown file">:<line #>: Failure'
_GTEST_FAIL_PATTERN = re.compile(r'(unknown file|\S+:\d+): Failure$')
_GTEST_FAIL_PATTERN = re.compile(r"(unknown file|\S+:\d+): Failure$")
def __init__(self):
self._last_gtest_name = 'Unknown test'
self._last_gtest_name = "Unknown test"
def parse_error(self, line):
gtest_name_match = self._GTEST_NAME_PATTERN.match(line)
@@ -45,14 +43,13 @@ class GTestErrorParser(ErrorParserBase):
return None
gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line)
if gtest_fail_match:
return '%s failed: %s' % (
self._last_gtest_name, gtest_fail_match.group(1))
return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1))
return None
class MatchErrorParser(ErrorParserBase):
'''A simple parser that returns the whole line if it matches the pattern.
'''
"""A simple parser that returns the whole line if it matches the pattern."""
def __init__(self, pattern):
self._pattern = re.compile(pattern)
@@ -69,97 +66,104 @@ class CompilerErrorParser(MatchErrorParser):
# format (link error):
# '<filename>:<line #>: error: <error msg>'
# The below regex catches both
super(CompilerErrorParser, self).__init__(r'\S+:\d+: error:')
super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:")
class ScanBuildErrorParser(MatchErrorParser):
def __init__(self):
super(ScanBuildErrorParser, self).__init__(
r'scan-build: \d+ bugs found.$')
super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$")
class DbCrashErrorParser(MatchErrorParser):
def __init__(self):
super(DbCrashErrorParser, self).__init__(r'\*\*\*.*\^$|TEST FAILED.')
super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.")
class WriteStressErrorParser(MatchErrorParser):
def __init__(self):
super(WriteStressErrorParser, self).__init__(
r'ERROR: write_stress died with exitcode=\d+')
r"ERROR: write_stress died with exitcode=\d+"
)
class AsanErrorParser(MatchErrorParser):
def __init__(self):
super(AsanErrorParser, self).__init__(
r'==\d+==ERROR: AddressSanitizer:')
super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:")
class UbsanErrorParser(MatchErrorParser):
def __init__(self):
# format: '<filename>:<line #>:<column #>: runtime error: <error msg>'
super(UbsanErrorParser, self).__init__(r'\S+:\d+:\d+: runtime error:')
super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:")
class ValgrindErrorParser(MatchErrorParser):
def __init__(self):
# just grab the summary, valgrind doesn't clearly distinguish errors
# from other log messages.
super(ValgrindErrorParser, self).__init__(r'==\d+== ERROR SUMMARY:')
super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:")
class CompatErrorParser(MatchErrorParser):
def __init__(self):
super(CompatErrorParser, self).__init__(r'==== .*[Ee]rror.* ====$')
super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$")
class TsanErrorParser(MatchErrorParser):
def __init__(self):
super(TsanErrorParser, self).__init__(r'WARNING: ThreadSanitizer:')
super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:")
_TEST_NAME_TO_PARSERS = {
'punit': [CompilerErrorParser, GTestErrorParser],
'unit': [CompilerErrorParser, GTestErrorParser],
'release': [CompilerErrorParser, GTestErrorParser],
'unit_481': [CompilerErrorParser, GTestErrorParser],
'release_481': [CompilerErrorParser, GTestErrorParser],
'clang_unit': [CompilerErrorParser, GTestErrorParser],
'clang_release': [CompilerErrorParser, GTestErrorParser],
'clang_analyze': [CompilerErrorParser, ScanBuildErrorParser],
'code_cov': [CompilerErrorParser, GTestErrorParser],
'unity': [CompilerErrorParser, GTestErrorParser],
'lite': [CompilerErrorParser],
'lite_test': [CompilerErrorParser, GTestErrorParser],
'stress_crash': [CompilerErrorParser, DbCrashErrorParser],
'stress_crash_with_atomic_flush': [CompilerErrorParser, DbCrashErrorParser],
'stress_crash_with_txn': [CompilerErrorParser, DbCrashErrorParser],
'write_stress': [CompilerErrorParser, WriteStressErrorParser],
'asan': [CompilerErrorParser, GTestErrorParser, AsanErrorParser],
'asan_crash': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'asan_crash_with_atomic_flush': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'asan_crash_with_txn': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'ubsan': [CompilerErrorParser, GTestErrorParser, UbsanErrorParser],
'ubsan_crash': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'ubsan_crash_with_atomic_flush': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'ubsan_crash_with_txn': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'valgrind': [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser],
'tsan': [CompilerErrorParser, GTestErrorParser, TsanErrorParser],
'format_compatible': [CompilerErrorParser, CompatErrorParser],
'run_format_compatible': [CompilerErrorParser, CompatErrorParser],
'no_compression': [CompilerErrorParser, GTestErrorParser],
'run_no_compression': [CompilerErrorParser, GTestErrorParser],
'regression': [CompilerErrorParser],
'run_regression': [CompilerErrorParser],
"punit": [CompilerErrorParser, GTestErrorParser],
"unit": [CompilerErrorParser, GTestErrorParser],
"release": [CompilerErrorParser, GTestErrorParser],
"unit_481": [CompilerErrorParser, GTestErrorParser],
"release_481": [CompilerErrorParser, GTestErrorParser],
"clang_unit": [CompilerErrorParser, GTestErrorParser],
"clang_release": [CompilerErrorParser, GTestErrorParser],
"clang_analyze": [CompilerErrorParser, ScanBuildErrorParser],
"code_cov": [CompilerErrorParser, GTestErrorParser],
"unity": [CompilerErrorParser, GTestErrorParser],
"lite": [CompilerErrorParser],
"lite_test": [CompilerErrorParser, GTestErrorParser],
"stress_crash": [CompilerErrorParser, DbCrashErrorParser],
"stress_crash_with_atomic_flush": [CompilerErrorParser, DbCrashErrorParser],
"stress_crash_with_txn": [CompilerErrorParser, DbCrashErrorParser],
"write_stress": [CompilerErrorParser, WriteStressErrorParser],
"asan": [CompilerErrorParser, GTestErrorParser, AsanErrorParser],
"asan_crash": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
"asan_crash_with_atomic_flush": [
CompilerErrorParser,
AsanErrorParser,
DbCrashErrorParser,
],
"asan_crash_with_txn": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
"ubsan": [CompilerErrorParser, GTestErrorParser, UbsanErrorParser],
"ubsan_crash": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
"ubsan_crash_with_atomic_flush": [
CompilerErrorParser,
UbsanErrorParser,
DbCrashErrorParser,
],
"ubsan_crash_with_txn": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
"valgrind": [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser],
"tsan": [CompilerErrorParser, GTestErrorParser, TsanErrorParser],
"format_compatible": [CompilerErrorParser, CompatErrorParser],
"run_format_compatible": [CompilerErrorParser, CompatErrorParser],
"no_compression": [CompilerErrorParser, GTestErrorParser],
"run_no_compression": [CompilerErrorParser, GTestErrorParser],
"regression": [CompilerErrorParser],
"run_regression": [CompilerErrorParser],
}
def main():
if len(sys.argv) != 2:
return 'Usage: %s <test name>' % sys.argv[0]
return "Usage: %s <test name>" % sys.argv[0]
test_name = sys.argv[1]
if test_name not in _TEST_NAME_TO_PARSERS:
return 'Unknown test name: %s' % test_name
return "Unknown test name: %s" % test_name
error_parsers = []
for parser_cls in _TEST_NAME_TO_PARSERS[test_name]:
@@ -173,5 +177,5 @@ def main():
print(error_msg)
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())
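
The error_filter parsers are line-oriented: `parse_error` returns a formatted message or `None`. A small illustrative driver against the patterns as shown above (the sample log lines are invented, and the import assumes build_tools/ is on `sys.path`):

    from error_filter import GTestErrorParser

    parser = GTestErrorParser()
    for line in [
        "[ RUN ] DBTest.MockVectorEnv",
        "db/db_test.cc:123: Failure",
    ]:
        msg = parser.parse_error(line)
        if msg:
            print(msg)  # DBTest.MockVectorEnv failed: db/db_test.cc:123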

@@ -47,35 +47,39 @@ def parse_gcov_report(gcov_input):
return per_file_coverage, total_coverage
def get_option_parser():
usage = "Parse the gcov output and generate more human-readable code " +\
"coverage report."
usage = (
"Parse the gcov output and generate more human-readable code "
+ "coverage report."
)
parser = optparse.OptionParser(usage)
parser.add_option(
"--interested-files", "-i",
"--interested-files",
"-i",
dest="filenames",
help="Comma separated files names. if specified, we will display " +
"the coverage report only for interested source files. " +
"Otherwise we will display the coverage report for all " +
"source files."
help="Comma separated files names. if specified, we will display "
+ "the coverage report only for interested source files. "
+ "Otherwise we will display the coverage report for all "
+ "source files.",
)
return parser
def display_file_coverage(per_file_coverage, total_coverage):
# To print out auto-adjustable column, we need to know the longest
# length of file names.
max_file_name_length = max(
len(fname) for fname in per_file_coverage.keys()
)
max_file_name_length = max(len(fname) for fname in per_file_coverage.keys())
# -- Print header
# size of separator is determined by 3 column sizes:
# file name, coverage percentage and lines.
header_template = \
"%" + str(max_file_name_length) + "s\t%s\t%s"
header_template = "%" + str(max_file_name_length) + "s\t%s\t%s"
separator = "-" * (max_file_name_length + 10 + 20)
print(header_template % ("Filename", "Coverage", "Lines")) # noqa: E999 T25377293 Grandfathered in
print(
header_template % ("Filename", "Coverage", "Lines")
) # noqa: E999 T25377293 Grandfathered in
print(separator)
# -- Print body
@@ -91,13 +95,14 @@ def display_file_coverage(per_file_coverage, total_coverage):
print(separator)
print(record_template % ("Total", total_coverage[0], total_coverage[1]))
def report_coverage():
parser = get_option_parser()
(options, args) = parser.parse_args()
interested_files = set()
if options.filenames is not None:
interested_files = set(f.strip() for f in options.filenames.split(','))
interested_files = {f.strip() for f in options.filenames.split(",")}
# To make things simple, right now we only read gcov report from the input
per_file_coverage, total_coverage = parse_gcov_report(sys.stdin)
@@ -105,7 +110,8 @@ def report_coverage():
# Check if we need to display coverage info for interested files.
if len(interested_files):
per_file_coverage = dict(
(fname, per_file_coverage[fname]) for fname in interested_files
(fname, per_file_coverage[fname])
for fname in interested_files
if fname in per_file_coverage
)
# If we only interested in several files, it makes no sense to report
@@ -117,5 +123,6 @@ def report_coverage():
return
display_file_coverage(per_file_coverage, total_coverage)
if __name__ == "__main__":
report_coverage()

@@ -3,8 +3,8 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
from abc import ABC, abstractmethod
import re
from abc import ABC, abstractmethod
class BenchmarkRunner(ABC):
@@ -25,15 +25,15 @@ class BenchmarkRunner(ABC):
# 'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is
# not specified in the OPTIONS file, then the location of the log file
# will be /dev/shm and the name of the file will be 'LOG'
file_name = ''
file_name = ""
if log_dir:
# refer GetInfoLogPrefix() in rocksdb/util/filename.cc
# example db_path: /dev/shm/dbbench
file_name = db_path[1:] # to ignore the leading '/' character
to_be_replaced = re.compile('[^0-9a-zA-Z\-_\.]')
to_be_replaced = re.compile("[^0-9a-zA-Z\-_\.]") # noqa
for character in to_be_replaced.findall(db_path):
file_name = file_name.replace(character, '_')
if not file_name.endswith('_'):
file_name += '_'
file_name += 'LOG'
file_name = file_name.replace(character, "_")
if not file_name.endswith("_"):
file_name += "_"
file_name += "LOG"
return file_name
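
To make the name mangling in `get_info_log_file_name` concrete, here is a runnable inline trace of the same steps on an example path (the path itself is illustrative):

    import re

    db_path = "/dev/shm/dbbench"  # example database path
    file_name = db_path[1:]  # drop the leading "/"
    for ch in re.compile(r"[^0-9a-zA-Z\-_\.]").findall(db_path):
        file_name = file_name.replace(ch, "_")
    if not file_name.endswith("_"):
        file_name += "_"
    file_name += "LOG"
    print(file_name)  # dev_shm_dbbench_LOG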

@@ -4,6 +4,7 @@
# (found in the LICENSE.Apache file in the root directory).
import argparse
from advisor.db_config_optimizer import ConfigOptimizer
from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
@@ -23,44 +24,35 @@ def main(args):
bench_runner_class = getattr(bench_runner_module, args.benchrunner_class)
ods_args = {}
if args.ods_client and args.ods_entity:
ods_args['client_script'] = args.ods_client
ods_args['entity'] = args.ods_entity
ods_args["client_script"] = args.ods_client
ods_args["entity"] = args.ods_entity
if args.ods_key_prefix:
ods_args['key_prefix'] = args.ods_key_prefix
ods_args["key_prefix"] = args.ods_key_prefix
db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args)
# initialise the database configuration
db_options = DatabaseOptions(args.rocksdb_options, args.misc_options)
# set the frequency at which stats are dumped in the LOG file and the
# location of the LOG file.
db_log_dump_settings = {
"DBOptions.stats_dump_period_sec": {
NO_COL_FAMILY: args.stats_dump_period_sec
}
"DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: args.stats_dump_period_sec}
}
db_options.update_options(db_log_dump_settings)
# initialise the configuration optimizer
config_optimizer = ConfigOptimizer(
db_bench_runner,
db_options,
rule_spec_parser,
args.base_db_path
db_bench_runner, db_options, rule_spec_parser, args.base_db_path
)
# run the optimiser to improve the database configuration for given
# benchmarks, with the help of expert-specified rules
final_db_options = config_optimizer.run()
# generate the final rocksdb options file
print(
'Final configuration in: ' +
final_db_options.generate_options_config('final')
)
print(
'Final miscellaneous options: ' +
repr(final_db_options.get_misc_options())
"Final configuration in: " + final_db_options.generate_options_config("final")
)
print("Final miscellaneous options: " + repr(final_db_options.get_misc_options()))
if __name__ == '__main__':
'''
if __name__ == "__main__":
"""
An example run of this tool from the command-line would look like:
python3 -m advisor.config_optimizer_example
--base_db_path=/tmp/rocksdbtest-155919/dbbench
@@ -69,66 +61,80 @@ if __name__ == '__main__':
--benchrunner_module=advisor.db_bench_runner
--benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench
readwhilewriting use_existing_db=true duration=90
'''
parser = argparse.ArgumentParser(description='This script is used for\
searching for a better database configuration')
"""
parser = argparse.ArgumentParser(
description="This script is used for\
searching for a better database configuration"
)
parser.add_argument(
'--rocksdb_options', required=True, type=str,
help='path of the starting Rocksdb OPTIONS file'
"--rocksdb_options",
required=True,
type=str,
help="path of the starting Rocksdb OPTIONS file",
)
# these are options that are column-family agnostic and are not yet
# supported by the Rocksdb Options file: eg. bloom_bits=2
parser.add_argument(
'--misc_options', nargs='*',
help='whitespace-separated list of options that are not supported ' +
'by the Rocksdb OPTIONS file, given in the ' +
'<option_name>=<option_value> format eg. "bloom_bits=2 ' +
'rate_limiter_bytes_per_sec=128000000"')
parser.add_argument(
'--base_db_path', required=True, type=str,
help='path for the Rocksdb database'
"--misc_options",
nargs="*",
help="whitespace-separated list of options that are not supported "
+ "by the Rocksdb OPTIONS file, given in the "
+ '<option_name>=<option_value> format eg. "bloom_bits=2 '
+ 'rate_limiter_bytes_per_sec=128000000"',
)
parser.add_argument(
'--rules_spec', required=True, type=str,
help='path of the file containing the expert-specified Rules'
"--base_db_path", required=True, type=str, help="path for the Rocksdb database"
)
parser.add_argument(
'--stats_dump_period_sec', required=True, type=int,
help='the frequency (in seconds) at which STATISTICS are printed to ' +
'the Rocksdb LOG file'
"--rules_spec",
required=True,
type=str,
help="path of the file containing the expert-specified Rules",
)
# ODS arguments
parser.add_argument(
'--ods_client', type=str, help='the ODS client binary'
"--stats_dump_period_sec",
required=True,
type=int,
help="the frequency (in seconds) at which STATISTICS are printed to "
+ "the Rocksdb LOG file",
)
# ODS arguments
parser.add_argument("--ods_client", type=str, help="the ODS client binary")
parser.add_argument(
'--ods_entity', type=str,
help='the servers for which the ODS stats need to be fetched'
"--ods_entity",
type=str,
help="the servers for which the ODS stats need to be fetched",
)
parser.add_argument(
'--ods_key_prefix', type=str,
help='the prefix that needs to be attached to the keys of time ' +
'series to be fetched from ODS'
"--ods_key_prefix",
type=str,
help="the prefix that needs to be attached to the keys of time "
+ "series to be fetched from ODS",
)
# benchrunner_module example: advisor.db_benchmark_client
parser.add_argument(
'--benchrunner_module', required=True, type=str,
help='the module containing the BenchmarkRunner class to be used by ' +
'the Optimizer, example: advisor.db_bench_runner'
"--benchrunner_module",
required=True,
type=str,
help="the module containing the BenchmarkRunner class to be used by "
+ "the Optimizer, example: advisor.db_bench_runner",
)
# benchrunner_class example: DBBenchRunner
parser.add_argument(
'--benchrunner_class', required=True, type=str,
help='the name of the BenchmarkRunner class to be used by the ' +
'Optimizer, should be present in the module provided in the ' +
'benchrunner_module argument, example: DBBenchRunner'
"--benchrunner_class",
required=True,
type=str,
help="the name of the BenchmarkRunner class to be used by the "
+ "Optimizer, should be present in the module provided in the "
+ "benchrunner_module argument, example: DBBenchRunner",
)
parser.add_argument(
'--benchrunner_pos_args', nargs='*',
help='whitespace-separated positional arguments that are passed on ' +
'to the constructor of the BenchmarkRunner class provided in the ' +
'benchrunner_class argument, example: "use_existing_db=true ' +
'duration=900"'
"--benchrunner_pos_args",
nargs="*",
help="whitespace-separated positional arguments that are passed on "
+ "to the constructor of the BenchmarkRunner class provided in the "
+ 'benchrunner_class argument, example: "use_existing_db=true '
+ 'duration=900"',
)
args = parser.parse_args()
main(args)

@@ -3,19 +3,22 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
from advisor.bench_runner import BenchmarkRunner
from advisor.db_log_parser import DataSource, DatabaseLogs, NO_COL_FAMILY
from advisor.db_stats_fetcher import (
LogStatsParser, OdsStatsFetcher, DatabasePerfContext
)
import shutil
import subprocess
import time
from advisor.bench_runner import BenchmarkRunner
from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY
from advisor.db_stats_fetcher import (
DatabasePerfContext,
LogStatsParser,
OdsStatsFetcher,
)
'''
"""
NOTE: This is not thread-safe, because the output file is simply overwritten.
'''
"""
class DBBenchRunner(BenchmarkRunner):
@@ -37,9 +40,7 @@ class DBBenchRunner(BenchmarkRunner):
optional_args_str = ""
for option_name, option_value in misc_options_dict.items():
if option_value:
optional_args_str += (
" --" + option_name + "=" + str(option_value)
)
optional_args_str += " --" + option_name + "=" + str(option_value)
return optional_args_str
def __init__(self, positional_args, ods_args=None):
@@ -54,19 +55,17 @@ class DBBenchRunner(BenchmarkRunner):
self.ods_args = ods_args
def _parse_output(self, get_perf_context=False):
'''
"""
Sample db_bench output after running 'readwhilewriting' benchmark:
DB path: [/tmp/rocksdbtest-155919/dbbench]\n
readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
of 5427999 found)\n
PERF_CONTEXT:\n
user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
'''
output = {
self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None
}
"""
output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
perf_context_begins = False
with open(self.OUTPUT_FILE, 'r') as fp:
with open(self.OUTPUT_FILE, "r") as fp:
for line in fp:
if line.startswith(self.benchmark):
# line from sample output:
@@ -77,9 +76,7 @@ class DBBenchRunner(BenchmarkRunner):
for ix, token in enumerate(token_list):
if token.startswith(self.THROUGHPUT):
# in above example, throughput = 60305 ops/sec
output[self.THROUGHPUT] = (
float(token_list[ix - 1])
)
output[self.THROUGHPUT] = float(token_list[ix - 1])
break
elif get_perf_context and line.startswith(self.PERF_CON):
# the following lines in the output contain perf context
@@ -89,11 +86,11 @@ class DBBenchRunner(BenchmarkRunner):
# Sample perf_context output:
# user_key_comparison_count = 500, block_cache_hit_count =\
# 468, block_read_count = 580, block_read_byte = 445, ...
token_list = line.strip().split(',')
token_list = line.strip().split(",")
# token_list = ['user_key_comparison_count = 500',
# 'block_cache_hit_count = 468','block_read_count = 580'...
perf_context = {
tk.split('=')[0].strip(): tk.split('=')[1].strip()
tk.split("=")[0].strip(): tk.split("=")[1].strip()
for tk in token_list
if tk
}
@@ -103,17 +100,13 @@ class DBBenchRunner(BenchmarkRunner):
timestamp = int(time.time())
perf_context_ts = {}
for stat in perf_context.keys():
perf_context_ts[stat] = {
timestamp: int(perf_context[stat])
}
perf_context_ts[stat] = {timestamp: int(perf_context[stat])}
output[self.PERF_CON] = perf_context_ts
perf_context_begins = False
elif line.startswith(self.DB_PATH):
# line from sample output:
# DB path: [/tmp/rocksdbtest-155919/dbbench]\n
output[self.DB_PATH] = (
line.split('[')[1].split(']')[0]
)
output[self.DB_PATH] = line.split("[")[1].split("]")[0]
return output
def get_log_options(self, db_options, db_path):
@@ -124,40 +117,38 @@ class DBBenchRunner(BenchmarkRunner):
logs_file_prefix = None
# fetch frequency at which the stats are dumped in the Rocksdb logs
dump_period = 'DBOptions.stats_dump_period_sec'
dump_period = "DBOptions.stats_dump_period_sec"
# fetch the directory, if specified, in which the Rocksdb logs are
# dumped, by default logs are dumped in same location as database
log_dir = 'DBOptions.db_log_dir'
log_dir = "DBOptions.db_log_dir"
log_options = db_options.get_options([dump_period, log_dir])
if dump_period in log_options:
stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
if log_dir in log_options:
log_dir_path = log_options[log_dir][NO_COL_FAMILY]
log_file_name = DBBenchRunner.get_info_log_file_name(
log_dir_path, db_path
)
log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)
if not log_dir_path:
log_dir_path = db_path
if not log_dir_path.endswith('/'):
log_dir_path += '/'
if not log_dir_path.endswith("/"):
log_dir_path += "/"
logs_file_prefix = log_dir_path + log_file_name
return (logs_file_prefix, stats_freq_sec)
def _get_options_command_line_args_str(self, curr_options):
"""
This method uses the provided Rocksdb OPTIONS to create a string of
command-line arguments for db_bench.
The --options_file argument is always given and the options that are
not supported by the OPTIONS file are given as separate arguments.
"""
optional_args_str = DBBenchRunner.get_opt_args_str(
curr_options.get_misc_options()
)
# generate an options configuration file
options_file = curr_options.generate_options_config(nonce="12345")
optional_args_str += " --options_file=" + options_file
return optional_args_str
@ -166,10 +157,11 @@ class DBBenchRunner(BenchmarkRunner):
try:
shutil.rmtree(db_path, ignore_errors=True)
except OSError as e:
print("Error: rmdir " + e.filename + " " + e.strerror)
# setup database with a million keys using the fillrandom benchmark
command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
self.db_bench_binary,
db_path,
)
args_str = self._get_options_command_line_args_str(curr_options)
command += args_str
@ -177,21 +169,23 @@ class DBBenchRunner(BenchmarkRunner):
def _build_experiment_command(self, curr_options, db_path):
command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
self.db_bench_binary,
self.benchmark,
db_path,
)
# fetch the command-line arguments string for providing Rocksdb options
args_str = self._get_options_command_line_args_str(curr_options)
# handle the command-line args passed in the constructor, these
# arguments are specific to db_bench
for cmd_line_arg in self.db_bench_args:
args_str += " --" + cmd_line_arg
command += args_str
return command
def _run_command(self, command):
out_file = open(self.OUTPUT_FILE, "w+")
err_file = open(self.ERROR_FILE, "w+")
print("executing... - " + command)
subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
out_file.close()
err_file.close()
@ -214,32 +208,30 @@ class DBBenchRunner(BenchmarkRunner):
db_options, parsed_output[self.DB_PATH]
)
# create the Rocksbd LOGS object
db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
# Create the Log STATS object
db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
# Create the PerfContext STATS object
db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False)
# create the data-sources dictionary
data_sources = {
DataSource.Type.DB_OPTIONS: [db_options],
DataSource.Type.LOG: [db_logs],
DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context],
}
# Create the ODS STATS object
if self.ods_args:
key_prefix = ""
if "key_prefix" in self.ods_args:
key_prefix = self.ods_args["key_prefix"]
data_sources[DataSource.Type.TIME_SERIES].append(
OdsStatsFetcher(
self.ods_args["client_script"],
self.ods_args["entity"],
experiment_start_time,
experiment_end_time,
key_prefix,
)
)
# return the experiment's data-sources and throughput
return data_sources, parsed_output[self.THROUGHPUT]
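# A minimal standalone sketch (not part of this diff) of the throughput
# parsing that _parse_output() performs on the sample line shown in its
# docstring. It assumes self.THROUGHPUT is the "ops/sec" marker, which is
# defined outside the hunks shown here.
def parse_throughput(line):
    token_list = line.strip().split()
    for ix, token in enumerate(token_list):
        if token.startswith("ops/sec"):
            # the numeric value precedes the "ops/sec;" token
            return float(token_list[ix - 1])
    return None


assert (
    parse_throughput("readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s")
    == 60305.0
)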

@ -3,16 +3,17 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import copy
import random
from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import Suggestion
class ConfigOptimizer:
SCOPE = "scope"
SUGG_VAL = "suggested values"
@staticmethod
def apply_action_on_value(old_value, action, suggested_values):
@ -21,7 +22,7 @@ class ConfigOptimizer:
chosen_sugg_val = random.choice(list(suggested_values))
new_value = None
if action is Suggestion.Action.set or not old_value:
assert chosen_sugg_val
new_value = chosen_sugg_val
else:
# For increase/decrease actions, currently the code tries to make
@ -61,8 +62,8 @@ class ConfigOptimizer:
# A Suggestion in the rules spec must have the 'option' and
# 'action' fields defined, always call perform_checks() method
# after parsing the rules file using RulesSpec
assert option
assert action
required_options.append(option)
rule_suggestions.append(suggestions_dict[sugg_name])
current_config = options.get_options(required_options)
@ -87,8 +88,9 @@ class ConfigOptimizer:
updated_config[sugg.option][col_fam] = new_value
except AssertionError:
print(
"WARNING(ConfigOptimizer): provide suggested_values "
+ "for "
+ sugg.option
)
continue
# case: when the option is present in the current configuration
@ -103,8 +105,9 @@ class ConfigOptimizer:
updated_config[sugg.option][NO_COL_FAMILY] = new_value
except AssertionError:
print(
"WARNING(ConfigOptimizer): provide suggested_values "
+ "for "
+ sugg.option
)
else:
for col_fam in rule.get_trigger_column_families():
@ -120,15 +123,16 @@ class ConfigOptimizer:
updated_config[sugg.option][col_fam] = new_value
except AssertionError:
print(
"WARNING(ConfigOptimizer): provide "
+ "suggested_values for "
+ sugg.option
)
return current_config, updated_config
@staticmethod
def pick_rule_to_apply(rules, last_rule_name, rules_tried, backtrack):
if not rules:
print("\nNo more rules triggered!")
return None
# if the last rule provided an improvement in the database performance,
# and it was triggered again (i.e. it is present in 'rules'), then pick
@ -143,7 +147,7 @@ class ConfigOptimizer:
for rule in rules:
if rule.name not in rules_tried:
return rule
print("\nAll rules have been exhausted")
return None
@staticmethod
@ -153,13 +157,13 @@ class ConfigOptimizer:
rules_tried,
backtrack,
curr_options,
suggestions_dict,
):
curr_rule = ConfigOptimizer.pick_rule_to_apply(
triggered_rules, current_rule_name, rules_tried, backtrack
)
if not curr_rule:
return tuple([None] * 4)
# if a rule has been picked for improving db_config, update rules_tried
rules_tried.add(curr_rule.name)
# get updated config based on the picked rule
@ -168,17 +172,20 @@ class ConfigOptimizer:
)
conf_diff = DatabaseOptions.get_options_diff(curr_conf, updated_conf)
if not conf_diff: # the current and updated configs are the same
(
curr_rule,
rules_tried,
curr_conf,
updated_conf,
) = ConfigOptimizer.apply_suggestions(
triggered_rules,
None,
rules_tried,
backtrack,
curr_options,
suggestions_dict,
)
print("returning from apply_suggestions")
return (curr_rule, rules_tried, curr_conf, updated_conf)
# TODO(poojam23): check if this method is required or can we directly set
@ -205,52 +212,53 @@ class ConfigOptimizer:
# RULE from all the triggered rules and apply all its suggestions to
# the appropriate options.
# bootstrapping the optimizer
print("Bootstrapping optimizer:")
options = copy.deepcopy(self.db_options)
old_data_sources, old_metric = self.bench_runner.run_experiment(
options, self.base_db_path
)
print("Initial metric: " + str(old_metric))
self.rule_parser.load_rules_from_spec()
self.rule_parser.perform_section_checks()
triggered_rules = self.rule_parser.get_triggered_rules(
old_data_sources, options.get_column_families()
)
print("\nTriggered:")
self.rule_parser.print_rules(triggered_rules)
backtrack = False
rules_tried = set()
(
curr_rule,
rules_tried,
curr_conf,
updated_conf,
) = ConfigOptimizer.apply_suggestions(
triggered_rules,
None,
rules_tried,
backtrack,
options,
self.rule_parser.get_suggestions_dict(),
)
# the optimizer loop
while curr_rule:
print("\nRule picked for next iteration:")
print(curr_rule.name)
print("\ncurrent config:")
print(curr_conf)
print("updated config:")
print(updated_conf)
options.update_options(updated_conf)
# run bench_runner with updated config
new_data_sources, new_metric = self.bench_runner.run_experiment(
options, self.base_db_path
)
print("\nnew metric: " + str(new_metric))
backtrack = not self.bench_runner.is_metric_better(new_metric, old_metric)
# update triggered_rules, metric, data_sources, if required
if backtrack:
# revert changes to options config
print("\nBacktracking to previous configuration")
backtrack_conf = ConfigOptimizer.get_backtrack_config(
curr_conf, updated_conf
)
@ -262,21 +270,24 @@ class ConfigOptimizer:
triggered_rules = self.rule_parser.get_triggered_rules(
new_data_sources, options.get_column_families()
)
print("\nTriggered:")
self.rule_parser.print_rules(triggered_rules)
old_metric = new_metric
old_data_sources = new_data_sources
rules_tried = set()
# pick rule to work on and set curr_rule to that
(
curr_rule,
rules_tried,
curr_conf,
updated_conf,
) = ConfigOptimizer.apply_suggestions(
triggered_rules,
curr_rule.name,
rules_tried,
backtrack,
options,
self.rule_parser.get_suggestions_dict(),
)
# return the final database options configuration
return options
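# A condensed sketch (not part of this diff) of the greedy loop that
# ConfigOptimizer.run() implements above: apply a triggered rule, re-run the
# benchmark, and keep the new configuration only if the metric improved,
# otherwise backtrack. Every callable here is an illustrative stand-in, not
# the advisor's actual API.
def greedy_optimize(run_experiment, pick_rule, apply_rule, config, is_better):
    _, best_metric = run_experiment(config)
    rule = pick_rule()
    while rule:
        candidate = apply_rule(config, rule)
        _, metric = run_experiment(candidate)
        if is_better(metric, best_metric):
            config, best_metric = candidate, metric  # keep the improvement
        # otherwise the candidate is dropped, i.e. we backtrack to `config`
        rule = pick_rule()
    return config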

@ -3,15 +3,15 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import glob
import re
import time
from abc import ABC, abstractmethod
from calendar import timegm
from enum import Enum
NO_COL_FAMILY = "DB_WIDE"
class DataSource(ABC):
@ -33,7 +33,7 @@ class Log:
def is_new_log(log_line):
# The assumption is that a new log will start with a date printed in
# the below regex format.
date_regex = "\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}"  # noqa
return re.match(date_regex, log_line)
def __init__(self, log_line, column_families):
@ -46,7 +46,7 @@ class Log:
# "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634]
# [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n"
for col_fam in column_families:
search_for_str = "\[" + col_fam + "\]"  # noqa
if re.search(search_for_str, self.message):
self.column_family = col_fam
break
@ -67,21 +67,26 @@ class Log:
return self.message
def append_message(self, remaining_log):
self.message = self.message + "\n" + remaining_log.strip()
def get_timestamp(self):
# example: '2018/07/25-11:25:45.782710' will be converted to the GMT
# Unix timestamp 1532517945 (note: this method assumes that self.time
# is in GMT)
hr_time = self.time + "GMT"
timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z"))
return timestamp
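# A standalone illustration (not part of this diff) of the is_new_log() and
# get_timestamp() logic above; the sample line and the expected epoch value
# are the ones given in the comments of this file.
import re
import time
from calendar import timegm

date_regex = r"\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}"
line = "2018/07/25-11:25:45.782710 7f969de68700 [db/compaction_job.cc:1634] ..."
match = re.match(date_regex, line)
assert match is not None  # a new log line starts with this date format
hr_time = match.group() + "GMT"  # the logged time is interpreted as GMT
assert timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z")) == 1532517945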
def __repr__(self):
return (
"time: "
+ self.time
+ "; context: "
+ self.context
+ "; col_fam: "
+ self.column_family
+ "; message: "
+ self.message
)
@ -106,22 +111,20 @@ class DatabaseLogs(DataSource):
cond.set_trigger(trigger)
def check_and_trigger_conditions(self, conditions):
for file_name in glob.glob(self.logs_path_prefix + "*"):
# TODO(poojam23): find a way to distinguish between log files
# - generated in the current experiment but are labeled 'old'
# because the LOGs exceeded the file size limit AND
# - generated in some previous experiment that are also labeled
# 'old' and were not deleted for some reason
if re.search("old", file_name, re.IGNORECASE):
continue
with open(file_name, "r") as db_logs:
new_log = None
for line in db_logs:
if Log.is_new_log(line):
if new_log:
self.trigger_conditions_for_log(conditions, new_log)
new_log = Log(line, self.column_families)
else:
# To account for logs split into multiple lines

@ -4,25 +4,26 @@
# (found in the LICENSE.Apache file in the root directory).
import copy
import os
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.ini_parser import IniParser
class OptionsSpecParser(IniParser):
@staticmethod
def is_new_option(line):
return "=" in line
@staticmethod
def get_section_type(line):
"""
Example section header: [TableOptions/BlockBasedTable "default"]
Here the section type returned would be
'TableOptions.BlockBasedTable'
"""
section_path = line.strip()[1:-1].split()[0]
section_type = ".".join(section_path.split("/"))
return section_type
@staticmethod
@ -39,20 +40,20 @@ class OptionsSpecParser(IniParser):
# Example:
# Case 1: get_section_str('DBOptions', NO_COL_FAMILY)
# Case 2: get_section_str('TableOptions.BlockBasedTable', 'default')
section_type = "/".join(section_type.strip().split("."))
# Case 1: section_type = 'DBOptions'
# Case 2: section_type = 'TableOptions/BlockBasedTable'
section_str = "[" + section_type
if section_name == NO_COL_FAMILY:
# Case 1: '[DBOptions]'
return section_str + "]"
else:
# Case 2: '[TableOptions/BlockBasedTable "default"]'
return section_str + ' "' + section_name + '"]'
@staticmethod
def get_option_str(key, values):
option_str = key + "="
# get_option_str('db_log_dir', None), returns 'db_log_dir='
if values:
# example:
@ -61,7 +62,7 @@ class OptionsSpecParser(IniParser):
# 'max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1'
if isinstance(values, list):
for value in values:
option_str += str(value) + ":"
option_str = option_str[:-1]
else:
# example: get_option_str('write_buffer_size', 1048576)
@ -71,13 +72,12 @@ class OptionsSpecParser(IniParser):
class DatabaseOptions(DataSource):
@staticmethod
def is_misc_option(option_name):
# these are miscellaneous options that are not yet supported by the
# Rocksdb options file, hence they are not prefixed with any section
# name
return "." not in option_name
@staticmethod
def get_options_diff(opt_old, opt_new):
@ -102,7 +102,7 @@ class DatabaseOptions(DataSource):
if opt_old[opt][col_fam] != opt_new[opt][col_fam]:
diff[opt][col_fam] = (
opt_old[opt][col_fam],
opt_new[opt][col_fam],
)
else:
diff[opt][col_fam] = (opt_old[opt][col_fam], None)
@ -111,7 +111,7 @@ class DatabaseOptions(DataSource):
if opt_old[opt][col_fam] != opt_new[opt][col_fam]:
diff[opt][col_fam] = (
opt_old[opt][col_fam],
opt_new[opt][col_fam],
)
else:
diff[opt][col_fam] = (None, opt_new[opt][col_fam])
@ -137,21 +137,19 @@ class DatabaseOptions(DataSource):
self.misc_options = {}
if misc_options:
for option_pair_str in misc_options:
option_name = option_pair_str.split("=")[0].strip()
option_value = option_pair_str.split("=")[1].strip()
self.misc_options[option_name] = option_value
def load_from_source(self, options_path):
self.options_dict = {}
with open(options_path, "r") as db_options:
for line in db_options:
line = OptionsSpecParser.remove_trailing_comment(line)
if not line:
continue
if OptionsSpecParser.is_section_header(line):
curr_sec_type = OptionsSpecParser.get_section_type(line)
curr_sec_name = OptionsSpecParser.get_section_name(line)
if curr_sec_type not in self.options_dict:
self.options_dict[curr_sec_type] = {}
@ -163,17 +161,15 @@ class DatabaseOptions(DataSource):
# CFOptions and 'default' is the name of a column family
# that exists for this database, so it's added to the list of
# column families stored in this object
if curr_sec_type == "CFOptions":
if not self.column_families:
self.column_families = []
self.column_families.append(curr_sec_name)
elif OptionsSpecParser.is_new_option(line):
key, value = OptionsSpecParser.get_key_value_pair(line)
self.options_dict[curr_sec_type][curr_sec_name][key] = value
else:
error = "Not able to parse line in Options file."
OptionsSpecParser.exit_with_parse_error(line, error)
def get_misc_options(self):
@ -193,7 +189,7 @@ class DatabaseOptions(DataSource):
for sec_type in self.options_dict:
for col_fam in self.options_dict[sec_type]:
for opt_name in self.options_dict[sec_type][col_fam]:
option = sec_type + "." + opt_name
all_options.append(option)
all_options.extend(list(self.misc_options.keys()))
return self.get_options(all_options)
@ -211,24 +207,22 @@ class DatabaseOptions(DataSource):
continue
if option not in reqd_options_dict:
reqd_options_dict[option] = {}
reqd_options_dict[option][NO_COL_FAMILY] = self.misc_options[option]
else:
# Example: option = 'TableOptions.BlockBasedTable.block_align'
# then, sec_type = 'TableOptions.BlockBasedTable'
sec_type = ".".join(option.split(".")[:-1])
# opt_name = 'block_align'
opt_name = option.split(".")[-1]
if sec_type not in self.options_dict:
continue
for col_fam in self.options_dict[sec_type]:
if opt_name in self.options_dict[sec_type][col_fam]:
if option not in reqd_options_dict:
reqd_options_dict[option] = {}
reqd_options_dict[option][col_fam] = self.options_dict[
sec_type
][col_fam][opt_name]
return reqd_options_dict
def update_options(self, options):
@ -244,16 +238,19 @@ class DatabaseOptions(DataSource):
# misc_options dictionary
if NO_COL_FAMILY not in options[option]:
print(
"WARNING(DatabaseOptions.update_options): not "
+ "updating option "
+ option
+ " because it is in "
+ "misc_option format but its scope is not "
+ NO_COL_FAMILY
+ ". Check format of option."
)
continue
self.misc_options[option] = options[option][NO_COL_FAMILY]
else:
sec_name = ".".join(option.split(".")[:-1])
opt_name = option.split(".")[-1]
if sec_name not in self.options_dict:
self.options_dict[sec_name] = {}
for col_fam in options[option]:
@ -262,30 +259,26 @@ class DatabaseOptions(DataSource):
# value
if col_fam not in self.options_dict[sec_name]:
self.options_dict[sec_name][col_fam] = {}
self.options_dict[sec_name][col_fam][opt_name] = copy.deepcopy(
options[option][col_fam]
)
def generate_options_config(self, nonce):
# this method generates a Rocksdb OPTIONS file in the INI format from
# the options stored in self.options_dict
this_path = os.path.abspath(os.path.dirname(__file__))
file_name = "../temp/OPTIONS_" + str(nonce) + ".tmp"
file_path = os.path.join(this_path, file_name)
with open(file_path, "w") as fp:
for section in self.options_dict:
for col_fam in self.options_dict[section]:
fp.write(OptionsSpecParser.get_section_str(section, col_fam) + "\n")
for option in self.options_dict[section][col_fam]:
values = self.options_dict[section][col_fam][option]
fp.write(
OptionsSpecParser.get_option_str(option, values) + "\n"
)
fp.write("\n")
return file_path
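# A self-contained round-trip (not part of this diff) of the section-header
# helpers used by load_from_source() and generate_options_config() above; the
# header forms are the ones quoted in the comments of OptionsSpecParser.
NO_COL_FAMILY = "DB_WIDE"


def get_section_str(section_type, section_name):
    section_type = "/".join(section_type.strip().split("."))
    if section_name == NO_COL_FAMILY:
        return "[" + section_type + "]"
    return "[" + section_type + ' "' + section_name + '"]'


def get_section_type(line):
    section_path = line.strip()[1:-1].split()[0]
    return ".".join(section_path.split("/"))


assert get_section_str("DBOptions", NO_COL_FAMILY) == "[DBOptions]"
header = get_section_str("TableOptions.BlockBasedTable", "default")
assert header == '[TableOptions/BlockBasedTable "default"]'
assert get_section_type(header) == "TableOptions.BlockBasedTable"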
def check_and_trigger_conditions(self, conditions):
@ -299,10 +292,14 @@ class DatabaseOptions(DataSource):
for ix, option in enumerate(cond.options):
if option not in reqd_options_dict:
print(
"WARNING(DatabaseOptions.check_and_trigger): "
+ "skipping condition "
+ cond.name
+ " because it "
"requires option "
+ option
+ " but this option is"
+ " not available"
)
missing_reqd_option = True
break # required option is absent
@ -321,9 +318,7 @@ class DatabaseOptions(DataSource):
if eval(cond.eval_expr):
cond.set_trigger({NO_COL_FAMILY: options})
except Exception as e:
print("WARNING(DatabaseOptions) check_and_trigger:" + str(e))
continue
# for all the options that are not database-wide, we look for their
@ -340,14 +335,9 @@ class DatabaseOptions(DataSource):
if present:
try:
if eval(cond.eval_expr):
col_fam_options_dict[col_fam] = copy.deepcopy(options)
except Exception as e:
print("WARNING(DatabaseOptions) check_and_trigger: " + str(e))
# Trigger for an OptionCondition object is of the form:
# Dict[col_fam_name: List[option_value]]
# where col_fam_name is the name of a column family for which

@ -3,17 +3,19 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import copy
import glob
import re
import subprocess
import time
from typing import List
from advisor.db_log_parser import Log
from advisor.db_timeseries_parser import NO_ENTITY, TimeSeriesData
class LogStatsParser(TimeSeriesData):
STATS = "STATISTICS:"
@staticmethod
def parse_log_line_for_stats(log_line):
@ -22,12 +24,8 @@ class LogStatsParser(TimeSeriesData):
token_list = log_line.strip().split()
# token_list = ['rocksdb.db.get.micros', 'P50', ':', '8.4', 'P95', ':',
# '21.8', 'P99', ':', '33.9', 'P100', ':', '92.0']
stat_prefix = token_list[0] + "."  # 'rocksdb.db.get.micros.'
stat_values = [token for token in token_list[1:] if token != ":"]
# stat_values = ['P50', '8.4', 'P95', '21.8', 'P99', '33.9', 'P100',
# '92.0']
stat_dict = {}
@ -58,7 +56,7 @@ class LogStatsParser(TimeSeriesData):
# replace this with the appropriate key_prefix, remove these
# characters here since the LogStatsParser does not need
# a prefix
if key.startswith("[]"):
reqd_stats.append(key[2:])
else:
reqd_stats.append(key)
@ -77,7 +75,7 @@ class LogStatsParser(TimeSeriesData):
# ...
# rocksdb.db.get.micros P50 : 15.6 P95 : 39.7 P99 : 62.6 P100 : 148.0\n
# ..."
new_lines = log.get_message().split("\n")
# let log_ts = 1532518219
log_ts = log.get_timestamp()
# example updates to keys_ts:
@ -95,20 +93,17 @@ class LogStatsParser(TimeSeriesData):
# this method parses the Rocksdb LOG file and generates timeseries for
# each of the statistic in the list reqd_stats
self.keys_ts = {NO_ENTITY: {}}
for file_name in glob.glob(self.logs_file_prefix + "*"):
# TODO(poojam23): find a way to distinguish between 'old' log files
# from current and previous experiments, present in the same
# directory
if re.search("old", file_name, re.IGNORECASE):
continue
with open(file_name, "r") as db_logs:
new_log = None
for line in db_logs:
if Log.is_new_log(line):
if new_log and re.search(self.STATS, new_log.get_message()):
self.add_to_timeseries(new_log, reqd_stats)
new_log = Log(line, column_families=[])
else:
@ -123,13 +118,13 @@ class DatabasePerfContext(TimeSeriesData):
# TODO(poojam23): check if any benchrunner provides PerfContext sampled at
# regular intervals
def __init__(self, perf_context_ts, stats_freq_sec, cumulative):
"""
perf_context_ts is expected to be in the following format:
Dict[metric, Dict[timestamp, value]], where for
each (metric, timestamp) pair, the value is database-wide (i.e.
summed over all the threads involved)
if stats_freq_sec == 0, per-metric only one value is reported
"""
super().__init__()
self.stats_freq_sec = stats_freq_sec
self.keys_ts = {NO_ENTITY: perf_context_ts}
@ -148,11 +143,11 @@ class DatabasePerfContext(TimeSeriesData):
continue
for ix, ts in enumerate(timeseries[:-1]):
epoch_ts[NO_ENTITY][stat][ts] = (
epoch_ts[NO_ENTITY][stat][ts]
- epoch_ts[NO_ENTITY][stat][timeseries[ix + 1]]
)
if epoch_ts[NO_ENTITY][stat][ts] < 0:
raise ValueError("DBPerfContext: really cumulative?")
# drop the smallest timestamp in the timeseries for this metric
epoch_ts[NO_ENTITY][stat].pop(timeseries[-1])
self.keys_ts = epoch_ts
@ -171,8 +166,8 @@ class DatabasePerfContext(TimeSeriesData):
class OdsStatsFetcher(TimeSeriesData):
# class constants
OUTPUT_FILE = "temp/stats_out.tmp"
ERROR_FILE = "temp/stats_err.tmp"
RAPIDO_COMMAND = "%s --entity=%s --key=%s --tstart=%s --tend=%s --showtime"
# static methods
@ -183,9 +178,9 @@ class OdsStatsFetcher(TimeSeriesData):
@staticmethod
def _get_time_value_pair(pair_string):
# example pair_string: '[1532544591, 97.3653601828]'
pair_string = pair_string.replace("[", "")
pair_string = pair_string.replace("]", "")
pair = pair_string.split(",")
first = int(pair[0].strip())
second = float(pair[1].strip())
return [first, second]
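# A quick standalone check (not part of this diff) of the time/value pair
# parsing in _get_time_value_pair() above, using the sample string from the
# comment.
def get_time_value_pair(pair_string):
    pair_string = pair_string.replace("[", "").replace("]", "")
    first, second = pair_string.split(",")
    return [int(first.strip()), float(second.strip())]


assert get_time_value_pair("[1532544591, 97.3653601828]") == [1532544591, 97.3653601828]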
@ -193,12 +188,10 @@ class OdsStatsFetcher(TimeSeriesData):
@staticmethod
def _get_ods_cli_stime(start_time):
diff = int(time.time() - int(start_time))
stime = str(diff) + "_s"
return stime
def __init__(self, client, entities, start_time, end_time, key_prefix=None):
super().__init__()
self.client = client
self.entities = entities
@ -209,7 +202,7 @@ class OdsStatsFetcher(TimeSeriesData):
self.duration_sec = 60
def execute_script(self, command):
print("executing...")
print(command)
out_file = open(self.OUTPUT_FILE, "w+")
err_file = open(self.ERROR_FILE, "w+")
@ -222,9 +215,9 @@ class OdsStatsFetcher(TimeSeriesData):
# <entity_name>\t<key_name>\t[[ts, value], [ts, value], ...]
# ts = timestamp; value = value of key_name in entity_name at time ts
self.keys_ts = {}
with open(self.OUTPUT_FILE, "r") as fp:
for line in fp:
token_list = line.strip().split("\t")
entity = token_list[0]
key = token_list[1]
if entity not in self.keys_ts:
@ -233,7 +226,7 @@ class OdsStatsFetcher(TimeSeriesData):
self.keys_ts[entity][key] = {}
list_of_lists = [
self._get_time_value_pair(pair_string)
for pair_string in token_list[2].split("],")
]
value = {pair[0]: pair[1] for pair in list_of_lists}
self.keys_ts[entity][key] = value
@ -243,7 +236,7 @@ class OdsStatsFetcher(TimeSeriesData):
# <entity_name>\t<key_name>\t<timestamp>\t<value>
# there is one line per (entity_name, key_name, timestamp)
self.keys_ts = {}
with open(self.OUTPUT_FILE, "r") as fp:
for line in fp:
token_list = line.split()
entity = token_list[0]
@ -257,25 +250,29 @@ class OdsStatsFetcher(TimeSeriesData):
def fetch_timeseries(self, statistics):
# this method fetches the timeseries of required stats from the ODS
# service and populates the 'keys_ts' object appropriately
print("OdsStatsFetcher: fetching " + str(statistics))
if re.search("rapido", self.client, re.IGNORECASE):
command = self.RAPIDO_COMMAND % (
self.client,
self._get_string_in_quotes(self.entities),
self._get_string_in_quotes(",".join(statistics)),
self._get_string_in_quotes(self.start_time),
self._get_string_in_quotes(self.end_time),
)
# Run the tool and fetch the time-series data
self.execute_script(command)
# Parse output and populate the 'keys_ts' map
self.parse_rapido_output()
elif re.search("ods", self.client, re.IGNORECASE):
command = (
self.client
+ " "
+ "--stime="
+ self._get_ods_cli_stime(self.start_time)
+ " "
+ self._get_string_in_quotes(self.entities)
+ " "
+ self._get_string_in_quotes(",".join(statistics))
)
# Run the tool and fetch the time-series data
self.execute_script(command)
@ -287,7 +284,7 @@ class OdsStatsFetcher(TimeSeriesData):
for cond in conditions:
for key in cond.keys:
use_prefix = False
if key.startswith("[]"):
use_prefix = True
key = key[2:]
# TODO(poojam23): this is very hacky and needs to be improved
@ -295,15 +292,15 @@ class OdsStatsFetcher(TimeSeriesData):
key += ".60"
if use_prefix:
if not self.key_prefix:
print("Warning: OdsStatsFetcher might need key prefix")
print("for the key: " + key)
else:
key = self.key_prefix + "." + key
reqd_stats.append(key)
return reqd_stats
def fetch_rate_url(self, entities: List[str], keys: List[str],
window_len: str, percent: str, display: bool) -> str:
transform_desc = (
"rate(" + str(window_len) + ",duration=" + str(self.duration_sec)
)
@ -311,28 +308,33 @@ class OdsStatsFetcher(TimeSeriesData):
transform_desc = transform_desc + ",%)"
else:
transform_desc = transform_desc + ")"
if re.search("rapido", self.client, re.IGNORECASE):
command = self.RAPIDO_COMMAND + " --transform=%s --url=%s"
command = command % (
self.client,
self._get_string_in_quotes(",".join(entities)),
self._get_string_in_quotes(",".join(keys)),
self._get_string_in_quotes(self.start_time),
self._get_string_in_quotes(self.end_time),
self._get_string_in_quotes(transform_desc),
self._get_string_in_quotes(display),
)
elif re.search("ods", self.client, re.IGNORECASE):
command = (
self.client
+ " "
+ "--stime="
+ self._get_ods_cli_stime(self.start_time)
+ " "
+ "--fburlonly "
+ self._get_string_in_quotes(entities)
+ " "
+ self._get_string_in_quotes(",".join(keys))
+ " "
+ self._get_string_in_quotes(transform_desc)
)
self.execute_script(command)
url = ""
with open(self.OUTPUT_FILE, "r") as fp:
url = fp.readline()
return url

@ -3,13 +3,15 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import math
from abc import abstractmethod
from enum import Enum
from typing import Dict
from advisor.db_log_parser import DataSource
NO_ENTITY = "ENTITY_PLACEHOLDER"
class TimeSeriesData(DataSource):
@ -42,10 +44,8 @@ class TimeSeriesData(DataSource):
# for each of them and populates the 'keys_ts' dictionary
pass
def fetch_burst_epochs(self, entities: str, statistic: int,
window_sec: float, threshold: bool, percent: bool) -> Dict[str, Dict[int, float]]:
# this method calculates the (percent) rate change in the 'statistic'
# for each entity (over 'window_sec' seconds) and returns the epochs
# where this rate change is greater than or equal to the 'threshold'
@ -90,7 +90,6 @@ class TimeSeriesData(DataSource):
return burst_epochs
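# A hypothetical, simplified sketch (not part of this diff) of the contract
# described above for fetch_burst_epochs(): report the epochs at which the
# rate of change of a timeseries over window_sec meets the threshold. The
# real method also handles entities and percent mode, omitted here.
from typing import Dict


def burst_epochs(ts: Dict[int, float], window_sec: int, threshold: float):
    epochs = sorted(ts)
    bursts = {}
    for prev, curr in zip(epochs, epochs[1:]):
        if curr - prev > window_sec:
            continue  # samples too far apart to compare
        rate = (ts[curr] - ts[prev]) / (curr - prev)
        if rate >= threshold:
            bursts[curr] = rate
    return bursts


assert burst_epochs({0: 0.0, 60: 600.0, 120: 630.0}, 60, 5.0) == {60: 10.0}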
def fetch_aggregated_values(self, entity, statistics, aggregation_op):
# this method performs the aggregation specified by 'aggregation_op'
# on the timeseries of 'statistics' for 'entity' and returns:
# Dict[statistic, aggregated_value]
@ -145,7 +144,7 @@ class TimeSeriesData(DataSource):
complete_keys[0], # there should be only one key
cond.window_sec,
cond.rate_threshold,
True,
)
# Trigger in this case is:
# Dict[entity_name, Dict[timestamp, rate_change]]
@ -156,32 +155,28 @@ class TimeSeriesData(DataSource):
cond.set_trigger(result)
elif cond.behavior is self.Behavior.evaluate_expression:
self.handle_evaluate_expression(
cond, complete_keys, entities_with_stats
)
def handle_evaluate_expression(self, condition, statistics, entities):
trigger = {}
# check 'condition' for each of these entities
for entity in entities:
if hasattr(condition, "aggregation_op"):
# in this case, the aggregation operation is performed on each
# of the condition's 'keys' and then with aggregated values
# condition's 'expression' is evaluated; if it evaluates to
# True, then list of the keys values is added to the
# condition's trigger: Dict[entity_name, List[stats]]
result = self.fetch_aggregated_values(
entity, statistics, condition.aggregation_op
)
keys = [result[key] for key in statistics]
try:
if eval(condition.expression):
trigger[entity] = keys
except Exception as e:
print("WARNING(TimeSeriesData) check_and_trigger: " + str(e))
else:
# assumption: all stats have same series of timestamps
# this is similar to the above but 'expression' is evaluated at
@ -190,19 +185,13 @@ class TimeSeriesData(DataSource):
# 'expression' evaluated to true; so trigger is:
# Dict[entity, Dict[timestamp, List[stats]]]
for epoch in self.keys_ts[entity][statistics[0]].keys():
keys = [self.keys_ts[entity][key][epoch] for key in statistics]
try:
if eval(condition.expression):
if entity not in trigger:
trigger[entity] = {}
trigger[entity][epoch] = keys
except Exception as e:
print("WARNING(TimeSeriesData) check_and_trigger: " + str(e))
if trigger:
condition.set_trigger(trigger)

@ -17,7 +17,7 @@ class IniParser:
@staticmethod
def remove_trailing_comment(line):
line = line.strip()
comment_start = line.find("#")
if comment_start > -1:
return line[:comment_start]
return line
@ -27,7 +27,7 @@ class IniParser:
# A section header looks like: [Rule "my-new-rule"]. Essentially,
# a line that is in square-brackets.
line = line.strip()
if line.startswith("[") and line.endswith("]"):
return True
return False
@ -38,7 +38,7 @@ class IniParser:
token_list = line.strip()[1:-1].split('"')
if len(token_list) < 3:
error = 'needed section header: [<section_type> "<section_name>"]'
raise ValueError("Parsing error: " + error + "\n" + line)
return token_list[1]
@staticmethod
@ -47,22 +47,22 @@ class IniParser:
if not line:
return IniParser.Element.comment
if IniParser.is_section_header(line):
if line.strip()[1:-1].startswith("Suggestion"):
return IniParser.Element.sugg
if line.strip()[1:-1].startswith("Rule"):
return IniParser.Element.rule
if line.strip()[1:-1].startswith("Condition"):
return IniParser.Element.cond
if "=" in line:
return IniParser.Element.key_val
error = "not a recognizable RulesSpec element"
raise ValueError("Parsing error: " + error + "\n" + line)
@staticmethod
def get_key_value_pair(line):
line = line.strip()
key = line.split("=")[0].strip()
value = "=".join(line.split("=")[1:])
if value == "": # if the option has no value
return (key, None)
values = IniParser.get_list_from_value(value)
@ -72,5 +72,5 @@ class IniParser:
@staticmethod
def get_list_from_value(value):
values = value.strip().split(":")
return values
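# A self-contained sketch (not part of this diff) of the key=value parsing in
# get_key_value_pair() above; the single-value fallback branch is an
# assumption, since it is elided from the hunk shown.
def get_key_value_pair(line):
    line = line.strip()
    key = line.split("=")[0].strip()
    value = "=".join(line.split("=")[1:])
    if value == "":  # the option has no value
        return (key, None)
    values = value.strip().split(":")
    if len(values) == 1:  # assumed: a plain scalar stays a string
        return (key, value)
    return (key, values)


assert get_key_value_pair("db_log_dir=") == ("db_log_dir", None)
assert get_key_value_pair("write_buffer_size=1048576") == ("write_buffer_size", "1048576")
assert get_key_value_pair("ratios=1:1:1") == ("ratios", ["1", "1", "1"])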

@ -3,12 +3,13 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import re
from abc import ABC, abstractmethod
from enum import Enum
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.db_timeseries_parser import TimeSeriesData
from advisor.ini_parser import IniParser
class Section(ABC):
@ -38,17 +39,17 @@ class Rule(Section):
# value will be a string and not a list. Hence, convert it to a single
# element list before storing it in self.suggestions or
# self.conditions.
if key == "conditions":
if isinstance(value, str):
self.conditions = [value]
else:
self.conditions = value
elif key == "suggestions":
if isinstance(value, str):
self.suggestions = [value]
else:
self.suggestions = value
elif key == "overlap_time_period":
self.overlap_time_seconds = value
def get_suggestions(self):
@ -56,35 +57,29 @@ class Rule(Section):
def perform_checks(self):
if not self.conditions or len(self.conditions) < 1:
raise ValueError(self.name + ": rule must have at least one condition")
if not self.suggestions or len(self.suggestions) < 1:
raise ValueError(self.name + ": rule must have at least one suggestion")
if self.overlap_time_seconds:
if len(self.conditions) != 2:
raise ValueError(
self.name + ": rule must be associated with 2 conditions\
self.name
+ ": rule must be associated with 2 conditions\
in order to check for a time dependency between them"
)
time_format = "^\d+[s|m|h|d]$"  # noqa
if not re.match(time_format, self.overlap_time_seconds, re.IGNORECASE):
raise ValueError(
self.name + ": overlap_time_seconds format: \d+[s|m|h|d]"
)
else: # convert to seconds
in_seconds = int(self.overlap_time_seconds[:-1])
if self.overlap_time_seconds[-1] == "m":
in_seconds *= 60
elif self.overlap_time_seconds[-1] == "h":
in_seconds *= 60 * 60
elif self.overlap_time_seconds[-1] == "d":
in_seconds *= 24 * 60 * 60
self.overlap_time_seconds = in_seconds
def get_overlap_timestamps(self, key1_trigger_epochs, key2_trigger_epochs):
@ -93,28 +88,25 @@ class Rule(Section):
# (if present) the first pair of timestamps at which the 2 conditions
# were triggered within 'overlap_time_seconds' of each other
key1_lower_bounds = [
epoch - self.overlap_time_seconds for epoch in key1_trigger_epochs
]
key1_lower_bounds.sort()
key2_trigger_epochs.sort()
trigger_ix = 0
overlap_pair = None
for key1_lb in key1_lower_bounds:
while key2_trigger_epochs[trigger_ix] < key1_lb and trigger_ix < len(
key2_trigger_epochs
):
trigger_ix += 1
if trigger_ix >= len(key2_trigger_epochs):
break
if key2_trigger_epochs[trigger_ix] <= key1_lb + (
2 * self.overlap_time_seconds
):
overlap_pair = (
key2_trigger_epochs[trigger_ix],
key1_lb + self.overlap_time_seconds,
)
break
return overlap_pair
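# An illustrative, self-contained version (not part of this diff) of the
# overlap search that get_overlap_timestamps() performs above. The bounds
# check is reordered to run before the list access (an assumption; the code
# in the diff evaluates them in the opposite order).
def get_overlap(key1_epochs, key2_epochs, overlap_sec):
    lower_bounds = sorted(epoch - overlap_sec for epoch in key1_epochs)
    key2_epochs = sorted(key2_epochs)
    ix = 0
    for lb in lower_bounds:
        while ix < len(key2_epochs) and key2_epochs[ix] < lb:
            ix += 1
        if ix >= len(key2_epochs):
            return None
        if key2_epochs[ix] <= lb + 2 * overlap_sec:
            # first pair of epochs triggered within the overlap window
            return (key2_epochs[ix], lb + overlap_sec)
    return None


assert get_overlap([100, 500], [160, 900], 60) == (160, 100)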
@ -130,10 +122,10 @@ class Rule(Section):
condition1 = conditions_dict[self.conditions[0]]
condition2 = conditions_dict[self.conditions[1]]
if not (
condition1.get_data_source() is DataSource.Type.TIME_SERIES
and condition2.get_data_source() is DataSource.Type.TIME_SERIES
):
raise ValueError(self.name + ": need 2 timeseries conditions")
map1 = condition1.get_trigger()
map2 = condition2.get_trigger()
@ -142,14 +134,10 @@ class Rule(Section):
self.trigger_entities = {}
is_triggered = False
entity_intersection = set(map1.keys()).intersection(set(map2.keys()))
for entity in entity_intersection:
overlap_timestamps_pair = self.get_overlap_timestamps(
list(map1[entity].keys()), list(map2[entity].keys())
)
if overlap_timestamps_pair:
self.trigger_entities[entity] = overlap_timestamps_pair
@ -166,8 +154,8 @@ class Rule(Section):
all_conditions_triggered = False
break
if (
cond.get_data_source() is DataSource.Type.LOG
or cond.get_data_source() is DataSource.Type.DB_OPTIONS
):
cond_col_fam = set(cond.get_trigger().keys())
if NO_COL_FAMILY in cond_col_fam:
@ -180,8 +168,8 @@ class Rule(Section):
if self.trigger_entities is None:
self.trigger_entities = cond_entities
else:
self.trigger_entities = self.trigger_entities.intersection(
cond_entities
)
if not (self.trigger_entities or self.trigger_column_families):
all_conditions_triggered = False
@ -200,7 +188,7 @@ class Rule(Section):
rule_string += cond
is_first = False
else:
rule_string += (" AND " + cond)
rule_string += " AND " + cond
# Append suggestions
rule_string += "\nsuggestions:: "
is_first = True
@ -209,11 +197,11 @@ class Rule(Section):
rule_string += sugg
is_first = False
else:
rule_string += (", " + sugg)
rule_string += ", " + sugg
if self.trigger_entities:
rule_string += ", entities:: " + str(self.trigger_entities)
if self.trigger_column_families:
rule_string += ", col_fam:: " + str(self.trigger_column_families)
# Return constructed string
return rule_string
@ -232,7 +220,7 @@ class Suggestion(Section):
self.description = None
def set_parameter(self, key, value):
if key == "option":
# Note:
# case 1: 'option' is supported by Rocksdb OPTIONS file; in this
# case the option belongs to one of the sections in the config
@ -240,41 +228,35 @@ class Suggestion(Section):
# case 2: 'option' is not supported by Rocksdb OPTIONS file; the
# option is not expected to have the character '.' in its name
self.option = value
elif key == "action":
if self.option and not value:
raise ValueError(self.name + ": provide action for option")
self.action = self.Action[value]
elif key == "suggested_values":
if isinstance(value, str):
self.suggested_values = [value]
else:
self.suggested_values = value
elif key == "description":
self.description = value
def perform_checks(self):
if not self.description:
if not self.option:
raise ValueError(self.name + ": provide option or description")
if not self.action:
raise ValueError(self.name + ": provide action for option")
if self.action is self.Action.set and not self.suggested_values:
raise ValueError(self.name + ": provide suggested value for option")
def __repr__(self):
sugg_string = "Suggestion: " + self.name
if self.description:
sugg_string += " description : " + self.description
else:
sugg_string += " option : " + self.option + " action : " + self.action.name
if self.suggested_values:
sugg_string += " suggested_values : " + str(self.suggested_values)
return sugg_string
@ -286,7 +268,7 @@ class Condition(Section):
def perform_checks(self):
if not self.data_source:
raise ValueError(self.name + ": condition not tied to data source")
def set_data_source(self, data_source):
self.data_source = data_source
@ -310,28 +292,28 @@ class Condition(Section):
def set_parameter(self, key, value):
# must be defined by the subclass
raise NotImplementedError(self.name + ": provide source for condition")
class LogCondition(Condition):
@classmethod
def create(cls, base_condition):
base_condition.set_data_source(DataSource.Type["LOG"])
base_condition.__class__ = cls
return base_condition
def set_parameter(self, key, value):
if key == "regex":
self.regex = value
def perform_checks(self):
super().perform_checks()
if not self.regex:
raise ValueError(self.name + ": provide regex for log condition")
def __repr__(self):
log_cond_str = "LogCondition: " + self.name
log_cond_str += (" regex: " + self.regex)
log_cond_str += " regex: " + self.regex
# if self.trigger:
# log_cond_str += (" trigger: " + str(self.trigger))
return log_cond_str
@ -340,90 +322,90 @@ class LogCondition(Condition):
class OptionCondition(Condition):
@classmethod
def create(cls, base_condition):
base_condition.set_data_source(DataSource.Type["DB_OPTIONS"])
base_condition.__class__ = cls
return base_condition
def set_parameter(self, key, value):
if key == "options":
if isinstance(value, str):
self.options = [value]
else:
self.options = value
elif key == "evaluate":
self.eval_expr = value
def perform_checks(self):
super().perform_checks()
if not self.options:
raise ValueError(self.name + ": options missing in condition")
if not self.eval_expr:
raise ValueError(self.name + ": expression missing in condition")
def __repr__(self):
opt_cond_str = "OptionCondition: " + self.name
opt_cond_str += (" options: " + str(self.options))
opt_cond_str += (" expression: " + self.eval_expr)
opt_cond_str += " options: " + str(self.options)
opt_cond_str += " expression: " + self.eval_expr
if self.trigger:
opt_cond_str += (" trigger: " + str(self.trigger))
opt_cond_str += " trigger: " + str(self.trigger)
return opt_cond_str
class TimeSeriesCondition(Condition):
@classmethod
def create(cls, base_condition):
base_condition.set_data_source(DataSource.Type["TIME_SERIES"])
base_condition.__class__ = cls
return base_condition
def set_parameter(self, key, value):
if key == "keys":
if isinstance(value, str):
self.keys = [value]
else:
self.keys = value
elif key == "behavior":
self.behavior = TimeSeriesData.Behavior[value]
elif key == "rate_threshold":
self.rate_threshold = float(value)
elif key == "window_sec":
self.window_sec = int(value)
elif key == "evaluate":
self.expression = value
elif key == "aggregation_op":
self.aggregation_op = TimeSeriesData.AggregationOperator[value]
def perform_checks(self):
if not self.keys:
raise ValueError(self.name + ": specify timeseries key")
if not self.behavior:
raise ValueError(self.name + ": specify triggering behavior")
if self.behavior is TimeSeriesData.Behavior.bursty:
if not self.rate_threshold:
raise ValueError(self.name + ": specify rate burst threshold")
if not self.window_sec:
self.window_sec = 300 # default window length is 5 minutes
if len(self.keys) > 1:
raise ValueError(self.name + ": specify only one key")
elif self.behavior is TimeSeriesData.Behavior.evaluate_expression:
if not (self.expression):
raise ValueError(self.name + ": specify evaluation expression")
else:
raise ValueError(self.name + ": trigger behavior not supported")
def __repr__(self):
ts_cond_str = "TimeSeriesCondition: " + self.name
ts_cond_str += (" statistics: " + str(self.keys))
ts_cond_str += (" behavior: " + self.behavior.name)
ts_cond_str += " statistics: " + str(self.keys)
ts_cond_str += " behavior: " + self.behavior.name
if self.behavior is TimeSeriesData.Behavior.bursty:
ts_cond_str += (" rate_threshold: " + str(self.rate_threshold))
ts_cond_str += (" window_sec: " + str(self.window_sec))
ts_cond_str += " rate_threshold: " + str(self.rate_threshold)
ts_cond_str += " window_sec: " + str(self.window_sec)
if self.behavior is TimeSeriesData.Behavior.evaluate_expression:
ts_cond_str += (" expression: " + self.expression)
if hasattr(self, 'aggregation_op'):
ts_cond_str += (" aggregation_op: " + self.aggregation_op.name)
ts_cond_str += " expression: " + self.expression
if hasattr(self, "aggregation_op"):
ts_cond_str += " aggregation_op: " + self.aggregation_op.name
if self.trigger:
ts_cond_str += (" trigger: " + str(self.trigger))
ts_cond_str += " trigger: " + str(self.trigger)
return ts_cond_str
@ -446,7 +428,7 @@ class RulesSpec:
def load_rules_from_spec(self):
self.initialise_fields()
with open(self.file_path, "r") as db_rules:
curr_section = None
for line in db_rules:
line = IniParser.remove_trailing_comment(line)
@ -472,12 +454,12 @@ class RulesSpec:
if curr_section is IniParser.Element.rule:
new_rule.set_parameter(key, value)
elif curr_section is IniParser.Element.cond:
if key == "source":
if value == "LOG":
new_cond = LogCondition.create(new_cond)
elif value == "OPTIONS":
new_cond = OptionCondition.create(new_cond)
elif value == "TIME_SERIES":
new_cond = TimeSeriesCondition.create(new_cond)
else:
new_cond.set_parameter(key, value)
@ -515,14 +497,14 @@ class RulesSpec:
def print_rules(self, rules):
for rule in rules:
print("\nRule: " + rule.name)
for cond_name in rule.conditions:
print(repr(self.conditions_dict[cond_name]))
for sugg_name in rule.suggestions:
print(repr(self.suggestions_dict[sugg_name]))
if rule.trigger_entities:
print("scope: entities:")
print(rule.trigger_entities)
if rule.trigger_column_families:
print("scope: col_fam:")
print(rule.trigger_column_families)

@ -3,11 +3,12 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import argparse
from advisor.db_log_parser import DatabaseLogs, DataSource
from advisor.db_options_parser import DatabaseOptions
from advisor.db_stats_fetcher import LogStatsParser, OdsStatsFetcher
from advisor.rule_parser import RulesSpec
def main(args):
@ -18,9 +19,7 @@ def main(args):
# initialize the DatabaseOptions object
db_options = DatabaseOptions(args.rocksdb_options)
# Create DatabaseLogs object
db_logs = DatabaseLogs(args.log_files_path_prefix, db_options.get_column_families())
# Create the Log STATS object
db_log_stats = LogStatsParser(
args.log_files_path_prefix, args.stats_dump_period_sec
@ -28,62 +27,72 @@ def main(args):
data_sources = {
DataSource.Type.DB_OPTIONS: [db_options],
DataSource.Type.LOG: [db_logs],
DataSource.Type.TIME_SERIES: [db_log_stats],
}
if args.ods_client:
data_sources[DataSource.Type.TIME_SERIES].append(
OdsStatsFetcher(
args.ods_client,
args.ods_entity,
args.ods_tstart,
args.ods_tend,
args.ods_key_prefix,
)
)
triggered_rules = rule_spec_parser.get_triggered_rules(
data_sources, db_options.get_column_families()
)
rule_spec_parser.print_rules(triggered_rules)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Use this script to get\
suggestions for improving Rocksdb performance."
)
parser.add_argument(
"--rules_spec",
required=True,
type=str,
help="path of the file containing the expert-specified Rules",
)
parser.add_argument(
"--rocksdb_options",
required=True,
type=str,
help="path of the starting Rocksdb OPTIONS file",
)
parser.add_argument(
"--log_files_path_prefix",
required=True,
type=str,
help="path prefix of the Rocksdb LOG files",
)
parser.add_argument(
"--stats_dump_period_sec",
required=True,
type=int,
help="the frequency (in seconds) at which STATISTICS are printed to "
+ "the Rocksdb LOG file",
)
# ODS arguments
parser.add_argument("--ods_client", type=str, help="the ODS client binary")
parser.add_argument(
"--ods_entity",
type=str,
help="the servers for which the ODS stats need to be fetched",
)
parser.add_argument(
"--ods_key_prefix",
type=str,
help="the prefix that needs to be attached to the keys of time "
+ "series to be fetched from ODS",
)
parser.add_argument(
"--ods_tstart", type=int, help="start time of timeseries to be fetched from ODS"
)
parser.add_argument(
"--ods_tend", type=int, help="end time of timeseries to be fetched from ODS"
)
args = parser.parse_args()
main(args)
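A hypothetical way to drive main() with the parser defined above (argparse accepts an explicit argv list; every path below is a placeholder):

args = parser.parse_args(
    [
        "--rules_spec=rules.ini",
        "--rocksdb_options=OPTIONS-000005",
        "--log_files_path_prefix=LOG",
        "--stats_dump_period_sec=20",
    ]
)
main(args)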

@ -3,24 +3,25 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
from advisor.db_bench_runner import DBBenchRunner
from advisor.db_log_parser import NO_COL_FAMILY, DataSource
from advisor.db_options_parser import DatabaseOptions
import os
import unittest
from advisor.db_bench_runner import DBBenchRunner
from advisor.db_log_parser import DataSource, NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
class TestDBBenchRunnerMethods(unittest.TestCase):
def setUp(self):
self.pos_args = [
'./../../db_bench',
'overwrite',
'use_existing_db=true',
'duration=10'
"./../../db_bench",
"overwrite",
"use_existing_db=true",
"duration=10",
]
self.bench_runner = DBBenchRunner(self.pos_args)
this_path = os.path.abspath(os.path.dirname(__file__))
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005')
options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
self.db_options = DatabaseOptions(options_path)
def test_setup(self):
@ -31,71 +32,70 @@ class TestDBBenchRunnerMethods(unittest.TestCase):
)
def test_get_info_log_file_name(self):
log_file_name = DBBenchRunner.get_info_log_file_name(
None, 'random_path'
)
self.assertEqual(log_file_name, 'LOG')
log_file_name = DBBenchRunner.get_info_log_file_name(None, "random_path")
self.assertEqual(log_file_name, "LOG")
log_file_name = DBBenchRunner.get_info_log_file_name(
'/dev/shm/', '/tmp/rocksdbtest-155919/dbbench/'
"/dev/shm/", "/tmp/rocksdbtest-155919/dbbench/"
)
self.assertEqual(log_file_name, 'tmp_rocksdbtest-155919_dbbench_LOG')
self.assertEqual(log_file_name, "tmp_rocksdbtest-155919_dbbench_LOG")
def test_get_opt_args_str(self):
misc_opt_dict = {'bloom_bits': 2, 'empty_opt': None, 'rate_limiter': 3}
misc_opt_dict = {"bloom_bits": 2, "empty_opt": None, "rate_limiter": 3}
optional_args_str = DBBenchRunner.get_opt_args_str(misc_opt_dict)
self.assertEqual(optional_args_str, ' --bloom_bits=2 --rate_limiter=3')
self.assertEqual(optional_args_str, " --bloom_bits=2 --rate_limiter=3")
def test_get_log_options(self):
db_path = '/tmp/rocksdb-155919/dbbench'
db_path = "/tmp/rocksdb-155919/dbbench"
# when db_log_dir is present in the db_options
update_dict = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: '/dev/shm'},
'DBOptions.stats_dump_period_sec': {NO_COL_FAMILY: '20'}
"DBOptions.db_log_dir": {NO_COL_FAMILY: "/dev/shm"},
"DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: "20"},
}
self.db_options.update_options(update_dict)
log_file_prefix, stats_freq = self.bench_runner.get_log_options(
self.db_options, db_path
)
self.assertEqual(
log_file_prefix, '/dev/shm/tmp_rocksdb-155919_dbbench_LOG'
)
self.assertEqual(log_file_prefix, "/dev/shm/tmp_rocksdb-155919_dbbench_LOG")
self.assertEqual(stats_freq, 20)
update_dict = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: None},
'DBOptions.stats_dump_period_sec': {NO_COL_FAMILY: '30'}
"DBOptions.db_log_dir": {NO_COL_FAMILY: None},
"DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: "30"},
}
self.db_options.update_options(update_dict)
log_file_prefix, stats_freq = self.bench_runner.get_log_options(
self.db_options, db_path
)
self.assertEqual(log_file_prefix, '/tmp/rocksdb-155919/dbbench/LOG')
self.assertEqual(log_file_prefix, "/tmp/rocksdb-155919/dbbench/LOG")
self.assertEqual(stats_freq, 30)
def test_build_experiment_command(self):
# add some misc_options to db_options
update_dict = {
'bloom_bits': {NO_COL_FAMILY: 2},
'rate_limiter_bytes_per_sec': {NO_COL_FAMILY: 128000000}
"bloom_bits": {NO_COL_FAMILY: 2},
"rate_limiter_bytes_per_sec": {NO_COL_FAMILY: 128000000},
}
self.db_options.update_options(update_dict)
db_path = '/dev/shm'
db_path = "/dev/shm"
experiment_command = self.bench_runner._build_experiment_command(
self.db_options, db_path
)
opt_args_str = DBBenchRunner.get_opt_args_str(
self.db_options.get_misc_options()
)
opt_args_str += (
' --options_file=' +
self.db_options.generate_options_config('12345')
opt_args_str += " --options_file=" + self.db_options.generate_options_config(
"12345"
)
for arg in self.pos_args[2:]:
opt_args_str += (' --' + arg)
opt_args_str += " --" + arg
expected_command = (
self.pos_args[0] + ' --benchmarks=' + self.pos_args[1] +
' --statistics --perf_level=3 --db=' + db_path + opt_args_str
self.pos_args[0]
+ " --benchmarks="
+ self.pos_args[1]
+ " --statistics --perf_level=3 --db="
+ db_path
+ opt_args_str
)
self.assertEqual(experiment_command, expected_command)
@ -104,44 +104,38 @@ class TestDBBenchRunner(unittest.TestCase):
def setUp(self):
# Note: the db_bench binary should be present in the rocksdb/ directory
self.pos_args = [
'./../../db_bench',
'overwrite',
'use_existing_db=true',
'duration=20'
"./../../db_bench",
"overwrite",
"use_existing_db=true",
"duration=20",
]
self.bench_runner = DBBenchRunner(self.pos_args)
this_path = os.path.abspath(os.path.dirname(__file__))
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005')
options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
self.db_options = DatabaseOptions(options_path)
def test_experiment_output(self):
update_dict = {'bloom_bits': {NO_COL_FAMILY: 2}}
update_dict = {"bloom_bits": {NO_COL_FAMILY: 2}}
self.db_options.update_options(update_dict)
db_path = '/dev/shm'
db_path = "/dev/shm"
data_sources, throughput = self.bench_runner.run_experiment(
self.db_options, db_path
)
self.assertEqual(
data_sources[DataSource.Type.DB_OPTIONS][0].type,
DataSource.Type.DB_OPTIONS
)
self.assertEqual(
data_sources[DataSource.Type.LOG][0].type,
DataSource.Type.LOG
data_sources[DataSource.Type.DB_OPTIONS][0].type, DataSource.Type.DB_OPTIONS
)
self.assertEqual(data_sources[DataSource.Type.LOG][0].type, DataSource.Type.LOG)
self.assertEqual(len(data_sources[DataSource.Type.TIME_SERIES]), 2)
self.assertEqual(
data_sources[DataSource.Type.TIME_SERIES][0].type,
DataSource.Type.TIME_SERIES
DataSource.Type.TIME_SERIES,
)
self.assertEqual(
data_sources[DataSource.Type.TIME_SERIES][1].type,
DataSource.Type.TIME_SERIES
)
self.assertEqual(
data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec, 0
DataSource.Type.TIME_SERIES,
)
self.assertEqual(data_sources[DataSource.Type.TIME_SERIES][1].stats_freq_sec, 0)
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
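As the first test class encodes, get_opt_args_str() drops None-valued options and renders the rest as " --key=value" pairs; a minimal sketch restating that contract:

from advisor.db_bench_runner import DBBenchRunner

misc = {"bloom_bits": 2, "empty_opt": None, "rate_limiter": 3}
# None values are skipped; everything else becomes a db_bench flag
assert DBBenchRunner.get_opt_args_str(misc) == " --bloom_bits=2 --rate_limiter=3"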

@ -3,52 +3,49 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY
from advisor.rule_parser import Condition, LogCondition
import os
import unittest
from advisor.db_log_parser import DatabaseLogs, Log, NO_COL_FAMILY
from advisor.rule_parser import Condition, LogCondition
class TestLog(unittest.TestCase):
def setUp(self):
self.column_families = ['default', 'col_fam_A']
self.column_families = ["default", "col_fam_A"]
def test_get_column_family(self):
test_log = (
"2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " +
"[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK"
"2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] "
+ "[col_fam_A] [JOB 44] Level-0 flush table #84: 1890780 bytes OK"
)
db_log = Log(test_log, self.column_families)
self.assertEqual('col_fam_A', db_log.get_column_family())
self.assertEqual("col_fam_A", db_log.get_column_family())
test_log = (
"2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] " +
"[JOB 44] Level-0 flush table #84: 1890780 bytes OK"
"2018/05/25-14:34:21.047233 7f82ba72e700 [db/flush_job.cc:371] "
+ "[JOB 44] Level-0 flush table #84: 1890780 bytes OK"
)
db_log = Log(test_log, self.column_families)
db_log.append_message('[default] some remaining part of log')
db_log.append_message("[default] some remaining part of log")
self.assertEqual(NO_COL_FAMILY, db_log.get_column_family())
def test_get_methods(self):
hr_time = "2018/05/25-14:30:25.491635"
context = "7f82ba72e700"
message = (
"[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table " +
"#23: started"
"[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table "
+ "#23: started"
)
test_log = hr_time + " " + context + " " + message
db_log = Log(test_log, self.column_families)
self.assertEqual(db_log.get_message(), message)
remaining_message = "[col_fam_A] some more logs"
db_log.append_message(remaining_message)
self.assertEqual(
db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635"
)
self.assertEqual(db_log.get_human_readable_time(), "2018/05/25-14:30:25.491635")
self.assertEqual(db_log.get_context(), "7f82ba72e700")
self.assertEqual(db_log.get_timestamp(), 1527258625)
self.assertEqual(
db_log.get_message(), str(message + '\n' + remaining_message)
)
self.assertEqual(db_log.get_message(), str(message + "\n" + remaining_message))
def test_is_new_log(self):
new_log = "2018/05/25-14:34:21.047233 context random new log"
@ -60,44 +57,40 @@ class TestLog(unittest.TestCase):
class TestDatabaseLogs(unittest.TestCase):
def test_check_and_trigger_conditions(self):
this_path = os.path.abspath(os.path.dirname(__file__))
logs_path_prefix = os.path.join(this_path, 'input_files/LOG-0')
column_families = ['default', 'col-fam-A', 'col-fam-B']
logs_path_prefix = os.path.join(this_path, "input_files/LOG-0")
column_families = ["default", "col-fam-A", "col-fam-B"]
db_logs = DatabaseLogs(logs_path_prefix, column_families)
# matches, has 2 col_fams
condition1 = LogCondition.create(Condition('cond-A'))
condition1.set_parameter('regex', 'random log message')
condition1 = LogCondition.create(Condition("cond-A"))
condition1.set_parameter("regex", "random log message")
# matches, multiple lines message
condition2 = LogCondition.create(Condition('cond-B'))
condition2.set_parameter('regex', 'continuing on next line')
condition2 = LogCondition.create(Condition("cond-B"))
condition2.set_parameter("regex", "continuing on next line")
# does not match
condition3 = LogCondition.create(Condition('cond-C'))
condition3.set_parameter('regex', 'this should match no log')
db_logs.check_and_trigger_conditions(
[condition1, condition2, condition3]
)
condition3 = LogCondition.create(Condition("cond-C"))
condition3.set_parameter("regex", "this should match no log")
db_logs.check_and_trigger_conditions([condition1, condition2, condition3])
cond1_trigger = condition1.get_trigger()
self.assertEqual(2, len(cond1_trigger.keys()))
self.assertSetEqual(
{'col-fam-A', NO_COL_FAMILY}, set(cond1_trigger.keys())
)
self.assertEqual(2, len(cond1_trigger['col-fam-A']))
self.assertSetEqual({"col-fam-A", NO_COL_FAMILY}, set(cond1_trigger.keys()))
self.assertEqual(2, len(cond1_trigger["col-fam-A"]))
messages = [
"[db/db_impl.cc:563] [col-fam-A] random log message for testing",
"[db/db_impl.cc:653] [col-fam-A] another random log message"
"[db/db_impl.cc:653] [col-fam-A] another random log message",
]
self.assertIn(cond1_trigger['col-fam-A'][0].get_message(), messages)
self.assertIn(cond1_trigger['col-fam-A'][1].get_message(), messages)
self.assertIn(cond1_trigger["col-fam-A"][0].get_message(), messages)
self.assertIn(cond1_trigger["col-fam-A"][1].get_message(), messages)
self.assertEqual(1, len(cond1_trigger[NO_COL_FAMILY]))
self.assertEqual(
cond1_trigger[NO_COL_FAMILY][0].get_message(),
"[db/db_impl.cc:331] [unknown] random log message no column family"
"[db/db_impl.cc:331] [unknown] random log message no column family",
)
cond2_trigger = condition2.get_trigger()
self.assertEqual(['col-fam-B'], list(cond2_trigger.keys()))
self.assertEqual(1, len(cond2_trigger['col-fam-B']))
self.assertEqual(["col-fam-B"], list(cond2_trigger.keys()))
self.assertEqual(1, len(cond2_trigger["col-fam-B"]))
self.assertEqual(
cond2_trigger['col-fam-B'][0].get_message(),
"[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n" +
"remaining part of the log"
cond2_trigger["col-fam-B"][0].get_message(),
"[db/db_impl.cc:234] [col-fam-B] log continuing on next line\n"
+ "remaining part of the log",
)
self.assertIsNone(condition3.get_trigger())
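The Log accessors exercised above split each line into a fixed anatomy: human-readable time, thread context, then the message with an optional [column_family] tag. A short sketch using a log line taken from these tests:

from advisor.db_log_parser import Log

line = (
    "2018/05/25-14:30:25.491635 7f82ba72e700 "
    "[db/flush_job.cc:331] [default] [JOB 10] Level-0 flush table #23: started"
)
log = Log(line, ["default", "col_fam_A"])
log.get_human_readable_time()  # "2018/05/25-14:30:25.491635"
log.get_context()  # "7f82ba72e700"
log.get_column_family()  # "default", taken from the bracketed tag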

@ -3,105 +3,107 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
import os
import unittest
from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import Condition, OptionCondition
import os
import unittest
class TestDatabaseOptions(unittest.TestCase):
def setUp(self):
self.this_path = os.path.abspath(os.path.dirname(__file__))
self.og_options = os.path.join(
self.this_path, 'input_files/OPTIONS-000005'
)
misc_options = [
'bloom_bits = 4', 'rate_limiter_bytes_per_sec = 1024000'
]
self.og_options = os.path.join(self.this_path, "input_files/OPTIONS-000005")
misc_options = ["bloom_bits = 4", "rate_limiter_bytes_per_sec = 1024000"]
# create the options object
self.db_options = DatabaseOptions(self.og_options, misc_options)
# perform clean-up before running tests
self.generated_options = os.path.join(
self.this_path, '../temp/OPTIONS_testing.tmp'
self.this_path, "../temp/OPTIONS_testing.tmp"
)
if os.path.isfile(self.generated_options):
os.remove(self.generated_options)
def test_get_options_diff(self):
old_opt = {
'DBOptions.stats_dump_freq_sec': {NO_COL_FAMILY: '20'},
'CFOptions.write_buffer_size': {
'default': '1024000',
'col_fam_A': '128000',
'col_fam_B': '128000000'
"DBOptions.stats_dump_freq_sec": {NO_COL_FAMILY: "20"},
"CFOptions.write_buffer_size": {
"default": "1024000",
"col_fam_A": "128000",
"col_fam_B": "128000000",
},
'DBOptions.use_fsync': {NO_COL_FAMILY: 'true'},
'DBOptions.max_log_file_size': {NO_COL_FAMILY: '128000000'}
"DBOptions.use_fsync": {NO_COL_FAMILY: "true"},
"DBOptions.max_log_file_size": {NO_COL_FAMILY: "128000000"},
}
new_opt = {
'bloom_bits': {NO_COL_FAMILY: '4'},
'CFOptions.write_buffer_size': {
'default': '128000000',
'col_fam_A': '128000',
'col_fam_C': '128000000'
"bloom_bits": {NO_COL_FAMILY: "4"},
"CFOptions.write_buffer_size": {
"default": "128000000",
"col_fam_A": "128000",
"col_fam_C": "128000000",
},
'DBOptions.use_fsync': {NO_COL_FAMILY: 'true'},
'DBOptions.max_log_file_size': {NO_COL_FAMILY: '0'}
"DBOptions.use_fsync": {NO_COL_FAMILY: "true"},
"DBOptions.max_log_file_size": {NO_COL_FAMILY: "0"},
}
diff = DatabaseOptions.get_options_diff(old_opt, new_opt)
expected_diff = {
'DBOptions.stats_dump_freq_sec': {NO_COL_FAMILY: ('20', None)},
'bloom_bits': {NO_COL_FAMILY: (None, '4')},
'CFOptions.write_buffer_size': {
'default': ('1024000', '128000000'),
'col_fam_B': ('128000000', None),
'col_fam_C': (None, '128000000')
"DBOptions.stats_dump_freq_sec": {NO_COL_FAMILY: ("20", None)},
"bloom_bits": {NO_COL_FAMILY: (None, "4")},
"CFOptions.write_buffer_size": {
"default": ("1024000", "128000000"),
"col_fam_B": ("128000000", None),
"col_fam_C": (None, "128000000"),
},
'DBOptions.max_log_file_size': {NO_COL_FAMILY: ('128000000', '0')}
"DBOptions.max_log_file_size": {NO_COL_FAMILY: ("128000000", "0")},
}
self.assertDictEqual(diff, expected_diff)
def test_is_misc_option(self):
self.assertTrue(DatabaseOptions.is_misc_option('bloom_bits'))
self.assertTrue(DatabaseOptions.is_misc_option("bloom_bits"))
self.assertFalse(
DatabaseOptions.is_misc_option('DBOptions.stats_dump_freq_sec')
DatabaseOptions.is_misc_option("DBOptions.stats_dump_freq_sec")
)
def test_set_up(self):
options = self.db_options.get_all_options()
self.assertEqual(22, len(options.keys()))
expected_misc_options = {
'bloom_bits': '4', 'rate_limiter_bytes_per_sec': '1024000'
"bloom_bits": "4",
"rate_limiter_bytes_per_sec": "1024000",
}
self.assertDictEqual(
expected_misc_options, self.db_options.get_misc_options()
)
self.assertDictEqual(expected_misc_options, self.db_options.get_misc_options())
self.assertListEqual(
['default', 'col_fam_A'], self.db_options.get_column_families()
["default", "col_fam_A"], self.db_options.get_column_families()
)
def test_get_options(self):
opt_to_get = [
'DBOptions.manual_wal_flush', 'DBOptions.db_write_buffer_size',
'bloom_bits', 'CFOptions.compaction_filter_factory',
'CFOptions.num_levels', 'rate_limiter_bytes_per_sec',
'TableOptions.BlockBasedTable.block_align', 'random_option'
"DBOptions.manual_wal_flush",
"DBOptions.db_write_buffer_size",
"bloom_bits",
"CFOptions.compaction_filter_factory",
"CFOptions.num_levels",
"rate_limiter_bytes_per_sec",
"TableOptions.BlockBasedTable.block_align",
"random_option",
]
options = self.db_options.get_options(opt_to_get)
expected_options = {
'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'false'},
'DBOptions.db_write_buffer_size': {NO_COL_FAMILY: '0'},
'bloom_bits': {NO_COL_FAMILY: '4'},
'CFOptions.compaction_filter_factory': {
'default': 'nullptr', 'col_fam_A': 'nullptr'
"DBOptions.manual_wal_flush": {NO_COL_FAMILY: "false"},
"DBOptions.db_write_buffer_size": {NO_COL_FAMILY: "0"},
"bloom_bits": {NO_COL_FAMILY: "4"},
"CFOptions.compaction_filter_factory": {
"default": "nullptr",
"col_fam_A": "nullptr",
},
"CFOptions.num_levels": {"default": "7", "col_fam_A": "5"},
"rate_limiter_bytes_per_sec": {NO_COL_FAMILY: "1024000"},
"TableOptions.BlockBasedTable.block_align": {
"default": "false",
"col_fam_A": "true",
},
'CFOptions.num_levels': {'default': '7', 'col_fam_A': '5'},
'rate_limiter_bytes_per_sec': {NO_COL_FAMILY: '1024000'},
'TableOptions.BlockBasedTable.block_align': {
'default': 'false', 'col_fam_A': 'true'
}
}
self.assertDictEqual(expected_options, options)
@ -109,108 +111,104 @@ class TestDatabaseOptions(unittest.TestCase):
# add new, update old, set old
# before updating
expected_old_opts = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: None},
'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'false'},
'bloom_bits': {NO_COL_FAMILY: '4'},
'CFOptions.num_levels': {'default': '7', 'col_fam_A': '5'},
'TableOptions.BlockBasedTable.block_restart_interval': {
'col_fam_A': '16'
}
"DBOptions.db_log_dir": {NO_COL_FAMILY: None},
"DBOptions.manual_wal_flush": {NO_COL_FAMILY: "false"},
"bloom_bits": {NO_COL_FAMILY: "4"},
"CFOptions.num_levels": {"default": "7", "col_fam_A": "5"},
"TableOptions.BlockBasedTable.block_restart_interval": {"col_fam_A": "16"},
}
get_opts = list(expected_old_opts.keys())
options = self.db_options.get_options(get_opts)
self.assertEqual(expected_old_opts, options)
# after updating options
update_opts = {
'DBOptions.db_log_dir': {NO_COL_FAMILY: '/dev/shm'},
'DBOptions.manual_wal_flush': {NO_COL_FAMILY: 'true'},
'bloom_bits': {NO_COL_FAMILY: '2'},
'CFOptions.num_levels': {'col_fam_A': '7'},
'TableOptions.BlockBasedTable.block_restart_interval': {
'default': '32'
},
'random_misc_option': {NO_COL_FAMILY: 'something'}
"DBOptions.db_log_dir": {NO_COL_FAMILY: "/dev/shm"},
"DBOptions.manual_wal_flush": {NO_COL_FAMILY: "true"},
"bloom_bits": {NO_COL_FAMILY: "2"},
"CFOptions.num_levels": {"col_fam_A": "7"},
"TableOptions.BlockBasedTable.block_restart_interval": {"default": "32"},
"random_misc_option": {NO_COL_FAMILY: "something"},
}
self.db_options.update_options(update_opts)
update_opts['CFOptions.num_levels']['default'] = '7'
update_opts['TableOptions.BlockBasedTable.block_restart_interval'] = {
'default': '32', 'col_fam_A': '16'
update_opts["CFOptions.num_levels"]["default"] = "7"
update_opts["TableOptions.BlockBasedTable.block_restart_interval"] = {
"default": "32",
"col_fam_A": "16",
}
get_opts.append('random_misc_option')
get_opts.append("random_misc_option")
options = self.db_options.get_options(get_opts)
self.assertDictEqual(update_opts, options)
expected_misc_options = {
'bloom_bits': '2',
'rate_limiter_bytes_per_sec': '1024000',
'random_misc_option': 'something'
"bloom_bits": "2",
"rate_limiter_bytes_per_sec": "1024000",
"random_misc_option": "something",
}
self.assertDictEqual(
expected_misc_options, self.db_options.get_misc_options()
)
self.assertDictEqual(expected_misc_options, self.db_options.get_misc_options())
def test_generate_options_config(self):
# make sure file does not exist from before
self.assertFalse(os.path.isfile(self.generated_options))
self.db_options.generate_options_config('testing')
self.db_options.generate_options_config("testing")
self.assertTrue(os.path.isfile(self.generated_options))
def test_check_and_trigger_conditions(self):
# options only from CFOptions
# setup the OptionCondition objects to check and trigger
update_dict = {
'CFOptions.level0_file_num_compaction_trigger': {'col_fam_A': '4'},
'CFOptions.max_bytes_for_level_base': {'col_fam_A': '10'}
"CFOptions.level0_file_num_compaction_trigger": {"col_fam_A": "4"},
"CFOptions.max_bytes_for_level_base": {"col_fam_A": "10"},
}
self.db_options.update_options(update_dict)
cond1 = Condition('opt-cond-1')
cond1 = Condition("opt-cond-1")
cond1 = OptionCondition.create(cond1)
cond1.set_parameter(
'options', [
'CFOptions.level0_file_num_compaction_trigger',
'TableOptions.BlockBasedTable.block_restart_interval',
'CFOptions.max_bytes_for_level_base'
]
"options",
[
"CFOptions.level0_file_num_compaction_trigger",
"TableOptions.BlockBasedTable.block_restart_interval",
"CFOptions.max_bytes_for_level_base",
],
)
cond1.set_parameter(
'evaluate',
'int(options[0])*int(options[1])-int(options[2])>=0'
"evaluate", "int(options[0])*int(options[1])-int(options[2])>=0"
)
# only DBOptions
cond2 = Condition('opt-cond-2')
cond2 = Condition("opt-cond-2")
cond2 = OptionCondition.create(cond2)
cond2.set_parameter(
'options', [
'DBOptions.db_write_buffer_size',
'bloom_bits',
'rate_limiter_bytes_per_sec'
]
"options",
[
"DBOptions.db_write_buffer_size",
"bloom_bits",
"rate_limiter_bytes_per_sec",
],
)
cond2.set_parameter(
'evaluate',
'(int(options[2]) * int(options[1]) * int(options[0]))==0'
"evaluate", "(int(options[2]) * int(options[1]) * int(options[0]))==0"
)
# mix of CFOptions and DBOptions
cond3 = Condition('opt-cond-3')
cond3 = Condition("opt-cond-3")
cond3 = OptionCondition.create(cond3)
cond3.set_parameter(
'options', [
'DBOptions.db_write_buffer_size', # 0
'CFOptions.num_levels', # 5, 7
'bloom_bits' # 4
]
"options",
[
"DBOptions.db_write_buffer_size", # 0
"CFOptions.num_levels", # 5, 7
"bloom_bits", # 4
],
)
cond3.set_parameter(
'evaluate', 'int(options[2])*int(options[0])+int(options[1])>6'
"evaluate", "int(options[2])*int(options[0])+int(options[1])>6"
)
self.db_options.check_and_trigger_conditions([cond1, cond2, cond3])
cond1_trigger = {'col_fam_A': ['4', '16', '10']}
cond1_trigger = {"col_fam_A": ["4", "16", "10"]}
self.assertDictEqual(cond1_trigger, cond1.get_trigger())
cond2_trigger = {NO_COL_FAMILY: ['0', '4', '1024000']}
cond2_trigger = {NO_COL_FAMILY: ["0", "4", "1024000"]}
self.assertDictEqual(cond2_trigger, cond2.get_trigger())
cond3_trigger = {'default': ['0', '7', '4']}
cond3_trigger = {"default": ["0", "7", "4"]}
self.assertDictEqual(cond3_trigger, cond3.get_trigger())
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
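get_options_diff() reports each differing option as an (old, new) tuple per column family, with None marking a side where the option is absent; a reduced sketch of the behavior the test above verifies:

from advisor.db_options_parser import DatabaseOptions

old = {"CFOptions.write_buffer_size": {"default": "1024000"}}
new = {"CFOptions.write_buffer_size": {"default": "128000000"}}
diff = DatabaseOptions.get_options_diff(old, new)
# {"CFOptions.write_buffer_size": {"default": ("1024000", "128000000")}}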

@ -3,49 +3,46 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
from advisor.db_stats_fetcher import LogStatsParser, DatabasePerfContext
from advisor.db_timeseries_parser import NO_ENTITY
from advisor.rule_parser import Condition, TimeSeriesCondition
import os
import time
import unittest
from unittest.mock import MagicMock
from advisor.db_stats_fetcher import DatabasePerfContext, LogStatsParser
from advisor.db_timeseries_parser import NO_ENTITY
from advisor.rule_parser import Condition, TimeSeriesCondition
class TestLogStatsParser(unittest.TestCase):
def setUp(self):
this_path = os.path.abspath(os.path.dirname(__file__))
stats_file = os.path.join(
this_path, 'input_files/log_stats_parser_keys_ts'
)
stats_file = os.path.join(this_path, "input_files/log_stats_parser_keys_ts")
# populate the keys_ts dictionary of LogStatsParser
self.stats_dict = {NO_ENTITY: {}}
with open(stats_file, 'r') as fp:
with open(stats_file, "r") as fp:
for line in fp:
stat_name = line.split(':')[0].strip()
stat_name = line.split(":")[0].strip()
self.stats_dict[NO_ENTITY][stat_name] = {}
token_list = line.split(':')[1].strip().split(',')
token_list = line.split(":")[1].strip().split(",")
for token in token_list:
timestamp = int(token.split()[0])
value = float(token.split()[1])
self.stats_dict[NO_ENTITY][stat_name][timestamp] = value
self.log_stats_parser = LogStatsParser('dummy_log_file', 20)
self.log_stats_parser = LogStatsParser("dummy_log_file", 20)
self.log_stats_parser.keys_ts = self.stats_dict
def test_check_and_trigger_conditions_bursty(self):
# mock fetch_timeseries() because 'keys_ts' has been pre-populated
self.log_stats_parser.fetch_timeseries = MagicMock()
# condition: bursty
cond1 = Condition('cond-1')
cond1 = Condition("cond-1")
cond1 = TimeSeriesCondition.create(cond1)
cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50')
cond1.set_parameter('behavior', 'bursty')
cond1.set_parameter('window_sec', 40)
cond1.set_parameter('rate_threshold', 0)
cond1.set_parameter("keys", "rocksdb.db.get.micros.p50")
cond1.set_parameter("behavior", "bursty")
cond1.set_parameter("window_sec", 40)
cond1.set_parameter("rate_threshold", 0)
self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_cond_trigger = {
NO_ENTITY: {1530896440: 0.9767546362322214}
}
expected_cond_trigger = {NO_ENTITY: {1530896440: 0.9767546362322214}}
self.assertDictEqual(expected_cond_trigger, cond1.get_trigger())
# ensure that fetch_timeseries() was called once
self.log_stats_parser.fetch_timeseries.assert_called_once()
@ -54,23 +51,20 @@ class TestLogStatsParser(unittest.TestCase):
# mock fetch_timeseries() because 'keys_ts' has been pre-populated
self.log_stats_parser.fetch_timeseries = MagicMock()
# condition: evaluate_expression
cond1 = Condition('cond-1')
cond1 = Condition("cond-1")
cond1 = TimeSeriesCondition.create(cond1)
cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50')
cond1.set_parameter('behavior', 'evaluate_expression')
keys = [
'rocksdb.manifest.file.sync.micros.p99',
'rocksdb.db.get.micros.p50'
]
cond1.set_parameter('keys', keys)
cond1.set_parameter('aggregation_op', 'latest')
cond1.set_parameter("keys", "rocksdb.db.get.micros.p50")
cond1.set_parameter("behavior", "evaluate_expression")
keys = ["rocksdb.manifest.file.sync.micros.p99", "rocksdb.db.get.micros.p50"]
cond1.set_parameter("keys", keys)
cond1.set_parameter("aggregation_op", "latest")
# condition evaluates to FALSE
cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)>200')
cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)>200")
self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]}
self.assertIsNone(cond1.get_trigger())
# condition evaluates to TRUE
cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)<200')
cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)<200")
self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_cond_trigger = {NO_ENTITY: [1792.0, 15.9638]}
self.assertDictEqual(expected_cond_trigger, cond1.get_trigger())
@ -81,23 +75,22 @@ class TestLogStatsParser(unittest.TestCase):
# mock fetch_timeseries() because 'keys_ts' has been pre-populated
self.log_stats_parser.fetch_timeseries = MagicMock()
# condition: evaluate_expression
cond1 = Condition('cond-1')
cond1 = Condition("cond-1")
cond1 = TimeSeriesCondition.create(cond1)
cond1.set_parameter('keys', 'rocksdb.db.get.micros.p50')
cond1.set_parameter('behavior', 'evaluate_expression')
keys = [
'rocksdb.manifest.file.sync.micros.p99',
'rocksdb.db.get.micros.p50'
]
cond1.set_parameter('keys', keys)
cond1.set_parameter('evaluate', 'keys[0]-(keys[1]*100)>500')
cond1.set_parameter("keys", "rocksdb.db.get.micros.p50")
cond1.set_parameter("behavior", "evaluate_expression")
keys = ["rocksdb.manifest.file.sync.micros.p99", "rocksdb.db.get.micros.p50"]
cond1.set_parameter("keys", keys)
cond1.set_parameter("evaluate", "keys[0]-(keys[1]*100)>500")
self.log_stats_parser.check_and_trigger_conditions([cond1])
expected_trigger = {NO_ENTITY: {
1530896414: [9938.0, 16.31508],
1530896440: [9938.0, 16.346602],
1530896466: [9938.0, 16.284669],
1530896492: [9938.0, 16.16005]
}}
expected_trigger = {
NO_ENTITY: {
1530896414: [9938.0, 16.31508],
1530896440: [9938.0, 16.346602],
1530896466: [9938.0, 16.284669],
1530896492: [9938.0, 16.16005],
}
}
self.assertDictEqual(expected_trigger, cond1.get_trigger())
self.log_stats_parser.fetch_timeseries.assert_called_once()
@ -114,13 +107,15 @@ class TestDatabasePerfContext(unittest.TestCase):
perf_ts[key] = {}
start_val = perf_dict[key]
for ix in range(5):
perf_ts[key][timestamp+(ix*10)] = start_val + (2 * ix * ix)
perf_ts[key][timestamp + (ix * 10)] = start_val + (2 * ix * ix)
db_perf_context = DatabasePerfContext(perf_ts, 10, True)
timestamps = [timestamp+(ix*10) for ix in range(1, 5, 1)]
timestamps = [timestamp + (ix * 10) for ix in range(1, 5, 1)]
values = [val for val in range(2, 15, 4)]
inner_dict = {timestamps[ix]: values[ix] for ix in range(4)}
expected_keys_ts = {NO_ENTITY: {
'user_key_comparison_count': inner_dict,
'block_cache_hit_count': inner_dict
}}
expected_keys_ts = {
NO_ENTITY: {
"user_key_comparison_count": inner_dict,
"block_cache_hit_count": inner_dict,
}
}
self.assertDictEqual(expected_keys_ts, db_perf_context.keys_ts)
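Both stats sources above share the keys_ts shape: entity -> statistic name -> {timestamp: value}, with NO_ENTITY standing in when the series is not tied to a particular server. A sketch of the structure (the numbers are illustrative):

from advisor.db_timeseries_parser import NO_ENTITY

keys_ts = {
    NO_ENTITY: {
        "rocksdb.db.get.micros.p50": {1530896414: 15.9638, 1530896440: 16.3466},
    }
}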

@ -5,36 +5,32 @@
import os
import unittest
from advisor.rule_parser import RulesSpec
from advisor.db_log_parser import DatabaseLogs, DataSource
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import RulesSpec
RuleToSuggestions = {
"stall-too-many-memtables": [
'inc-bg-flush',
'inc-write-buffer'
],
"stall-too-many-memtables": ["inc-bg-flush", "inc-write-buffer"],
"stall-too-many-L0": [
'inc-max-subcompactions',
'inc-max-bg-compactions',
'inc-write-buffer-size',
'dec-max-bytes-for-level-base',
'inc-l0-slowdown-writes-trigger'
"inc-max-subcompactions",
"inc-max-bg-compactions",
"inc-write-buffer-size",
"dec-max-bytes-for-level-base",
"inc-l0-slowdown-writes-trigger",
],
"stop-too-many-L0": [
'inc-max-bg-compactions',
'inc-write-buffer-size',
'inc-l0-stop-writes-trigger'
"inc-max-bg-compactions",
"inc-write-buffer-size",
"inc-l0-stop-writes-trigger",
],
"stall-too-many-compaction-bytes": [
'inc-max-bg-compactions',
'inc-write-buffer-size',
'inc-hard-pending-compaction-bytes-limit',
'inc-soft-pending-compaction-bytes-limit'
"inc-max-bg-compactions",
"inc-write-buffer-size",
"inc-hard-pending-compaction-bytes-limit",
"inc-soft-pending-compaction-bytes-limit",
],
"level0-level1-ratio": [
'l0-l1-ratio-health-check'
]
"level0-level1-ratio": ["l0-l1-ratio-health-check"],
}
@ -42,19 +38,19 @@ class TestAllRulesTriggered(unittest.TestCase):
def setUp(self):
# load the Rules
this_path = os.path.abspath(os.path.dirname(__file__))
ini_path = os.path.join(this_path, 'input_files/triggered_rules.ini')
ini_path = os.path.join(this_path, "input_files/triggered_rules.ini")
self.db_rules = RulesSpec(ini_path)
self.db_rules.load_rules_from_spec()
self.db_rules.perform_section_checks()
# load the data sources: LOG and OPTIONS
log_path = os.path.join(this_path, 'input_files/LOG-0')
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005')
log_path = os.path.join(this_path, "input_files/LOG-0")
options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
db_options_parser = DatabaseOptions(options_path)
self.column_families = db_options_parser.get_column_families()
db_logs_parser = DatabaseLogs(log_path, self.column_families)
self.data_sources = {
DataSource.Type.DB_OPTIONS: [db_options_parser],
DataSource.Type.LOG: [db_logs_parser]
DataSource.Type.LOG: [db_logs_parser],
}
def test_triggered_conditions(self):
@ -65,8 +61,7 @@ class TestAllRulesTriggered(unittest.TestCase):
self.assertFalse(cond.is_triggered(), repr(cond))
for rule in rules_dict.values():
self.assertFalse(
rule.is_triggered(conditions_dict, self.column_families),
repr(rule)
rule.is_triggered(conditions_dict, self.column_families), repr(rule)
)
# # Trigger the conditions as per the data sources.
@ -99,19 +94,19 @@ class TestConditionsConjunctions(unittest.TestCase):
def setUp(self):
# load the Rules
this_path = os.path.abspath(os.path.dirname(__file__))
ini_path = os.path.join(this_path, 'input_files/test_rules.ini')
ini_path = os.path.join(this_path, "input_files/test_rules.ini")
self.db_rules = RulesSpec(ini_path)
self.db_rules.load_rules_from_spec()
self.db_rules.perform_section_checks()
# load the data sources: LOG and OPTIONS
log_path = os.path.join(this_path, 'input_files/LOG-1')
options_path = os.path.join(this_path, 'input_files/OPTIONS-000005')
log_path = os.path.join(this_path, "input_files/LOG-1")
options_path = os.path.join(this_path, "input_files/OPTIONS-000005")
db_options_parser = DatabaseOptions(options_path)
self.column_families = db_options_parser.get_column_families()
db_logs_parser = DatabaseLogs(log_path, self.column_families)
self.data_sources = {
DataSource.Type.DB_OPTIONS: [db_options_parser],
DataSource.Type.LOG: [db_logs_parser]
DataSource.Type.LOG: [db_logs_parser],
}
def test_condition_conjunctions(self):
@ -122,46 +117,43 @@ class TestConditionsConjunctions(unittest.TestCase):
self.assertFalse(cond.is_triggered(), repr(cond))
for rule in rules_dict.values():
self.assertFalse(
rule.is_triggered(conditions_dict, self.column_families),
repr(rule)
rule.is_triggered(conditions_dict, self.column_families), repr(rule)
)
# Trigger the conditions as per the data sources.
self.db_rules.trigger_conditions(self.data_sources)
# Check for the conditions
conds_triggered = ['log-1-true', 'log-2-true', 'log-3-true']
conds_not_triggered = ['log-4-false', 'options-1-false']
conds_triggered = ["log-1-true", "log-2-true", "log-3-true"]
conds_not_triggered = ["log-4-false", "options-1-false"]
for cond in conds_triggered:
self.assertTrue(conditions_dict[cond].is_triggered(), repr(cond))
for cond in conds_not_triggered:
self.assertFalse(conditions_dict[cond].is_triggered(), repr(cond))
# Check for the rules
rules_triggered = ['multiple-conds-true']
rules_triggered = ["multiple-conds-true"]
rules_not_triggered = [
'single-condition-false',
'multiple-conds-one-false',
'multiple-conds-all-false'
"single-condition-false",
"multiple-conds-one-false",
"multiple-conds-all-false",
]
for rule_name in rules_triggered:
rule = rules_dict[rule_name]
self.assertTrue(
rule.is_triggered(conditions_dict, self.column_families),
repr(rule)
rule.is_triggered(conditions_dict, self.column_families), repr(rule)
)
for rule_name in rules_not_triggered:
rule = rules_dict[rule_name]
self.assertFalse(
rule.is_triggered(conditions_dict, self.column_families),
repr(rule)
rule.is_triggered(conditions_dict, self.column_families), repr(rule)
)
class TestSanityChecker(unittest.TestCase):
def setUp(self):
this_path = os.path.abspath(os.path.dirname(__file__))
ini_path = os.path.join(this_path, 'input_files/rules_err1.ini')
ini_path = os.path.join(this_path, "input_files/rules_err1.ini")
db_rules = RulesSpec(ini_path)
db_rules.load_rules_from_spec()
self.rules_dict = db_rules.get_rules_dict()
@ -169,39 +161,39 @@ class TestSanityChecker(unittest.TestCase):
self.suggestions_dict = db_rules.get_suggestions_dict()
def test_rule_missing_suggestions(self):
regex = '.*rule must have at least one suggestion.*'
regex = ".*rule must have at least one suggestion.*"
with self.assertRaisesRegex(ValueError, regex):
self.rules_dict['missing-suggestions'].perform_checks()
self.rules_dict["missing-suggestions"].perform_checks()
def test_rule_missing_conditions(self):
regex = '.*rule must have at least one condition.*'
regex = ".*rule must have at least one condition.*"
with self.assertRaisesRegex(ValueError, regex):
self.rules_dict['missing-conditions'].perform_checks()
self.rules_dict["missing-conditions"].perform_checks()
def test_condition_missing_regex(self):
regex = '.*provide regex for log condition.*'
regex = ".*provide regex for log condition.*"
with self.assertRaisesRegex(ValueError, regex):
self.conditions_dict['missing-regex'].perform_checks()
self.conditions_dict["missing-regex"].perform_checks()
def test_condition_missing_options(self):
regex = '.*options missing in condition.*'
regex = ".*options missing in condition.*"
with self.assertRaisesRegex(ValueError, regex):
self.conditions_dict['missing-options'].perform_checks()
self.conditions_dict["missing-options"].perform_checks()
def test_condition_missing_expression(self):
regex = '.*expression missing in condition.*'
regex = ".*expression missing in condition.*"
with self.assertRaisesRegex(ValueError, regex):
self.conditions_dict['missing-expression'].perform_checks()
self.conditions_dict["missing-expression"].perform_checks()
def test_suggestion_missing_option(self):
regex = '.*provide option or description.*'
regex = ".*provide option or description.*"
with self.assertRaisesRegex(ValueError, regex):
self.suggestions_dict['missing-option'].perform_checks()
self.suggestions_dict["missing-option"].perform_checks()
def test_suggestion_missing_description(self):
regex = '.*provide option or description.*'
regex = ".*provide option or description.*"
with self.assertRaisesRegex(ValueError, regex):
self.suggestions_dict['missing-description'].perform_checks()
self.suggestions_dict["missing-description"].perform_checks()
class TestParsingErrors(unittest.TestCase):
@ -209,26 +201,26 @@ class TestParsingErrors(unittest.TestCase):
self.this_path = os.path.abspath(os.path.dirname(__file__))
def test_condition_missing_source(self):
ini_path = os.path.join(self.this_path, 'input_files/rules_err2.ini')
ini_path = os.path.join(self.this_path, "input_files/rules_err2.ini")
db_rules = RulesSpec(ini_path)
regex = '.*provide source for condition.*'
regex = ".*provide source for condition.*"
with self.assertRaisesRegex(NotImplementedError, regex):
db_rules.load_rules_from_spec()
def test_suggestion_missing_action(self):
ini_path = os.path.join(self.this_path, 'input_files/rules_err3.ini')
ini_path = os.path.join(self.this_path, "input_files/rules_err3.ini")
db_rules = RulesSpec(ini_path)
regex = '.*provide action for option.*'
regex = ".*provide action for option.*"
with self.assertRaisesRegex(ValueError, regex):
db_rules.load_rules_from_spec()
def test_section_no_name(self):
ini_path = os.path.join(self.this_path, 'input_files/rules_err4.ini')
ini_path = os.path.join(self.this_path, "input_files/rules_err4.ini")
db_rules = RulesSpec(ini_path)
regex = 'Parsing error: needed section header:.*'
regex = "Parsing error: needed section header:.*"
with self.assertRaisesRegex(ValueError, regex):
db_rules.load_rules_from_spec()
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
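The tests above all follow the same life cycle the advisor itself uses; a condensed sketch, assuming the input files from the test directory are available at these relative paths:

from advisor.db_log_parser import DatabaseLogs, DataSource
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import RulesSpec

db_rules = RulesSpec("input_files/triggered_rules.ini")
db_rules.load_rules_from_spec()
db_rules.perform_section_checks()
db_options = DatabaseOptions("input_files/OPTIONS-000005")
column_families = db_options.get_column_families()
data_sources = {
    DataSource.Type.DB_OPTIONS: [db_options],
    DataSource.Type.LOG: [DatabaseLogs("input_files/LOG-0", column_families)],
}
db_rules.trigger_conditions(data_sources)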

@ -4,57 +4,59 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
'''Run benchmark_compare.sh on the most recent build, for CI
'''
"""Run benchmark_compare.sh on the most recent build, for CI
"""
import argparse
import glob
import logging
import os
import re
import shutil
import subprocess
import sys
import logging
logging.basicConfig(level=logging.INFO)
class Config:
def __init__(self, args):
self.version_file = './include/rocksdb/version.h'
self.version_file = "./include/rocksdb/version.h"
self.data_dir = os.path.expanduser(f"{args.db_dir}")
self.results_dir = os.path.expanduser(f"{args.output_dir}")
self.benchmark_script = f"{os.getcwd()}/tools/benchmark_compare.sh"
self.benchmark_cwd = f"{os.getcwd()}/tools"
benchmark_env_keys = ['LD_LIBRARY_PATH',
'NUM_KEYS',
'KEY_SIZE',
'VALUE_SIZE',
'CACHE_SIZE_MB',
'DURATION_RW',
'DURATION_RO',
'MB_WRITE_PER_SEC',
'NUM_THREADS',
'COMPRESSION_TYPE',
'MIN_LEVEL_TO_COMPRESS',
'WRITE_BUFFER_SIZE_MB',
'TARGET_FILE_SIZE_BASE_MB',
'MAX_BYTES_FOR_LEVEL_BASE_MB',
'MAX_BACKGROUND_JOBS',
'CACHE_INDEX_AND_FILTER_BLOCKS',
'USE_O_DIRECT',
'STATS_INTERVAL_SECONDS',
'SUBCOMPACTIONS',
'COMPACTION_STYLE',
'CI_TESTS_ONLY']
benchmark_env_keys = [
"LD_LIBRARY_PATH",
"NUM_KEYS",
"KEY_SIZE",
"VALUE_SIZE",
"CACHE_SIZE_MB",
"DURATION_RW",
"DURATION_RO",
"MB_WRITE_PER_SEC",
"NUM_THREADS",
"COMPRESSION_TYPE",
"MIN_LEVEL_TO_COMPRESS",
"WRITE_BUFFER_SIZE_MB",
"TARGET_FILE_SIZE_BASE_MB",
"MAX_BYTES_FOR_LEVEL_BASE_MB",
"MAX_BACKGROUND_JOBS",
"CACHE_INDEX_AND_FILTER_BLOCKS",
"USE_O_DIRECT",
"STATS_INTERVAL_SECONDS",
"SUBCOMPACTIONS",
"COMPACTION_STYLE",
"CI_TESTS_ONLY",
]
def read_version(config):
majorRegex = re.compile(r'#define ROCKSDB_MAJOR\s([0-9]+)')
minorRegex = re.compile(r'#define ROCKSDB_MINOR\s([0-9]+)')
patchRegex = re.compile(r'#define ROCKSDB_PATCH\s([0-9]+)')
with open(config.version_file, 'r') as reader:
majorRegex = re.compile(r"#define ROCKSDB_MAJOR\s([0-9]+)")
minorRegex = re.compile(r"#define ROCKSDB_MINOR\s([0-9]+)")
patchRegex = re.compile(r"#define ROCKSDB_PATCH\s([0-9]+)")
with open(config.version_file, "r") as reader:
major = None
minor = None
patch = None
@ -77,8 +79,7 @@ def read_version(config):
def prepare(version_str, config):
old_files = glob.glob(f"{config.results_dir}/{version_str}/**",
recursive=True)
old_files = glob.glob(f"{config.results_dir}/{version_str}/**", recursive=True)
for f in old_files:
if os.path.isfile(f):
logging.debug(f"remove file {f}")
@ -96,8 +97,10 @@ def prepare(version_str, config):
def results(version_str, config):
# Copy the report TSV file back to the top level of results
shutil.copyfile(f"{config.results_dir}/{version_str}/report.tsv",
f"{config.results_dir}/report.tsv")
shutil.copyfile(
f"{config.results_dir}/{version_str}/report.tsv",
f"{config.results_dir}/report.tsv",
)
def cleanup(version_str, config):
@ -116,32 +119,41 @@ def get_benchmark_env():
def main():
'''Tool for running benchmark_compare.sh on the most recent build, for CI
"""Tool for running benchmark_compare.sh on the most recent build, for CI
This tool will
(1) Work out the current version of RocksDB
(2) Run benchmark_compare with that version alone
'''
"""
parser = argparse.ArgumentParser(
description='benchmark_compare.sh Python wrapper for CI.')
description="benchmark_compare.sh Python wrapper for CI."
)
# --tsvfile is the name of the file to read results from
# --esdocument is the ElasticSearch document to push these results into
#
parser.add_argument('--db_dir', default='~/tmp/rocksdb-benchmark-datadir',
help='Database directory hierarchy to use')
parser.add_argument('--output_dir', default='~/tmp/benchmark-results',
help='Benchmark output goes here')
parser.add_argument('--num_keys', default='10000',
help='Number of database keys to use in benchmark test(s) (determines size of test job)')
parser.add_argument(
"--db_dir",
default="~/tmp/rocksdb-benchmark-datadir",
help="Database directory hierarchy to use",
)
parser.add_argument(
"--output_dir",
default="~/tmp/benchmark-results",
help="Benchmark output goes here",
)
parser.add_argument(
"--num_keys",
default="10000",
help="Number of database keys to use in benchmark test(s) (determines size of test job)",
)
args = parser.parse_args()
config = Config(args)
version = read_version(config)
if version is None:
raise Exception(
f"Could not read RocksDB version from {config.version_file}")
raise Exception(f"Could not read RocksDB version from {config.version_file}")
version_str = f"{version[0]}.{version[1]}.{version[2]}"
logging.info(f"Run benchmark_ci with RocksDB version {version_str}")
@ -149,9 +161,13 @@ def main():
try:
env = get_benchmark_env()
env.append(('NUM_KEYS', args.num_keys))
cmd = [config.benchmark_script,
config.data_dir, config.results_dir, version_str]
env.append(("NUM_KEYS", args.num_keys))
cmd = [
config.benchmark_script,
config.data_dir,
config.results_dir,
version_str,
]
logging.info(f"Run {cmd} env={env} cwd={config.benchmark_cwd}")
subprocess.run(cmd, env=dict(env), cwd=config.benchmark_cwd)
@ -162,5 +178,5 @@ def main():
return 0
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())
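read_version() above leans on three anchored regexes over version.h; the extraction step in isolation looks like this (the header line is illustrative):

import re

line = "#define ROCKSDB_MAJOR 7"
match = re.compile(r"#define ROCKSDB_MAJOR\s([0-9]+)").match(line)
major = match.group(1) if match else None  # "7"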

@ -8,9 +8,12 @@ import sys
from block_cache_pysim import (
ARCCache,
CacheEntry,
create_cache,
GDSizeCache,
HashTable,
HyperbolicPolicy,
kMicrosInSecond,
kSampleSize,
LFUPolicy,
LinUCBCache,
LRUCache,
@ -18,13 +21,10 @@ from block_cache_pysim import (
MRUPolicy,
OPTCache,
OPTCacheEntry,
run,
ThompsonSamplingCache,
TraceCache,
TraceRecord,
create_cache,
kMicrosInSecond,
kSampleSize,
run,
)

@ -13,6 +13,7 @@ import random
import sys
import matplotlib
matplotlib.use("Agg")
import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt
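The unusual import order above is deliberate: matplotlib.use("Agg") selects the non-interactive backend and is expected to run before pyplot is first imported, which is why the formatter must not hoist the pyplot import. A minimal headless-plotting sketch under that assumption (the output path is a placeholder):

import matplotlib

matplotlib.use("Agg")  # pick the backend before the first pyplot import
import matplotlib.pyplot as plt  # noqa: E402

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
fig.savefig("/tmp/example.png")  # renders to file; no display required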

@ -15,8 +15,8 @@ for base in ["buckifier", "build_tools", "coverage", "tools"]:
filenames += glob.glob(base + "/" + suff + ".py")
for filename in filenames:
source = open(filename, 'r').read() + '\n'
source = open(filename, "r").read() + "\n"
# Parses and syntax checks the file, throwing on error. (No pyc written.)
_ = compile(source, filename, 'exec')
_ = compile(source, filename, "exec")
print("No syntax errors in {0} .py files".format(len(filenames)))

@ -2,14 +2,15 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import os
import sys
import time
import random
import tempfile
import subprocess
import shutil
import argparse
import subprocess
import sys
import tempfile
import time
# params overwrite priority:
# for default:
@ -37,8 +38,9 @@ default_params = {
"batch_protection_bytes_per_key": lambda: random.choice([0, 8]),
"memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
"block_size": 16384,
"bloom_bits": lambda: random.choice([random.randint(0,19),
random.lognormvariate(2.3, 1.3)]),
"bloom_bits": lambda: random.choice(
[random.randint(0, 19), random.lognormvariate(2.3, 1.3)]
),
"cache_index_and_filter_blocks": lambda: random.randint(0, 1),
"cache_size": 8388608,
"charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]),
@ -47,12 +49,14 @@ default_params = {
"charge_file_metadata": lambda: random.choice([0, 1]),
"checkpoint_one_in": 1000000,
"compression_type": lambda: random.choice(
["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
"bottommost_compression_type": lambda:
"disable" if random.randint(0, 1) == 0 else
random.choice(
["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
"checksum_type" : lambda: random.choice(["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]),
["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]
),
"bottommost_compression_type": lambda: "disable"
if random.randint(0, 1) == 0
else random.choice(["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
"checksum_type": lambda: random.choice(
["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]
),
"compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
"compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
# Disabled compression_parallel_threads as the feature is not stable
@ -93,12 +97,12 @@ default_params = {
# the random seed, so the same keys are chosen by every run for disallowing
# overwrites.
"nooverwritepercent": 1,
"open_files": lambda : random.choice([-1, -1, 100, 500000]),
"open_files": lambda: random.choice([-1, -1, 100, 500000]),
"optimize_filters_for_memory": lambda: random.randint(0, 1),
"partition_filters": lambda: random.randint(0, 1),
"partition_pinning": lambda: random.randint(0, 3),
"pause_background_one_in": 1000000,
"prefix_size" : lambda: random.choice([-1, 1, 5, 7, 8]),
"prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]),
"prefixpercent": 5,
"progress_reports": 0,
"readpercent": 45,
@ -117,7 +121,7 @@ default_params = {
"use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
"mock_direct_io": False,
"cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]),
# fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false.
"use_full_merge_v1": lambda: random.randint(0, 1),
"use_merge": lambda: random.randint(0, 1),
# 999 -> use Bloom API
@ -128,34 +132,36 @@ default_params = {
"writepercent": 35,
"format_version": lambda: random.choice([2, 3, 4, 5, 5]),
"index_block_restart_interval": lambda: random.choice(range(1, 16)),
"use_multiget" : lambda: random.randint(0, 1),
"periodic_compaction_seconds" :
lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
"compaction_ttl" : lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
"use_multiget": lambda: random.randint(0, 1),
"periodic_compaction_seconds": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
"compaction_ttl": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
# Test small max_manifest_file_size in a smaller chance, as most of the
# time we want manifest history to be preserved to help debug
"max_manifest_file_size" : lambda : random.choice(
[t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]),
"max_manifest_file_size": lambda: random.choice(
[t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]
),
# Sync mode might make test runs slower so running it in a smaller chance
"sync" : lambda : random.choice(
[1 if t == 0 else 0 for t in range(0, 20)]),
"sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]),
"bytes_per_sync": lambda: random.choice([0, 262144]),
"wal_bytes_per_sync": lambda: random.choice([0, 524288]),
# Disable compaction_readahead_size because the test is not passing.
#"compaction_readahead_size" : lambda : random.choice(
# "compaction_readahead_size" : lambda : random.choice(
# [0, 0, 1024 * 1024]),
"db_write_buffer_size" : lambda: random.choice(
[0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]),
"avoid_unnecessary_blocking_io" : random.randint(0, 1),
"write_dbid_to_manifest" : random.randint(0, 1),
"avoid_flush_during_recovery" : lambda: random.choice(
[1 if t == 0 else 0 for t in range(0, 8)]),
"max_write_batch_group_size_bytes" : lambda: random.choice(
[16, 64, 1024 * 1024, 16 * 1024 * 1024]),
"level_compaction_dynamic_level_bytes" : True,
"db_write_buffer_size": lambda: random.choice(
[0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]
),
"avoid_unnecessary_blocking_io": random.randint(0, 1),
"write_dbid_to_manifest": random.randint(0, 1),
"avoid_flush_during_recovery": lambda: random.choice(
[1 if t == 0 else 0 for t in range(0, 8)]
),
"max_write_batch_group_size_bytes": lambda: random.choice(
[16, 64, 1024 * 1024, 16 * 1024 * 1024]
),
"level_compaction_dynamic_level_bytes": True,
"verify_checksum_one_in": 1000000,
"verify_db_one_in": 100000,
"continuous_verification_interval" : 0,
"continuous_verification_interval": 0,
"max_key_len": 3,
"key_len_percent_dist": "1,30,69",
"read_fault_one_in": lambda: random.choice([0, 32, 1000]),
@ -166,10 +172,11 @@ default_params = {
"get_property_one_in": 1000000,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
"max_write_buffer_size_to_maintain": lambda: random.choice(
[0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]),
[0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]
),
"user_timestamp_size": 0,
"secondary_cache_fault_one_in" : lambda: random.choice([0, 0, 32]),
"prepopulate_block_cache" : lambda: random.choice([0, 1]),
"secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]),
"prepopulate_block_cache": lambda: random.choice([0, 1]),
"memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]),
"memtable_whole_key_filtering": lambda: random.randint(0, 1),
"detect_filter_construct_corruption": lambda: random.choice([0, 1]),
@ -177,9 +184,13 @@ default_params = {
"async_io": lambda: random.choice([0, 1]),
"wal_compression": lambda: random.choice(["none", "zstd"]),
"verify_sst_unique_id_in_manifest": 1, # always do unique_id verification
"secondary_cache_uri": lambda: random.choice(
["", "compressed_secondary_cache://capacity=8388608",
"compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true"]),
"secondary_cache_uri": lambda: random.choice(
[
"",
"compressed_secondary_cache://capacity=8388608",
"compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true",
]
),
"allow_data_in_errors": True,
"readahead_size": lambda: random.choice([0, 16384, 524288]),
"initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
@ -187,11 +198,12 @@ default_params = {
"num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]),
}
_TEST_DIR_ENV_VAR = 'TEST_TMPDIR'
_DEBUG_LEVEL_ENV_VAR = 'DEBUG_LEVEL'
_TEST_DIR_ENV_VAR = "TEST_TMPDIR"
_DEBUG_LEVEL_ENV_VAR = "DEBUG_LEVEL"
stress_cmd = "./db_stress"
def is_release_mode():
return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0"
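Many entries in the parameter tables in this file are zero-argument lambdas so that each crash-test run re-rolls its configuration; finalize_and_sanitize(), further down in this diff, resolves them with a dict comprehension. A minimal sketch of that resolution over a hypothetical two-entry table:

import random

params = {
    "block_size": 16384,  # plain constant, passed through as-is
    "open_files": lambda: random.choice([-1, -1, 100, 500000]),  # re-rolled per run
}
resolved = {k: v() if callable(v) else v for k, v in params.items()}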
@ -207,7 +219,10 @@ def get_dbname(test_name):
os.mkdir(dbname)
return dbname
expected_values_dir = None
def setup_expected_values_dir():
global expected_values_dir
if expected_values_dir is not None:
@ -215,8 +230,7 @@ def setup_expected_values_dir():
expected_dir_prefix = "rocksdb_crashtest_expected_"
test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
if test_tmpdir is None or test_tmpdir == "":
expected_values_dir = tempfile.mkdtemp(
prefix=expected_dir_prefix)
expected_values_dir = tempfile.mkdtemp(prefix=expected_dir_prefix)
else:
# if tmpdir is specified, store the expected_values_dir under that dir
expected_values_dir = test_tmpdir + "/rocksdb_crashtest_expected"
@ -225,7 +239,10 @@ def setup_expected_values_dir():
os.mkdir(expected_values_dir)
return expected_values_dir
multiops_txn_key_spaces_file = None
def setup_multiops_txn_key_spaces_file():
global multiops_txn_key_spaces_file
if multiops_txn_key_spaces_file is not None:
@ -233,13 +250,15 @@ def setup_multiops_txn_key_spaces_file():
key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces"
test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
if test_tmpdir is None or test_tmpdir == "":
multiops_txn_key_spaces_file = tempfile.mkstemp(
prefix=key_spaces_file_prefix)[1]
multiops_txn_key_spaces_file = tempfile.mkstemp(prefix=key_spaces_file_prefix)[
1
]
else:
if not os.path.exists(test_tmpdir):
os.mkdir(test_tmpdir)
multiops_txn_key_spaces_file = tempfile.mkstemp(
prefix=key_spaces_file_prefix, dir=test_tmpdir)[1]
prefix=key_spaces_file_prefix, dir=test_tmpdir
)[1]
return multiops_txn_key_spaces_file
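tempfile.mkstemp() returns an (fd, path) tuple, which is why the helper above indexes [1] in both branches to keep only the path of the file it created; a short sketch:

import tempfile

fd, path = tempfile.mkstemp(prefix="rocksdb_crashtest_multiops_txn_key_spaces")
# the helper above keeps only the path and leaves the descriptor to the OS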
@ -291,7 +310,7 @@ simple_default_params = {
"write_buffer_size": 32 * 1024 * 1024,
"level_compaction_dynamic_level_bytes": False,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
"verify_iterator_with_expected_state_one_in": 5 # this locks a range of keys
"verify_iterator_with_expected_state_one_in": 5, # this locks a range of keys
}
blackbox_simple_default_params = {
@ -317,7 +336,7 @@ cf_consistency_params = {
}
txn_params = {
"use_txn" : 1,
"use_txn": 1,
# Avoid lambda to set it once for the entire test
"txn_write_policy": random.randint(0, 2),
"unordered_write": random.randint(0, 1),
@ -347,10 +366,14 @@ blob_params = {
"blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]),
"blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]),
"enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3),
"blob_garbage_collection_age_cutoff": lambda: random.choice([0.0, 0.25, 0.5, 0.75, 1.0]),
"blob_garbage_collection_age_cutoff": lambda: random.choice(
[0.0, 0.25, 0.5, 0.75, 1.0]
),
"blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
"blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
"blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]),
"blob_file_starting_level": lambda: random.choice(
[0] * 4 + [1] * 3 + [2] * 2 + [3]
),
"use_blob_cache": lambda: random.randint(0, 1),
"use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
"blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
@ -418,7 +441,7 @@ multiops_txn_default_params = {
# compactions.
"flush_one_in": 1000,
"key_spaces_path": setup_multiops_txn_key_spaces_file(),
"rollback_one_in": 4,
"rollback_one_in": 4,
# Re-enable once we have a compaction for MultiOpsTxnStressTest
"enable_compaction_filter": 0,
"create_timestamped_snapshot_one_in": 50,
@ -446,11 +469,11 @@ multiops_wp_txn_params = {
"create_timestamped_snapshot_one_in": 0,
}
def finalize_and_sanitize(src_params):
dest_params = dict([(k, v() if callable(v) else v)
for (k, v) in src_params.items()])
dest_params = {k: v() if callable(v) else v for (k, v) in src_params.items()}
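# Each callable default (typically a lambda over random.choice/randint) is
# invoked exactly once here, so a single db_stress run sees one fixed value
# per option, e.g. lambda: random.randint(0, 1) collapses to 0 or 1.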
if is_release_mode():
dest_params['read_fault_one_in'] = 0
dest_params["read_fault_one_in"] = 0
if dest_params.get("compression_max_dict_bytes") == 0:
dest_params["compression_zstd_max_train_bytes"] = 0
dest_params["compression_max_dict_buffer_bytes"] = 0
@ -466,13 +489,15 @@ def finalize_and_sanitize(src_params):
# used by `IngestExternalFile()`, causing it to fail with mmap
# reads. Remove this once it is fixed.
dest_params["ingest_external_file_one_in"] = 0
if (dest_params["use_direct_io_for_flush_and_compaction"] == 1
or dest_params["use_direct_reads"] == 1) and \
not is_direct_io_supported(dest_params["db"]):
if (
dest_params["use_direct_io_for_flush_and_compaction"] == 1
or dest_params["use_direct_reads"] == 1
) and not is_direct_io_supported(dest_params["db"]):
if is_release_mode():
print("{} does not support direct IO. Disabling use_direct_reads and "
"use_direct_io_for_flush_and_compaction.\n".format(
dest_params["db"]))
print(
"{} does not support direct IO. Disabling use_direct_reads and "
"use_direct_io_for_flush_and_compaction.\n".format(dest_params["db"])
)
dest_params["use_direct_reads"] = 0
dest_params["use_direct_io_for_flush_and_compaction"] = 0
else:
@ -480,18 +505,22 @@ def finalize_and_sanitize(src_params):
# Multi-key operations are not currently compatible with transactions or
# user-defined timestamps.
if (dest_params.get("test_batches_snapshots") == 1 or
dest_params.get("use_txn") == 1 or
dest_params.get("user_timestamp_size") > 0):
if (
dest_params.get("test_batches_snapshots") == 1
or dest_params.get("use_txn") == 1
or dest_params.get("user_timestamp_size") > 0
):
dest_params["delpercent"] += dest_params["delrangepercent"]
dest_params["delrangepercent"] = 0
dest_params["ingest_external_file_one_in"] = 0
# Correctness testing with unsynced data loss is not currently compatible
# with transactions.
if (dest_params.get("use_txn") == 1):
if dest_params.get("use_txn") == 1:
dest_params["sync_fault_injection"] = 0
if (dest_params.get("disable_wal") == 1 or
dest_params.get("sync_fault_injection") == 1):
if (
dest_params.get("disable_wal") == 1
or dest_params.get("sync_fault_injection") == 1
):
# File ingestion does not guarantee prefix-recoverability when unsynced
# data can be lost. Ingesting a file immediately syncs data that is
# newer than unsynced memtable data, which can be lost on restart.
@ -544,8 +573,10 @@ def finalize_and_sanitize(src_params):
dest_params["readpercent"] += dest_params.get("prefixpercent", 20)
dest_params["prefixpercent"] = 0
dest_params["test_batches_snapshots"] = 0
if (dest_params.get("prefix_size") == -1 and
dest_params.get("memtable_whole_key_filtering") == 0):
if (
dest_params.get("prefix_size") == -1
and dest_params.get("memtable_whole_key_filtering") == 0
):
dest_params["memtable_prefix_bloom_size_ratio"] = 0
if dest_params.get("two_write_queues") == 1:
dest_params["enable_pipelined_write"] = 0
@ -566,19 +597,20 @@ def finalize_and_sanitize(src_params):
return dest_params
def gen_cmd_params(args):
params = {}
params.update(default_params)
if args.test_type == 'blackbox':
if args.test_type == "blackbox":
params.update(blackbox_default_params)
if args.test_type == 'whitebox':
if args.test_type == "whitebox":
params.update(whitebox_default_params)
if args.simple:
params.update(simple_default_params)
if args.test_type == 'blackbox':
if args.test_type == "blackbox":
params.update(blackbox_simple_default_params)
if args.test_type == 'whitebox':
if args.test_type == "whitebox":
params.update(whitebox_simple_default_params)
if args.cf_consistency:
params.update(cf_consistency_params)
@ -590,9 +622,9 @@ def gen_cmd_params(args):
params.update(ts_params)
if args.test_multiops_txn:
params.update(multiops_txn_default_params)
if args.write_policy == 'write_committed':
if args.write_policy == "write_committed":
params.update(multiops_wc_txn_params)
elif args.write_policy == 'write_prepared':
elif args.write_policy == "write_prepared":
params.update(multiops_wp_txn_params)
if args.test_tiered_storage:
params.update(tiered_params)
@ -600,9 +632,12 @@ def gen_cmd_params(args):
# Best-effort recovery, user-defined timestamps, and tiered storage are
# currently incompatible with BlobDB. Test BE recovery if specified on the
# command line; otherwise, apply BlobDB-related overrides with a 10% chance.
if (not args.test_best_efforts_recovery and
not args.enable_ts and not args.test_tiered_storage and
random.choice([0] * 9 + [1]) == 1):
if (
not args.test_best_efforts_recovery
and not args.enable_ts
and not args.test_tiered_storage
and random.choice([0] * 9 + [1]) == 1
):
params.update(blob_params)
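# (random.choice([0] * 9 + [1]) draws from ten elements of which one is 1,
# i.e. the BlobDB overrides are applied with probability 1/10.)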
for k, v in vars(args).items():
@ -613,68 +648,87 @@ def gen_cmd_params(args):
def gen_cmd(params, unknown_params):
finalzied_params = finalize_and_sanitize(params)
cmd = [stress_cmd] + [
'--{0}={1}'.format(k, v)
for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)]
if k not in set(['test_type', 'simple', 'duration', 'interval',
'random_kill_odd', 'cf_consistency', 'txn',
'test_best_efforts_recovery', 'enable_ts',
'test_multiops_txn', 'write_policy', 'stress_cmd',
'test_tiered_storage'])
and v is not None] + unknown_params
cmd = (
[stress_cmd]
+ [
"--{0}={1}".format(k, v)
for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)]
if k
not in {
"test_type",
"simple",
"duration",
"interval",
"random_kill_odd",
"cf_consistency",
"txn",
"test_best_efforts_recovery",
"enable_ts",
"test_multiops_txn",
"write_policy",
"stress_cmd",
"test_tiered_storage",
}
and v is not None
]
+ unknown_params
)
return cmd
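# For illustration (hypothetical values): gen_cmd({"db": "/tmp/db",
# "use_txn": 1, "duration": 3600}, ["--foo=bar"]) returns
# ["./db_stress", "--db=/tmp/db", "--use_txn=1", "--foo=bar"]; "duration"
# is consumed by this script itself and never forwarded to db_stress.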
def execute_cmd(cmd, timeout):
child = subprocess.Popen(cmd, stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
print("Running db_stress with pid=%d: %s\n\n"
% (child.pid, ' '.join(cmd)))
child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd)))
try:
outs, errs = child.communicate(timeout=timeout)
hit_timeout = False
print("WARNING: db_stress ended before kill: exitcode=%d\n"
% child.returncode)
print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode)
except subprocess.TimeoutExpired:
hit_timeout = True
child.kill()
print("KILLED %d\n" % child.pid)
outs, errs = child.communicate()
return hit_timeout, child.returncode, outs.decode('utf-8'), errs.decode('utf-8')
return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8")
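# hit_timeout=True means this script killed the child itself; otherwise
# db_stress exited on its own. A child terminated by signal N reports
# returncode == -N, which the whitebox checks below rely on.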
# This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB.
def blackbox_crash_main(args, unknown_args):
cmd_params = gen_cmd_params(args)
dbname = get_dbname('blackbox')
exit_time = time.time() + cmd_params['duration']
print("Running blackbox-crash-test with \n"
+ "interval_between_crash=" + str(cmd_params['interval']) + "\n"
+ "total-duration=" + str(cmd_params['duration']) + "\n")
dbname = get_dbname("blackbox")
exit_time = time.time() + cmd_params["duration"]
print(
"Running blackbox-crash-test with \n"
+ "interval_between_crash="
+ str(cmd_params["interval"])
+ "\n"
+ "total-duration="
+ str(cmd_params["duration"])
+ "\n"
)
while time.time() < exit_time:
cmd = gen_cmd(dict(
list(cmd_params.items())
+ list({'db': dbname}.items())), unknown_args)
cmd = gen_cmd(
dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
)
hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params['interval'])
hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["interval"])
if not hit_timeout:
print('Exit Before Killing')
print('stdout:')
print("Exit Before Killing")
print("stdout:")
print(outs)
print('stderr:')
print("stderr:")
print(errs)
sys.exit(2)
for line in errs.split('\n'):
if line != '' and not line.startswith('WARNING'):
print('stderr has error message:')
print('***' + line + '***')
for line in errs.split("\n"):
if line != "" and not line.startswith("WARNING"):
print("stderr has error message:")
print("***" + line + "***")
time.sleep(1) # time to stabilize before the next run
@ -688,89 +742,109 @@ def blackbox_crash_main(args, unknown_args):
# kill_random_test that causes rocksdb to crash at various points in code.
def whitebox_crash_main(args, unknown_args):
cmd_params = gen_cmd_params(args)
dbname = get_dbname('whitebox')
dbname = get_dbname("whitebox")
cur_time = time.time()
exit_time = cur_time + cmd_params['duration']
half_time = cur_time + cmd_params['duration'] // 2
exit_time = cur_time + cmd_params["duration"]
half_time = cur_time + cmd_params["duration"] // 2
print("Running whitebox-crash-test with \n"
+ "total-duration=" + str(cmd_params['duration']) + "\n")
print(
"Running whitebox-crash-test with \n"
+ "total-duration="
+ str(cmd_params["duration"])
+ "\n"
)
total_check_mode = 4
check_mode = 0
kill_random_test = cmd_params['random_kill_odd']
kill_random_test = cmd_params["random_kill_odd"]
kill_mode = 0
while time.time() < exit_time:
if check_mode == 0:
additional_opts = {
# use large ops per thread since we will kill it anyway
"ops_per_thread": 100 * cmd_params['ops_per_thread'],
"ops_per_thread": 100
* cmd_params["ops_per_thread"],
}
# run with kill_random_test, with three modes.
# Mode 0 covers all kill points. Mode 1 covers fewer kill points but
# increases the chance of triggering them. Mode 2 covers even less
# frequent kill points and further increases the triggering chance.
if kill_mode == 0:
additional_opts.update({
"kill_random_test": kill_random_test,
})
additional_opts.update(
{
"kill_random_test": kill_random_test,
}
)
elif kill_mode == 1:
if cmd_params.get('disable_wal', 0) == 1:
if cmd_params.get("disable_wal", 0) == 1:
my_kill_odd = kill_random_test // 50 + 1
else:
my_kill_odd = kill_random_test // 10 + 1
additional_opts.update({
"kill_random_test": my_kill_odd,
"kill_exclude_prefixes": "WritableFileWriter::Append,"
+ "WritableFileWriter::WriteBuffered",
})
additional_opts.update(
{
"kill_random_test": my_kill_odd,
"kill_exclude_prefixes": "WritableFileWriter::Append,"
+ "WritableFileWriter::WriteBuffered",
}
)
elif kill_mode == 2:
# TODO: May need to adjust random odds if kill_random_test
# is too small.
additional_opts.update({
"kill_random_test": (kill_random_test // 5000 + 1),
"kill_exclude_prefixes": "WritableFileWriter::Append,"
"WritableFileWriter::WriteBuffered,"
"PosixMmapFile::Allocate,WritableFileWriter::Flush",
})
additional_opts.update(
{
"kill_random_test": (kill_random_test // 5000 + 1),
"kill_exclude_prefixes": "WritableFileWriter::Append,"
"WritableFileWriter::WriteBuffered,"
"PosixMmapFile::Allocate,WritableFileWriter::Flush",
}
)
# Run kill mode 0, 1 and 2 by turn.
kill_mode = (kill_mode + 1) % 3
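# (Lowering the odds via // 10 or // 5000 makes each remaining kill point
# fire correspondingly more often, while kill_exclude_prefixes skips the
# hottest write-path points so the rarer ones actually get exercised.)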
elif check_mode == 1:
# normal run with universal compaction mode
additional_opts = {
"kill_random_test": None,
"ops_per_thread": cmd_params['ops_per_thread'],
"ops_per_thread": cmd_params["ops_per_thread"],
"compaction_style": 1,
}
# Single level universal has a lot of special logic. Ensure we cover
# it sometimes.
if random.randint(0, 1) == 1:
additional_opts.update({
"num_levels": 1,
})
additional_opts.update(
{
"num_levels": 1,
}
)
elif check_mode == 2:
# normal run with FIFO compaction mode
# ops_per_thread is divided by 5 because FIFO compaction
# style is quite a bit slower on reads with a lot of files
additional_opts = {
"kill_random_test": None,
"ops_per_thread": cmd_params['ops_per_thread'] // 5,
"ops_per_thread": cmd_params["ops_per_thread"] // 5,
"compaction_style": 2,
}
else:
# normal run
additional_opts = {
"kill_random_test": None,
"ops_per_thread": cmd_params['ops_per_thread'],
"ops_per_thread": cmd_params["ops_per_thread"],
}
cmd = gen_cmd(dict(list(cmd_params.items())
+ list(additional_opts.items())
+ list({'db': dbname}.items())), unknown_args)
cmd = gen_cmd(
dict(
list(cmd_params.items())
+ list(additional_opts.items())
+ list({"db": dbname}.items())
),
unknown_args,
)
print("Running:" + ' '.join(cmd) + "\n") # noqa: E999 T25377293 Grandfathered in
print(
"Running:" + " ".join(cmd) + "\n"
) # noqa: E999 T25377293 Grandfathered in
# If the running time is 15 minutes over the run time, explicitly kill and
# exit even if the whitebox kill didn't hit. This is to guarantee run time
@ -779,9 +853,11 @@ def whitebox_crash_main(args, unknown_args):
# TODO detect a hanging condition. The job might run too long as RocksDB
# hits a hanging bug.
hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd(
cmd, exit_time - time.time() + 900)
msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
check_mode, additional_opts['kill_random_test'], retncode))
cmd, exit_time - time.time() + 900
)
msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format(
check_mode, additional_opts["kill_random_test"], retncode
)
print(msg)
print(stdoutdata)
@ -792,10 +868,10 @@ def whitebox_crash_main(args, unknown_args):
break
expected = False
if additional_opts['kill_random_test'] is None and (retncode == 0):
if additional_opts["kill_random_test"] is None and (retncode == 0):
# we expect zero retncode if no kill option
expected = True
elif additional_opts['kill_random_test'] is not None and retncode <= 0:
elif additional_opts["kill_random_test"] is not None and retncode <= 0:
# When kill option is given, the test MIGHT kill itself.
# If it does, negative retncode is expected. Otherwise 0.
expected = True
@ -805,15 +881,13 @@ def whitebox_crash_main(args, unknown_args):
sys.exit(1)
stderrdata = stderrdata.lower()
errorcount = (stderrdata.count('error') -
stderrdata.count('got errors 0 times'))
print("#times error occurred in output is " + str(errorcount) +
"\n")
errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times")
print("#times error occurred in output is " + str(errorcount) + "\n")
if (errorcount > 0):
if errorcount > 0:
print("TEST FAILED. Output has 'error'!!!\n")
sys.exit(2)
if (stderrdata.find('fail') >= 0):
if stderrdata.find("fail") >= 0:
print("TEST FAILED. Output has 'fail'!!!\n")
sys.exit(2)
@ -824,7 +898,7 @@ def whitebox_crash_main(args, unknown_args):
# success
shutil.rmtree(dbname, True)
os.mkdir(dbname)
cmd_params.pop('expected_values_dir', None)
cmd_params.pop("expected_values_dir", None)
check_mode = (check_mode + 1) % total_check_mode
time.sleep(1) # time to stabilize after a kill
@ -833,34 +907,38 @@ def whitebox_crash_main(args, unknown_args):
def main():
global stress_cmd
parser = argparse.ArgumentParser(description="This script runs and kills \
db_stress multiple times")
parser = argparse.ArgumentParser(
description="This script runs and kills \
db_stress multiple times"
)
parser.add_argument("test_type", choices=["blackbox", "whitebox"])
parser.add_argument("--simple", action="store_true")
parser.add_argument("--cf_consistency", action='store_true')
parser.add_argument("--txn", action='store_true')
parser.add_argument("--test_best_efforts_recovery", action='store_true')
parser.add_argument("--enable_ts", action='store_true')
parser.add_argument("--test_multiops_txn", action='store_true')
parser.add_argument("--cf_consistency", action="store_true")
parser.add_argument("--txn", action="store_true")
parser.add_argument("--test_best_efforts_recovery", action="store_true")
parser.add_argument("--enable_ts", action="store_true")
parser.add_argument("--test_multiops_txn", action="store_true")
parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
parser.add_argument("--stress_cmd")
parser.add_argument("--test_tiered_storage", action='store_true')
all_params = dict(list(default_params.items())
+ list(blackbox_default_params.items())
+ list(whitebox_default_params.items())
+ list(simple_default_params.items())
+ list(blackbox_simple_default_params.items())
+ list(whitebox_simple_default_params.items())
+ list(blob_params.items())
+ list(ts_params.items())
+ list(multiops_txn_default_params.items())
+ list(multiops_wc_txn_params.items())
+ list(multiops_wp_txn_params.items())
+ list(best_efforts_recovery_params.items())
+ list(cf_consistency_params.items())
+ list(tiered_params.items())
+ list(txn_params.items()))
parser.add_argument("--test_tiered_storage", action="store_true")
all_params = dict(
list(default_params.items())
+ list(blackbox_default_params.items())
+ list(whitebox_default_params.items())
+ list(simple_default_params.items())
+ list(blackbox_simple_default_params.items())
+ list(whitebox_simple_default_params.items())
+ list(blob_params.items())
+ list(ts_params.items())
+ list(multiops_txn_default_params.items())
+ list(multiops_wc_txn_params.items())
+ list(multiops_wp_txn_params.items())
+ list(best_efforts_recovery_params.items())
+ list(cf_consistency_params.items())
+ list(tiered_params.items())
+ list(txn_params.items())
)
for k, v in all_params.items():
parser.add_argument("--" + k, type=type(v() if callable(v) else v))
@ -869,15 +947,17 @@ def main():
test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
if test_tmpdir is not None and not os.path.isdir(test_tmpdir):
print('%s env var is set to a non-existent directory: %s' %
(_TEST_DIR_ENV_VAR, test_tmpdir))
print(
"%s env var is set to a non-existent directory: %s"
% (_TEST_DIR_ENV_VAR, test_tmpdir)
)
sys.exit(1)
if args.stress_cmd:
stress_cmd = args.stress_cmd
if args.test_type == 'blackbox':
if args.test_type == "blackbox":
blackbox_crash_main(args, unknown_args)
if args.test_type == 'whitebox':
if args.test_type == "whitebox":
whitebox_crash_main(args, unknown_args)
# Only delete the `expected_values_dir` if test passes
if expected_values_dir is not None:
@ -886,5 +966,5 @@ def main():
os.remove(multiops_txn_key_spaces_file)
if __name__ == '__main__':
if __name__ == "__main__":
main()

@ -2,65 +2,72 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
import os
import os.path
import re
import shutil
import subprocess
import tempfile
import time
import unittest
import tempfile
import re
def my_check_output(*popenargs, **kwargs):
"""
If we had Python 2.7, we could simply use subprocess.check_output.
This is a stop-gap solution for Python 2.6.
"""
if 'stdout' in kwargs:
raise ValueError('stdout argument not allowed, it will be overridden.')
process = subprocess.Popen(stderr=subprocess.PIPE, stdout=subprocess.PIPE,
*popenargs, **kwargs)
if "stdout" in kwargs:
raise ValueError("stdout argument not allowed, it will be overridden.")
process = subprocess.Popen(
stderr=subprocess.PIPE, stdout=subprocess.PIPE, *popenargs, **kwargs
)
output, unused_err = process.communicate()
retcode = process.poll()
if retcode:
cmd = kwargs.get("args")
if cmd is None:
cmd = popenargs[0]
raise Exception("Exit code is not 0. It is %d. Command: %s" %
(retcode, cmd))
return output.decode('utf-8')
raise Exception("Exit code is not 0. It is %d. Command: %s" % (retcode, cmd))
return output.decode("utf-8")
def run_err_null(cmd):
return os.system(cmd + " 2>/dev/null ")
class LDBTestCase(unittest.TestCase):
def setUp(self):
self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_")
self.DB_NAME = "testdb"
def tearDown(self):
assert(self.TMP_DIR.strip() != "/"
and self.TMP_DIR.strip() != "/tmp"
and self.TMP_DIR.strip() != "/tmp/") #Just some paranoia
assert (
self.TMP_DIR.strip() != "/"
and self.TMP_DIR.strip() != "/tmp"
and self.TMP_DIR.strip() != "/tmp/"
) # Just some paranoia
shutil.rmtree(self.TMP_DIR)
def dbParam(self, dbName):
return "--db=%s" % os.path.join(self.TMP_DIR, dbName)
def assertRunOKFull(self, params, expectedOutput, unexpected=False,
isPattern=False):
def assertRunOKFull(
self, params, expectedOutput, unexpected=False, isPattern=False
):
"""
All command-line params must be specified.
Allows full flexibility in testing; for example: missing db param.
"""
output = my_check_output("./ldb %s |grep -v \"Created bg thread\"" %
params, shell=True)
output = my_check_output(
'./ldb %s |grep -v "Created bg thread"' % params, shell=True
)
if not unexpected:
if isPattern:
self.assertNotEqual(expectedOutput.search(output.strip()),
None)
self.assertNotEqual(expectedOutput.search(output.strip()), None)
else:
self.assertEqual(output.strip(), expectedOutput.strip())
else:
@ -76,20 +83,25 @@ class LDBTestCase(unittest.TestCase):
"""
try:
my_check_output("./ldb %s >/dev/null 2>&1 |grep -v \"Created bg \
thread\"" % params, shell=True)
my_check_output(
'./ldb %s >/dev/null 2>&1 |grep -v "Created bg \
thread"'
% params,
shell=True,
)
except Exception:
return
self.fail(
"Exception should have been raised for command with params: %s" %
params)
"Exception should have been raised for command with params: %s" % params
)
def assertRunOK(self, params, expectedOutput, unexpected=False):
"""
Uses the default test db.
"""
self.assertRunOKFull("%s %s" % (self.dbParam(self.DB_NAME), params),
expectedOutput, unexpected)
self.assertRunOKFull(
"%s %s" % (self.dbParam(self.DB_NAME), params), expectedOutput, unexpected
)
def assertRunFAIL(self, params):
"""
@ -118,16 +130,17 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("scan --to=x2", "x1 : y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=2",
"x1 : y1\nx2 : y2")
self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 : y1\nx2 : y2")
self.assertRunOK("scan --from=x1 --to=z --max_keys=3",
"x1 : y1\nx2 : y2\nx3 : y3")
self.assertRunOK("scan --from=x1 --to=z --max_keys=4",
"x1 : y1\nx2 : y2\nx3 : y3")
self.assertRunOK(
"scan --from=x1 --to=z --max_keys=3", "x1 : y1\nx2 : y2\nx3 : y3"
)
self.assertRunOK(
"scan --from=x1 --to=z --max_keys=4", "x1 : y1\nx2 : y2\nx3 : y3"
)
self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1")
self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3")
self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL
self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL
self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3")
@ -148,18 +161,18 @@ class LDBTestCase(unittest.TestCase):
return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params))
def writeExternSst(self, params, inputDumpFile, outputSst):
return 0 == run_err_null("cat %s | ./ldb write_extern_sst %s %s"
% (inputDumpFile, outputSst, params))
return 0 == run_err_null(
"cat %s | ./ldb write_extern_sst %s %s" % (inputDumpFile, outputSst, params)
)
def ingestExternSst(self, params, inputSst):
return 0 == run_err_null("./ldb ingest_extern_sst %s %s"
% (inputSst, params))
return 0 == run_err_null("./ldb ingest_extern_sst %s %s" % (inputSst, params))
def testStringBatchPut(self):
print("Running testStringBatchPut...")
self.assertRunOK("batchput x1 y1 --create_if_missing", "OK")
self.assertRunOK("scan", "x1 : y1")
self.assertRunOK("batchput x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK")
self.assertRunOK('batchput x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz")
self.assertRunFAIL("batchput")
self.assertRunFAIL("batchput k1")
@ -171,7 +184,9 @@ class LDBTestCase(unittest.TestCase):
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK("scan", "x1 : y1")
self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK")
self.assertRunOK(
'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK"
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz")
blob_files = self.getBlobFiles(dbPath)
@ -195,13 +210,18 @@ class LDBTestCase(unittest.TestCase):
print("Running testBlobStartingLevel...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1", "OK")
self.assertRunOK(
"put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1",
"OK",
)
self.assertRunOK("get x1", "y1")
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) == 0)
self.assertRunOK("put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK")
self.assertRunOK(
"put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK"
)
self.assertRunOK("get x1", "y1")
self.assertRunOK("get x2", "y2")
self.assertRunFAIL("get x3")
@ -213,19 +233,37 @@ class LDBTestCase(unittest.TestCase):
print("Running testCountDelimDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK("dump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK("dump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK(
"dump --count_delim",
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK(
'dump --count_delim="."',
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK("dump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8")
self.assertRunOK(
'dump --count_delim=","',
"x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8",
)
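# count_delim groups keys by the prefix before the delimiter ("." unless
# overridden); count is the number of keys in a group and size the total
# bytes of keys plus values, e.g. the lone pair x.1 => x1 above gives
# size = len("x.1") + len("x1") = 5.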
def testCountDelimIDump(self):
print("Running testCountDelimIDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK("idump --count_delim", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK("idump --count_delim=\".\"", "x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8")
self.assertRunOK(
"idump --count_delim",
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK(
'idump --count_delim="."',
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK("idump --count_delim=\",\"", "x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8")
self.assertRunOK(
'idump --count_delim=","',
"x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8",
)
def testInvalidCmdLines(self):
print("Running testInvalidCmdLines...")
@ -253,12 +291,13 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("get --key_hex 0x6132", "b2")
self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232")
self.assertRunOK("get --value_hex a2", "0x6232")
self.assertRunOK("scan --key_hex --value_hex",
"0x6131 : 0x6231\n0x6132 : 0x6232")
self.assertRunOK("scan --hex --from=0x6131 --to=0x6133",
"0x6131 : 0x6231\n0x6132 : 0x6232")
self.assertRunOK("scan --hex --from=0x6131 --to=0x6132",
"0x6131 : 0x6231")
self.assertRunOK(
"scan --key_hex --value_hex", "0x6131 : 0x6231\n0x6132 : 0x6232"
)
self.assertRunOK(
"scan --hex --from=0x6131 --to=0x6133", "0x6131 : 0x6231\n0x6132 : 0x6232"
)
self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 : 0x6231")
self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2")
self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232")
self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK")
@ -272,8 +311,7 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK")
self.assertRunOK("scan --hex", "0x6131 : 0x6231", True)
self.assertRunOK("dump --ttl ", "a1 ==> b1", True)
self.assertRunOK("dump --hex --ttl ",
"0x6131 ==> 0x6231\nKeys in range: 1")
self.assertRunOK("dump --hex --ttl ", "0x6131 ==> 0x6231\nKeys in range: 1")
self.assertRunOK("scan --hex --ttl", "0x6131 : 0x6231")
self.assertRunOK("get --value_hex a1", "0x6231", True)
self.assertRunOK("get --ttl a1", "b1")
@ -295,8 +333,7 @@ class LDBTestCase(unittest.TestCase):
def testDumpLoad(self):
print("Running testDumpLoad...")
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4",
"OK")
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
@ -304,98 +341,125 @@ class LDBTestCase(unittest.TestCase):
dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1")
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump and load in hex
dumpFilePath = os.path.join(self.TMP_DIR, "dump2")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2")
self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(
self.loadDb(
"--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump only a portion of the key range
dumpFilePath = os.path.join(self.TMP_DIR, "dump3")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3")
self.assertTrue(self.dumpDb(
"--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath))
self.assertTrue(
self.dumpDb("--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath)
)
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2")
# Dump up to max_keys rows
dumpFilePath = os.path.join(self.TMP_DIR, "dump4")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4")
self.assertTrue(self.dumpDb(
"--db=%s --max_keys=3" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3")
self.assertTrue(self.dumpDb("--db=%s --max_keys=3" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3")
# Load into an existing db, create_if_missing is not specified
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump and load with WAL disabled
dumpFilePath = os.path.join(self.TMP_DIR, "dump5")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5")
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s --disable_wal --create_if_missing" % loadedDbPath,
dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(
self.loadDb(
"--db=%s --disable_wal --create_if_missing" % loadedDbPath, dumpFilePath
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump and load with lots of extra params specified
extraParams = " ".join(["--bloom_bits=14", "--block_size=1024",
"--auto_compaction=true",
"--write_buffer_size=4194304",
"--file_size=2097152"])
extraParams = " ".join(
[
"--bloom_bits=14",
"--block_size=1024",
"--auto_compaction=true",
"--write_buffer_size=4194304",
"--file_size=2097152",
]
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump6")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6")
self.assertTrue(self.dumpDb(
"--db=%s %s" % (origDbPath, extraParams), dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s %s --create_if_missing" % (loadedDbPath, extraParams),
dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(
self.dumpDb("--db=%s %s" % (origDbPath, extraParams), dumpFilePath)
)
self.assertTrue(
self.loadDb(
"--db=%s %s --create_if_missing" % (loadedDbPath, extraParams),
dumpFilePath,
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
# Dump with count_only
dumpFilePath = os.path.join(self.TMP_DIR, "dump7")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7")
self.assertTrue(self.dumpDb(
"--db=%s --count_only" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s --create_if_missing" % loadedDbPath, dumpFilePath))
self.assertTrue(self.dumpDb("--db=%s --count_only" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
# DB should have at least one value for scan to work
self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK")
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1")
# Dump command fails because of typo in params
dumpFilePath = os.path.join(self.TMP_DIR, "dump8")
self.assertFalse(self.dumpDb(
"--db=%s --create_if_missing" % origDbPath, dumpFilePath))
self.assertFalse(
self.dumpDb("--db=%s --create_if_missing" % origDbPath, dumpFilePath)
)
# Dump and load with BlobDB enabled
blobParams = " ".join(["--enable_blob_files", "--min_blob_size=1",
"--blob_file_size=2097152"])
blobParams = " ".join(
["--enable_blob_files", "--min_blob_size=1", "--blob_file_size=2097152"]
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump9")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump9")
self.assertTrue(self.dumpDb(
"--db=%s" % (origDbPath), dumpFilePath))
self.assertTrue(self.loadDb(
"--db=%s %s --create_if_missing --disable_wal" % (loadedDbPath, blobParams),
dumpFilePath))
self.assertRunOKFull("scan --db=%s" % loadedDbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(self.dumpDb("--db=%s" % (origDbPath), dumpFilePath))
self.assertTrue(
self.loadDb(
"--db=%s %s --create_if_missing --disable_wal"
% (loadedDbPath, blobParams),
dumpFilePath,
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4"
)
blob_files = self.getBlobFiles(loadedDbPath)
self.assertTrue(len(blob_files) >= 1)
@ -404,12 +468,14 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("put a val --create_if_missing", "OK")
self.assertRunOK("put b val", "OK")
self.assertRunOK(
"idump", "'a' seq:1, type:1 => val\n"
"'b' seq:2, type:1 => val\nInternal keys in range: 2")
"idump",
"'a' seq:1, type:1 => val\n"
"'b' seq:2, type:1 => val\nInternal keys in range: 2",
)
self.assertRunOK(
"idump --input_key_hex --from=%s --to=%s" % (hex(ord('a')),
hex(ord('b'))),
"'a' seq:1, type:1 => val\nInternal keys in range: 1")
"idump --input_key_hex --from=%s --to=%s" % (hex(ord("a")), hex(ord("b"))),
"'a' seq:1, type:1 => val\nInternal keys in range: 1",
)
def testIDumpDecodeBlobIndex(self):
print("Running testIDumpDecodeBlobIndex...")
@ -420,45 +486,55 @@ class LDBTestCase(unittest.TestCase):
regex = ".*\[blob ref\].*"
expected_pattern = re.compile(regex)
cmd = "idump %s --decode_blob_index"
self.assertRunOKFull((cmd)
% (self.dbParam(self.DB_NAME)),
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
(cmd) % (self.dbParam(self.DB_NAME)),
expected_pattern,
unexpected=False,
isPattern=True,
)
def testMiscAdminTask(self):
print("Running testMiscAdminTask...")
# These tests need to be improved; for example with asserts about
# whether compaction or level reduction actually took place.
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4",
"OK")
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK")
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertTrue(0 == run_err_null(
"./ldb compact --db=%s" % origDbPath))
self.assertTrue(0 == run_err_null("./ldb compact --db=%s" % origDbPath))
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null(
"./ldb reduce_levels --db=%s --new_levels=2" % origDbPath))
self.assertTrue(
0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=2" % origDbPath)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null(
"./ldb reduce_levels --db=%s --new_levels=3" % origDbPath))
self.assertTrue(
0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=3" % origDbPath)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null(
"./ldb compact --db=%s --from=x1 --to=x3" % origDbPath))
self.assertTrue(
0 == run_err_null("./ldb compact --db=%s --from=x1 --to=x3" % origDbPath)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
self.assertTrue(0 == run_err_null(
"./ldb compact --db=%s --hex --from=0x6131 --to=0x6134"
% origDbPath))
self.assertTrue(
0
== run_err_null(
"./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % origDbPath
)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
#TODO(dilip): Not sure what should be passed to WAL.Currently corrupted.
self.assertTrue(0 == run_err_null(
"./ldb dump_wal --db=%s --walfile=%s --header" % (
origDbPath, os.path.join(origDbPath, "LOG"))))
# TODO(dilip): Not sure what should be passed to WAL. Currently corrupted.
self.assertTrue(
0
== run_err_null(
"./ldb dump_wal --db=%s --walfile=%s --header"
% (origDbPath, os.path.join(origDbPath, "LOG"))
)
)
self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
def testCheckConsistency(self):
@ -470,8 +546,9 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("get x1", "y1")
self.assertRunOK("checkconsistency", "OK")
sstFilePath = my_check_output("ls %s" % os.path.join(dbPath, "*.sst"),
shell=True)
sstFilePath = my_check_output(
"ls %s" % os.path.join(dbPath, "*.sst"), shell=True
)
# Modify the file
my_check_output("echo 'evil' > %s" % sstFilePath, shell=True)
@ -482,8 +559,7 @@ class LDBTestCase(unittest.TestCase):
self.assertRunFAIL("checkconsistency")
def dumpLiveFiles(self, params, dumpFile):
return 0 == run_err_null("./ldb dump_live_files %s > %s" % (
params, dumpFile))
return 0 == run_err_null("./ldb dump_live_files %s > %s" % (params, dumpFile))
def testDumpLiveFiles(self):
print("Running testDumpLiveFiles...")
@ -506,7 +582,12 @@ class LDBTestCase(unittest.TestCase):
dbPath += "/"
# Call the dump_live_files function with the edited dbPath name.
self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, dumpFilePath))
self.assertTrue(
self.dumpLiveFiles(
"--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath,
dumpFilePath,
)
)
# Investigate the output
with open(dumpFilePath, "r") as tmp:
@ -517,14 +598,14 @@ class LDBTestCase(unittest.TestCase):
self.assertTrue(len(sstFileList) >= 1)
for sstFilename in sstFileList:
filenumber = re.findall(r"\d+.sst", sstFilename)[0]
self.assertEqual(sstFilename, dbPath+filenumber)
self.assertEqual(sstFilename, dbPath + filenumber)
# Check that all the Blob filenames have a correct full path (no multiple '/').
blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data)
self.assertTrue(len(blobFileList) >= 1)
for blobFilename in blobFileList:
filenumber = re.findall(r"\d+.blob", blobFilename)[0]
self.assertEqual(blobFilename, dbPath+filenumber)
self.assertEqual(blobFilename, dbPath + filenumber)
# Check that all the manifest filenames
# have a correct full path (no multiple '/').
@ -532,15 +613,16 @@ class LDBTestCase(unittest.TestCase):
self.assertTrue(len(manifestFileList) >= 1)
for manifestFilename in manifestFileList:
filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0]
self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber)
self.assertEqual(manifestFilename, dbPath + "MANIFEST-" + filenumber)
# Check that the blob file index is decoded.
decodedBlobIndex = re.findall(r"\[blob ref\]", data)
self.assertTrue(len(decodedBlobIndex) >= 1)
def listLiveFilesMetadata(self, params, dumpFile):
return 0 == run_err_null("./ldb list_live_files_metadata %s > %s" % (
params, dumpFile))
return 0 == run_err_null(
"./ldb list_live_files_metadata %s > %s" % (params, dumpFile)
)
def testListLiveFilesMetadata(self):
print("Running testListLiveFilesMetadata...")
@ -554,23 +636,27 @@ class LDBTestCase(unittest.TestCase):
dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1))
dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2")
self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath2))
self.assertTrue(
self.listLiveFilesMetadata(
"--sort_by_filename --db=%s" % dbPath, dumpFilePath2
)
)
# Collect SST filename and level from dump_live_files
with open(dumpFilePath1, "r") as tmp:
data = tmp.read()
filename1 = re.findall(r".*\d+\.sst",data)[0]
level1 = re.findall(r"level:\d+",data)[0].split(':')[1]
filename1 = re.findall(r".*\d+\.sst", data)[0]
level1 = re.findall(r"level:\d+", data)[0].split(":")[1]
# Collect SST filename and level from list_live_files_metadata
with open(dumpFilePath2, "r") as tmp:
data = tmp.read()
filename2 = re.findall(r".*\d+\.sst",data)[0]
level2 = re.findall(r"level \d+",data)[0].split(' ')[1]
filename2 = re.findall(r".*\d+\.sst", data)[0]
level2 = re.findall(r"level \d+", data)[0].split(" ")[1]
# Assert equality between filenames and levels.
self.assertEqual(filename1,filename2)
self.assertEqual(level1,level2)
self.assertEqual(filename1, filename2)
self.assertEqual(level1, level2)
# Create multiple column families and compare the output
# of list_live_files_metadata with dump_live_files once again.
@ -586,7 +672,11 @@ class LDBTestCase(unittest.TestCase):
dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3))
dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4")
self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath4))
self.assertTrue(
self.listLiveFilesMetadata(
"--sort_by_filename --db=%s" % dbPath, dumpFilePath4
)
)
# dump_live_files:
# parse the output and create a map:
@ -601,7 +691,7 @@ class LDBTestCase(unittest.TestCase):
# re.findall should not reorder the data.
# Therefore namesAndLevels[i] matches the data from cfs[i].
for count, nameAndLevel in enumerate(namesAndLevels):
sstFilename = re.findall(r"\d+.sst",nameAndLevel)[0]
sstFilename = re.findall(r"\d+.sst", nameAndLevel)[0]
sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0]
cf = cfs[count]
referenceMap[sstFilename] = [sstLevel, cf]
@ -618,13 +708,13 @@ class LDBTestCase(unittest.TestCase):
sstLines = re.findall(r".*\d+.sst.*", data)
for line in sstLines:
sstFilename = re.findall(r"\d+.sst", line)[0]
sstLevel = re.findall(r"(?<=level )\d+",line)[0]
cf = re.findall(r"(?<=column family \')\w+(?=\')",line)[0]
sstLevel = re.findall(r"(?<=level )\d+", line)[0]
cf = re.findall(r"(?<=column family \')\w+(?=\')", line)[0]
testMap[sstFilename] = [sstLevel, cf]
# Compare the map obtained from dump_live_files and the map
# obtained from list_live_files_metadata. Everything should match.
self.assertEqual(referenceMap,testMap)
self.assertEqual(referenceMap, testMap)
def getManifests(self, directory):
return glob.glob(directory + "/MANIFEST-*")
@ -657,25 +747,30 @@ class LDBTestCase(unittest.TestCase):
manifest_files = self.getManifests(dbPath)
self.assertTrue(len(manifest_files) == 1)
# Test with the default manifest file in dbPath.
self.assertRunOKFull(cmd % dbPath, expected_pattern,
unexpected=False, isPattern=True)
self.assertRunOKFull(
cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
)
self.copyManifests(manifest_files[0], manifest_files[0] + "1")
manifest_files = self.getManifests(dbPath)
self.assertTrue(len(manifest_files) == 2)
# Test with multiple manifest files in dbPath.
self.assertRunFAILFull(cmd % dbPath)
# Running it with the copy we just created should pass.
self.assertRunOKFull((cmd + " --path=%s")
% (dbPath, manifest_files[1]),
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
(cmd + " --path=%s") % (dbPath, manifest_files[1]),
expected_pattern,
unexpected=False,
isPattern=True,
)
# Make sure that using the dump with --path will result in identical
# output as just using manifest_dump.
cmd = "dump --path=%s"
self.assertRunOKFull((cmd)
% (manifest_files[1]),
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
(cmd) % (manifest_files[1]),
expected_pattern,
unexpected=False,
isPattern=True,
)
# Check that null characters don't interfere with the output format.
self.assertRunOK("put a1 b1", "OK")
@ -696,11 +791,14 @@ class LDBTestCase(unittest.TestCase):
# Also note that 0x72=r and 0x4f=O, hence the regex \'r.{2}O\'
# (we cannot use a null character in the subprocess input either,
# so we have to use '.{2}')
cmd_verbose = "manifest_dump --verbose --db=%s | grep -aq $'\'r.{2}O\'' && echo 'matched' || echo 'not matched'" %dbPath
self.assertRunOKFull(cmd_verbose , expected_verbose_output,
unexpected=False, isPattern=True)
cmd_verbose = (
"manifest_dump --verbose --db=%s | grep -aq $''r.{2}O'' && echo 'matched' || echo 'not matched'"
% dbPath
)
self.assertRunOKFull(
cmd_verbose, expected_verbose_output, unexpected=False, isPattern=True
)
def testGetProperty(self):
print("Running testGetProperty...")
@ -709,16 +807,15 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("put 2 2", "OK")
# A "string" property
cmd = "--db=%s get_property rocksdb.estimate-num-keys"
self.assertRunOKFull(cmd % dbPath,
"rocksdb.estimate-num-keys: 2")
self.assertRunOKFull(cmd % dbPath, "rocksdb.estimate-num-keys: 2")
# A "map" property
# FIXME: why doesn't this pick up two entries?
cmd = "--db=%s get_property rocksdb.aggregated-table-properties"
part = "rocksdb.aggregated-table-properties.num_entries: "
expected_pattern = re.compile(part)
self.assertRunOKFull(cmd % dbPath,
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
)
# An invalid property
cmd = "--db=%s get_property rocksdb.this-property-does-not-exist"
self.assertRunFAILFull(cmd % dbPath)
@ -738,27 +835,27 @@ class LDBTestCase(unittest.TestCase):
sst_files = self.getSSTFiles(dbPath)
self.assertTrue(len(sst_files) >= 1)
cmd = "dump --path=%s --decode_blob_index"
self.assertRunOKFull((cmd)
% (sst_files[0]),
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
(cmd) % (sst_files[0]), expected_pattern, unexpected=False, isPattern=True
)
def testBlobDump(self):
print("Running testBlobDump")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK")
self.assertRunOK(
'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK"
)
# Pattern to expect from blob file dump.
regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary"
regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa
expected_pattern = re.compile(regex)
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
cmd = "dump --path=%s --dump_uncompressed_blobs"
self.assertRunOKFull((cmd)
% (blob_files[0]),
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
(cmd) % (blob_files[0]), expected_pattern, unexpected=False, isPattern=True
)
def testWALDump(self):
print("Running testWALDump...")
@ -775,15 +872,14 @@ class LDBTestCase(unittest.TestCase):
wal_files = self.getWALFiles(dbPath)
self.assertTrue(len(wal_files) >= 1)
cmd = "dump --path=%s"
self.assertRunOKFull((cmd)
% (wal_files[0]),
expected_pattern, unexpected=False,
isPattern=True)
self.assertRunOKFull(
(cmd) % (wal_files[0]), expected_pattern, unexpected=False, isPattern=True
)
def testListColumnFamilies(self):
print("Running testListColumnFamilies...")
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
cmd = "list_column_families | grep -v \"Column families\""
cmd = 'list_column_families | grep -v "Column families"'
# Test on valid dbPath.
self.assertRunOK(cmd, "{default}")
# Test on empty path.
@ -791,34 +887,28 @@ class LDBTestCase(unittest.TestCase):
def testColumnFamilies(self):
print("Running testColumnFamilies...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) # noqa: F841 T25377293 Grandfathered in
_ = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put cf1_1 1 --create_if_missing", "OK")
self.assertRunOK("put cf1_2 2 --create_if_missing", "OK")
self.assertRunOK("put cf1_3 3 --try_load_options", "OK")
# Passing a non-default column family to a single-CF DB should fail.
self.assertRunFAIL("get cf1_1 --column_family=two")
self.assertRunOK("create_column_family two", "OK")
self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two",
"OK")
self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two",
"OK")
self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", "OK")
self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", "OK")
self.assertRunOK("delete cf1_2", "OK")
self.assertRunOK("create_column_family three", "OK")
self.assertRunOK("delete cf2_2 --column_family=two", "OK")
self.assertRunOK(
"put cf3_1 3 --create_if_missing --column_family=three",
"OK")
self.assertRunOK("put cf3_1 3 --create_if_missing --column_family=three", "OK")
self.assertRunOK("get cf1_1 --column_family=default", "1")
self.assertRunOK("dump --column_family=two",
"cf2_1 ==> 1\nKeys in range: 1")
self.assertRunOK("dump --column_family=two --try_load_options",
"cf2_1 ==> 1\nKeys in range: 1")
self.assertRunOK("dump",
"cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2")
self.assertRunOK("get cf2_1 --column_family=two",
"1")
self.assertRunOK("get cf3_1 --column_family=three",
"3")
self.assertRunOK("dump --column_family=two", "cf2_1 ==> 1\nKeys in range: 1")
self.assertRunOK(
"dump --column_family=two --try_load_options",
"cf2_1 ==> 1\nKeys in range: 1",
)
self.assertRunOK("dump", "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2")
self.assertRunOK("get cf2_1 --column_family=two", "1")
self.assertRunOK("get cf3_1 --column_family=three", "3")
self.assertRunOK("drop_column_family three", "OK")
# non-existing column family.
self.assertRunFAIL("get cf3_1 --column_family=four")
@ -830,32 +920,36 @@ class LDBTestCase(unittest.TestCase):
# Dump, load, write external sst and ingest it in another db
dbPath = os.path.join(self.TMP_DIR, "db1")
self.assertRunOK(
"batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4"
% dbPath,
"OK")
self.assertRunOK("scan --db=%s" % dbPath,
"x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
"batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" % dbPath,
"OK",
)
self.assertRunOK("scan --db=%s" % dbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4")
dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
with open(dumpFilePath, 'w') as f:
with open(dumpFilePath, "w") as f:
f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40")
externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst")
self.assertTrue(self.writeExternSst("--create_if_missing --db=%s"
% dbPath,
dumpFilePath,
externSstPath))
self.assertTrue(
self.writeExternSst(
"--create_if_missing --db=%s" % dbPath, dumpFilePath, externSstPath
)
)
# cannot ingest if allow_global_seqno is false
self.assertFalse(
self.ingestExternSst(
"--create_if_missing --allow_global_seqno=false --db=%s"
% dbPath,
externSstPath))
"--create_if_missing --allow_global_seqno=false --db=%s" % dbPath,
externSstPath,
)
)
self.assertTrue(
self.ingestExternSst(
"--create_if_missing --allow_global_seqno --db=%s"
% dbPath,
externSstPath))
self.assertRunOKFull("scan --db=%s" % dbPath,
"x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40")
"--create_if_missing --allow_global_seqno --db=%s" % dbPath,
externSstPath,
)
)
self.assertRunOKFull(
"scan --db=%s" % dbPath, "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40"
)
if __name__ == "__main__":
unittest.main()

@ -2,11 +2,12 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals
import subprocess
import argparse
import random
import time
import subprocess
import sys
import time
def generate_runtimes(total_runtime):
@ -24,31 +25,33 @@ def generate_runtimes(total_runtime):
def main(args):
runtimes = generate_runtimes(int(args.runtime_sec))
print("Going to execute write stress for " + str(runtimes)) # noqa: E999 T25377293 Grandfathered in
print(
"Going to execute write stress for " + str(runtimes)
) # noqa: E999 T25377293 Grandfathered in
first_time = True
for runtime in runtimes:
kill = random.choice([False, True])
cmd = './write_stress --runtime_sec=' + \
("-1" if kill else str(runtime))
cmd = "./write_stress --runtime_sec=" + ("-1" if kill else str(runtime))
if len(args.db) > 0:
cmd = cmd + ' --db=' + args.db
cmd = cmd + " --db=" + args.db
if first_time:
first_time = False
else:
# use current db
cmd = cmd + ' --destroy_db=false'
cmd = cmd + " --destroy_db=false"
if random.choice([False, True]):
cmd = cmd + ' --delete_obsolete_files_with_fullscan=true'
cmd = cmd + " --delete_obsolete_files_with_fullscan=true"
if random.choice([False, True]):
cmd = cmd + ' --low_open_files_mode=true'
cmd = cmd + " --low_open_files_mode=true"
print("Running write_stress for %d seconds (%s): %s" %
(runtime, ("kill-mode" if kill else "clean-shutdown-mode"),
cmd))
print(
"Running write_stress for %d seconds (%s): %s"
% (runtime, ("kill-mode" if kill else "clean-shutdown-mode"), cmd)
)
child = subprocess.Popen([cmd], shell=True)
killtime = time.time() + runtime
@ -58,19 +61,23 @@ def main(args):
if child.returncode == 0:
break
else:
print("ERROR: write_stress died with exitcode=%d\n"
% child.returncode)
print(
"ERROR: write_stress died with exitcode=%d\n" % child.returncode
)
sys.exit(1)
if kill:
child.kill()
# breathe
time.sleep(3)
if __name__ == '__main__':
if __name__ == "__main__":
random.seed(time.time())
parser = argparse.ArgumentParser(description="This script runs and kills \
write_stress multiple times")
parser.add_argument("--runtime_sec", default='1000')
parser.add_argument("--db", default='')
parser = argparse.ArgumentParser(
description="This script runs and kills \
write_stress multiple times"
)
parser.add_argument("--runtime_sec", default="1000")
parser.add_argument("--db", default="")
args = parser.parse_args()
main(args)
