diff --git a/.circleci/config.yml b/.circleci/config.yml
index c7c3ff056..8c2c7e931 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -206,7 +206,11 @@ commands:
           destination: test_logs
       - run:
           name: Send benchmark report to visualisation
-          command: ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3/_doc
+          command: |
+            set +e
+            set +o pipefail
+            ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3/_doc
+            true
 
 executors:
   windows-2xlarge:
@@ -964,6 +968,16 @@ workflows:
   build-fuzzers:
     jobs:
       - build-fuzzers
+  benchmark-linux:
+    triggers:
+      - schedule:
+          cron: "0 * * * *"
+          filters:
+            branches:
+              only:
+                - main
+    jobs:
+      - benchmark-linux
   nightly:
     triggers:
       - schedule:
diff --git a/build_tools/benchmark_log_tool.py b/build_tools/benchmark_log_tool.py
index 9379bfcbd..572888eef 100755
--- a/build_tools/benchmark_log_tool.py
+++ b/build_tools/benchmark_log_tool.py
@@ -30,45 +30,56 @@ class BenchmarkResultException(Exception):
 
 
 class BenchmarkUtils:
-    expected_keys = ['ops_sec', 'mb_sec', 'total_size_gb', 'level0_size_gb', 'sum_gb', 'write_amplification',
-                     'write_mbps', 'usec_op', 'percentile_50', 'percentile_75',
-                     'percentile_99', 'percentile_99.9', 'percentile_99.99', 'uptime',
-                     'stall_time', 'stall_percent', 'test_name', 'test_date', 'rocksdb_version',
-                     'job_id', 'timestamp']
-
-    metric_keys = ['ops_sec', 'mb_sec', 'total_size_gb', 'level0_size_gb', 'sum_gb', 'write_amplification',
-                   'write_mbps', 'usec_op', 'percentile_50', 'percentile_75',
-                   'percentile_99', 'percentile_99.9', 'percentile_99.99', 'uptime',
-                   'stall_time', 'stall_percent']
+    expected_keys = ['ops_sec', 'mb_sec', 'lsm_sz', 'blob_sz', 'c_wgb', 'w_amp',
+                     'c_mbps', 'c_wsecs', 'c_csecs', 'b_rgb', 'b_wgb', 'usec_op',
+                     'p50', 'p99', 'p99.9', 'p99.99', 'pmax',
+                     'uptime', 'stall%', 'Nstall', 'u_cpu', 's_cpu', 'rss', 'test', 'date', 'version', 'job_id']
 
     def sanity_check(row):
-        if not 'test_name' in row:
+        if not 'test' in row:
+            logging.debug(f"not 'test' in row: {row}")
             return False
-        if row['test_name'] == '':
+        if row['test'] == '':
+            logging.debug(f"row['test'] == '': {row}")
             return False
-        if not 'test_date' in row:
+        if not 'date' in row:
+            logging.debug(f"not 'date' in row: {row}")
             return False
         if not 'ops_sec' in row:
+            logging.debug(f"not 'ops_sec' in row: {row}")
             return False
         try:
             v = int(row['ops_sec'])
         except (ValueError, TypeError):
+            logging.debug(f"int(row['ops_sec']): {row}")
+            return False
+        try:
+            (_, _) = parser.parse(row['date'], fuzzy_with_tokens=True)
+        except (parser.ParserError):
+            logging.error(f"parser.parse((row['date']): not a valid format for date in row: {row}")
             return False
         return True
 
     def conform_opensearch(row):
-        (dt, _) = parser.parse(row['test_date'], fuzzy_with_tokens=True)
+        (dt, _) = parser.parse(row['date'], fuzzy_with_tokens=True)
         row['test_date'] = dt.isoformat()
         return dict((key.replace('.', '_'), value)
                     for (key, value) in row.items())
 
 
 class ResultParser:
-    def __init__(self, field="(\w|[+-:.])+", intrafield="(\s)+", separator="\t"):
+    def __init__(self, field="(\w|[+-:.%])+", intrafield="(\s)+", separator="\t"):
         self.field = re.compile(field)
         self.intra = re.compile(intrafield)
         self.sep = re.compile(separator)
 
+    def ignore(self, l_in: str):
+        if len(l_in) == 0:
+            return True
+        if l_in[0:1] == '#':
+            return True
+        return False
+
     def line(self, l_in: str):
         '''Parse a line into items
         Being clever about separators
@@ -102,7 +113,7 @@ class ResultParser:
 
     def parse(self, lines):
         '''Parse something that iterates lines'''
-        rows = [self.line(line) for line in lines]
+        rows = [self.line(line) for line in lines if not self.ignore(line)]
         header = rows[0]
         width = len(header)
         records = [{k: v for (k, v) in itertools.zip_longest(
@@ -123,7 +134,7 @@ def load_report_from_tsv(filename: str):
 def push_report_to_opensearch(report, esdocument):
     sanitized = [BenchmarkUtils.conform_opensearch(row)
                  for row in report if BenchmarkUtils.sanity_check(row)]
-    logging.debug(f"upload {len(sanitized)} benchmarks to opensearch")
+    logging.debug(f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch")
     for single_benchmark in sanitized:
         logging.debug(f"upload benchmark: {single_benchmark}")
         response = requests.post(
@@ -133,6 +144,13 @@ def push_report_to_opensearch(report, esdocument):
             f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}")
         response.raise_for_status()
 
+def push_report_to_null(report):
+
+    for row in report:
+        if BenchmarkUtils.sanity_check(row):
+            logging.debug(f"row {row}")
+            conformed = BenchmarkUtils.conform_opensearch(row)
+            logging.debug(f"conformed row {conformed}")
 
 def main():
     '''Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
@@ -151,11 +169,15 @@ def main():
     parser.add_argument('--tsvfile', default='build_tools/circle_api_scraper_input.txt',
                         help='File from which to read tsv report')
     parser.add_argument('--esdocument', help='ElasticSearch/OpenSearch document URL to upload report into')
+    parser.add_argument('--upload', choices=['opensearch', 'none'], default='opensearch')
     args = parser.parse_args()
     logging.debug(f"Arguments: {args}")
     reports = load_report_from_tsv(args.tsvfile)
-    push_report_to_opensearch(reports, args.esdocument)
+    if (args.upload == 'opensearch'):
+        push_report_to_opensearch(reports, args.esdocument)
+    else:
+        push_report_to_null(reports)
 
 
 if __name__ == '__main__':
     sys.exit(main())
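
For context (not part of the diff): a minimal standalone sketch of the parsing behaviour the patch adds. The new ResultParser.ignore() drops blank and '#'-prefixed lines before the first surviving line is treated as the TSV header, and the new --upload none option lets the script run this parse-and-sanity-check path without posting to an OpenSearch endpoint. The helper functions and the sample values below are hypothetical stand-ins, simplified from the real ResultParser.line()/parse() logic (which also handles intra-field whitespace).

    # Sketch only: mimics the patched "skip comments, then header + records" flow.
    import itertools
    import re

    def ignore(line: str) -> bool:
        # Same rule as the new ResultParser.ignore(): drop blanks and '#' comments.
        return len(line) == 0 or line.startswith('#')

    def parse(lines):
        # Simplified stand-in for ResultParser.parse(): first surviving line is
        # the header, later lines become {column: value} records.
        rows = [re.split(r"\t+", line.strip()) for line in lines if not ignore(line)]
        header = rows[0]
        return [dict(itertools.zip_longest(header, row)) for row in rows[1:]]

    # Made-up report lines, using a subset of the new expected_keys columns.
    report = [
        "# comment emitted by the benchmark driver",   # dropped by ignore()
        "ops_sec\tusec_op\ttest\tdate",
        "136918\t7.3\tfillseq.wal_disabled.v400\t2022-04-12T08:00:00",
    ]
    for record in parse(report):
        print(record)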