From 0f3208d8fae606ac5c2121ed75572ef9ab99f8f0 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 24 Oct 2019 12:42:05 +0200 Subject: [PATCH] Adds a README for the benchmark --- README.md | 2 + bench/README.md | 38 + bench/bsbm-plot.py | 4 +- bench/bsbm.businessIntelligence.svg | 1798 +++++++++++++++++++++++++++ bench/bsbm.explore.svg | 1567 +++++++++++++++++++++++ bench/bsbm_oxigraph.sh | 7 +- 6 files changed, 3411 insertions(+), 5 deletions(-) create mode 100644 bench/README.md create mode 100644 bench/bsbm.businessIntelligence.svg create mode 100644 bench/bsbm.explore.svg diff --git a/README.md b/README.md index 489c7b7f..ba98d63c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Are currently implemented: * [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/Tpt/rio). * [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/) and [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/). +A preliminary benchmark [is provided](bench/README.md). + ## Run the web server ### Build diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 00000000..bb0b1ffc --- /dev/null +++ b/bench/README.md @@ -0,0 +1,38 @@ +BSBM +==== + +The [Berlin SPARQL Benchmark (BSBM)](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/) is a simple SPARQL benchmark. + +It provides a dataset generator and multiple sets of queries grouped by "use cases". + +## Results + +Here we compare Oxigraph with some existing SPARQL implementations (Blazegraph, Virtuoso and GraphDB). 
+ +The dataset used in the following charts is generated with 10k "products" (see [its spec](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/Dataset/index.html)). It leads to the creation of 3.5M triples. +The benchmark has been executed on a Dell Precision 5520 with 16GB of RAM. For Oxigraph, available memory has been limited to 1GB. + +### Explore +The [explore use case](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/ExploreUseCase/index.html) is composed of 11 queries that do simple data retrieval. + +Query 6 existed in previous versions of the benchmark but is now removed. + +![explore use case results](bsbm.explore.svg) + +### Business Intelligence +The [business intelligence use case](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/BusinessIntelligenceUseCase/index.html) is composed of 8 complex analytics queries. + +Query 4 seems to be failing on Virtuoso and query 5 on Blazegraph and GraphDB. + +![business intelligence use case results](bsbm.businessIntelligence.svg) + +## How to reproduce the benchmark + +The code of the benchmark is in the `bsbm-tools` submodule. You should pull it with a `git submodule update` before running the benchmark. + +To run the benchmark for Oxigraph run `bash bsbm_oxigraph.sh`. It will compile the current Oxigraph code and run the benchmark against it. +You can tweak the number of products in the dataset and the available memory using the variables at the beginning of `bsbm_oxigraph.sh`. + +To generate the plots run `python3 bsbm-plot.py`. + +Scripts are also provided for the other benchmarks (`bsbm_blazegraph.sh`, `bsbm_graphdb.sh` and `bsbm_virtuoso.sh`). 
\ No newline at end of file diff --git a/bench/bsbm-plot.py b/bench/bsbm-plot.py index 5c9b896b..2362c67a 100644 --- a/bench/bsbm-plot.py +++ b/bench/bsbm-plot.py @@ -27,7 +27,7 @@ for file in glob('bsbm.explore.*.xml'): val = float(query.find('aqet').text) if val > 0: aqet[run][int(query.attrib['nr'])] = val -plot_y_per_x_per_plot(aqet, 'query id', 'execution time (s)', 'bsbm.explore.png') +plot_y_per_x_per_plot(aqet, 'query id', 'execution time (s)', 'bsbm.explore.svg') # BSBM business intelligence aqet = defaultdict(dict) @@ -37,6 +37,6 @@ for file in glob('bsbm.businessIntelligence.*.xml'): val = float(query.find('aqet').text) if val > 0: aqet[run][int(query.attrib['nr'])] = val -plot_y_per_x_per_plot(aqet, 'query id', 'execution time (s) - log scale', 'bsbm.businessIntelligence.png', log=True) +plot_y_per_x_per_plot(aqet, 'query id', 'execution time (s) - log scale', 'bsbm.businessIntelligence.svg', log=True) plt.show() diff --git a/bench/bsbm.businessIntelligence.svg b/bench/bsbm.businessIntelligence.svg new file mode 100644 index 00000000..b067ddcf --- /dev/null +++ b/bench/bsbm.businessIntelligence.svgdiff --git a/bench/bsbm.explore.svg b/bench/bsbm.explore.svg new file mode 100644 index 00000000..2cda84a3 --- /dev/null +++ b/bench/bsbm.explore.svgdiff --git a/bench/bsbm_oxigraph.sh b/bench/bsbm_oxigraph.sh index 573b8ce1..0284073b 100755 --- a/bench/bsbm_oxigraph.sh +++ b/bench/bsbm_oxigraph.sh @@ -1,10 +1,11 @@ #!/usr/bin/env bash -DATASET_SIZE=100000 -MEMORY_SIZE=1000000 +DATASET_SIZE=100000 # number of products in the dataset. There are around 350 triples generated per product. +MEMORY_SIZE=1000000 # available memory for Oxigraph in kB (passed to `ulimit -d`). Useful to simulate low RAM machines. 
+ cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -cargo build --release --manifest-path="../../server/Cargo.toml" +cargo build --release --manifest-path="../../server/Cargo.toml" ( ulimit -d ${MEMORY_SIZE} ./../../target/release/oxigraph_server --file oxigraph_data