Updates benchmarks

4 years ago · e34e56beef
parent d248ee6183
commit e34e56beef
10 changed files with 3691 additions and 2869 deletions
--- a/bench/README.md
+++ b/bench/README.md
@ -7,11 +7,13 @@ It provides a dataset generator and multiple set of queries grouped by "use case

 ## Results

-We compare here Oxigraph with some existing SPARQL implementations (Blazegraph, Virtuoso and GraphDB).
+We compare here Oxigraph with some existing SPARQL implementations (Blazegraph, GraphDB, Jena and Virtuoso).

-The dataset used in the following charts is generated with 10k "products" (see [its spec](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/Dataset/index.html)). It leads to the creation of 3.5M triples.
+The dataset used in the following charts is generated with 100k "products" (see [its spec](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/Dataset/index.html)). It leads to the creation of 35M triples.
 It has been executed on a PrevailPro P3000 with 32GB of RAM.
-The tests have been executed with a concurrency factor of 5 (i.e. at most 5 queries are send at the same time to the server).
+The tests have been executed with a concurrency factor of 16 (i.e. at most 16 queries are sent at the same time to the server).
+
+Beware: the *y* axis of the graphs uses a log scale, so that systems with very different speeds can be displayed properly on the same graph.

 ### Explore
 The [explore use case](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/ExploreUseCase/index.html) is composed of 11 queries that do simple data retrieval.
@ -23,7 +25,7 @@ Query 6 existed in previous versions of the benchmark but is now removed.
 ### Explore and update
 The [explore and update use case](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/index.html#usecase_explore_and_update) is composed of the 2 operations of the [update use case](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/UpdateUseCase/index.html) (`INSERT DATA` and `DELETE WHERE`) and the 11 queries of the [explore use case](http://wifo5-03.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/spec/ExploreUseCase/index.html).

-The first two char elements (1 and 2) are the 2 updates and the other (3 to 14) are the 11 queries.
+The first two elements (1 and 2) are the 2 updates and the others (3 to 14) are the 11 queries.

 ![explore use case results](bsbm.exploreAndUpdate.svg)

@ -48,4 +50,4 @@ You could tweak the number of products in the dataset using the environment vari

 To generate the plots run `python3 bsbm-plot.py`.

-Scripts are also provided for the other benchmarks (`bsbm_blazegraph.sh`, `bsbm_graphdb.sh` and `bsbm_virtuoso.sh`).
+Scripts are also provided for the other benchmarks (`bsbm_blazegraph.sh`, `bsbm_graphdb.sh`, `bsbm_jena.sh` and `bsbm_virtuoso.sh`).
--- a/bench/bsbm-plot.py
+++ b/bench/bsbm-plot.py
@ -1,5 +1,5 @@
-import xml.etree.ElementTree as ET
 import matplotlib.pyplot as plt
+import xml.etree.ElementTree as ET
 from collections import defaultdict
 from glob import glob
 from numpy import array
@ -10,11 +10,13 @@ def plot_y_per_x_per_plot(data, xlabel, ylabel, file, log=False):

    bar_width = 1 / (len(data) + 1)
    for i, (label, xys) in enumerate(sorted(data.items())):
-        plt.bar(array(list(xys.keys())) + bar_width * (i + 1 - len(data) / 2), array(list(xys.values())), bar_width, label=label)
+        plt.bar(array(list(xys.keys())) + bar_width * (i + 1 - len(data) / 2), array(list(xys.values())), bar_width,
+                label=label)

    plt.legend()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
+    plt.yscale('log')
    if log:
        plt.yscale('log')
    plt.savefig(file)
@ -22,14 +24,29 @@ def plot_y_per_x_per_plot(data, xlabel, ylabel, file, log=False):

 def plot_usecase(name: str):
    aqet = defaultdict(dict)
+    avgresults_by_query = defaultdict(lambda: defaultdict(dict))
    for file in glob('bsbm.{}.*.xml'.format(name)):
-        run = file.replace('bsbm.{}.'.format(name), '').replace('.xml', '')
+        parts = file.split('.')
+        run = '.'.join(parts[2:-1])
        for query in ET.parse(file).getroot().find('queries').findall('query'):
-            val = float(query.find('aqet').text)
-            if val > 0:
-                aqet[run][int(query.attrib['nr'])] = val
+            query_id = int(query.attrib['nr'])
+            for child in query.iter():
+                if child.tag == "aqet":
+                    val = float(query.find('aqet').text)
+                    if val > 0:
+                        aqet[run][query_id] = val
+                elif child.tag == "avgresults":
+                    avgresults_by_query[query_id][int(parts[3])][run] = float(query.find('avgresults').text)
    plot_y_per_x_per_plot(aqet, 'query id', 'execution time (s)', 'bsbm.{}.svg'.format(name))

+    # we check if avgresults seems consistent
+    for query, t in avgresults_by_query.items():
+        for size, value_by_run in t.items():
+            avg = sum(value_by_run.values()) / len(value_by_run)
+            if not all(abs(v - avg) < 1 for v in value_by_run.values()):
+                print(
+                    f'Strange value for average results for usecase {name} of size {size} and query {query}: {value_by_run}')
+

 plot_usecase('explore')
 plot_usecase('exploreAndUpdate')
--- a/bench/bsbm.businessIntelligence.svg
+++ b/bench/bsbm.businessIntelligence.svg
--- a/bench/bsbm.explore.svg
+++ b/bench/bsbm.explore.svg
--- a/bench/bsbm.exploreAndUpdate.svg
+++ b/bench/bsbm.exploreAndUpdate.svg
--- a/bench/bsbm_blazegraph.sh
+++ b/bench/bsbm_blazegraph.sh
@ -1,13 +1,13 @@
 #!/usr/bin/env bash

-DATASET_SIZE=10000
-PARALLELISM=5
+DATASET_SIZE=100000
+PARALLELISM=16
 cd bsbm-tools
 ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
 wget https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_RELEASE_2_1_5/blazegraph.jar
 /usr/lib/jvm/java-8-openjdk/bin/java -server -jar blazegraph.jar &
 sleep 10
-curl -f -X POST -H 'Content-Type:text/plain' --data-binary "@explore-${DATASET_SIZE}.nt" http://localhost:9999/blazegraph/sparql
+curl -f -X POST -H 'Content-Type:text/turtle' -T "explore-${DATASET_SIZE}.nt" http://localhost:9999/blazegraph/sparql
 ./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.blazegraph.${DATASET_SIZE}.${PARALLELISM}.2.1.5.xml" http://localhost:9999/blazegraph/sparql
 ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.blazegraph.${DATASET_SIZE}.${PARALLELISM}.2.1.5.xml" http://localhost:9999/blazegraph/sparql -u http://localhost:9999/blazegraph/sparql -udataset "explore-update-${DATASET_SIZE}.nt"
 #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.blazegraph.${DATASET_SIZE}.${PARALLELISM}.2.1.5.xml" http://localhost:9999/blazegraph/sparql
--- a/bench/bsbm_graphdb.sh
+++ b/bench/bsbm_graphdb.sh
@ -1,19 +1,20 @@
 #!/usr/bin/env bash

-DATASET_SIZE=10000 # number of products in the dataset. There is around 350 triples generated by product.
-PARALLELISM=5
+DATASET_SIZE=100000
+PARALLELISM=16
+VERSION="9.3.3"
 cd bsbm-tools
-./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}"
+./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
 export JAVA_HOME=/usr/lib/jvm/java-11-openjdk
 ../graphdb-free-9.3.3/bin/graphdb -s -Dgraphdb.logger.root.level=WARN &
 sleep 10
 curl -f -X POST http://localhost:7200/rest/repositories -H 'Content-Type:application/json' -d '
 {"id":"test","params":{"ruleset":{"label":"Ruleset","name":"ruleset","value":"empty"},"title":{"label":"Repository title","name":"title","value":"GraphDB Free repository"},"checkForInconsistencies":{"label":"Check for inconsistencies","name":"checkForInconsistencies","value":"false"},"disableSameAs":{"label":"Disable owl:sameAs","name":"disableSameAs","value":"true"},"baseURL":{"label":"Base URL","name":"baseURL","value":"http://example.org/owlim#"},"repositoryType":{"label":"Repository type","name":"repositoryType","value":"file-repository"},"id":{"label":"Repository ID","name":"id","value":"repo-test"},"storageFolder":{"label":"Storage folder","name":"storageFolder","value":"storage"}},"title":"Test","type":"free"}
 '
-curl -f -X PUT -H 'Content-Type:application/n-triples' --data-binary "@explore-${DATASET_SIZE}.nt" http://localhost:7200/repositories/test/statements
-./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.graphdb.${DATASET_SIZE}.${PARALLELISM}.9.3.3.xml" http://localhost:7200/repositories/test
-./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.graphdb.${DATASET_SIZE}.${PARALLELISM}.9.3.3.xml" http://localhost:7200/repositories/test -u http://localhost:7200/repositories/test -udataset "explore-update-${DATASET_SIZE}.nt"
-#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${DATASET_SIZE}.${PARALLELISM}.9.3.3.xml" http://localhost:7200/repositories/test
+curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZE}.nt" http://localhost:7200/repositories/test/statements
+./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.graphdb.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://localhost:7200/repositories/test
+./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.graphdb.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://localhost:7200/repositories/test -u http://localhost:7200/repositories/test/statements -udataset "explore-update-${DATASET_SIZE}.nt"
+#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://localhost:7200/repositories/test
 kill $!
 sleep 5
 rm -r ../graphdb-free-9.3.3/data
--- a/bench/bsbm_jena.sh
+++ b/bench/bsbm_jena.sh
@ -1,24 +1,27 @@
 #!/usr/bin/env bash

-DATASET_SIZE=10000
-PARALLELISM=5
+DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product.
+PARALLELISM=16
+VERSION="4.3.2"
 cd bsbm-tools
 ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
-wget https://downloads.apache.org/jena/binaries/apache-jena-fuseki-4.1.0.zip
-unzip apache-jena-fuseki-4.1.0.zip
-rm apache-jena-fuseki-4.1.0.zip
-echo "rootLogger.level = ERROR" > log4j2.properties
-./apache-jena-fuseki-4.1.0/fuseki-server --tdb2 --loc=td_data --update /bsbm &
+wget https://downloads.apache.org/jena/binaries/apache-jena-${VERSION}.zip
+unzip apache-jena-${VERSION}.zip
+rm apache-jena-${VERSION}.zip
+./apache-jena-${VERSION}/bin/tdb2.tdbloader --loader=parallel --loc=td_data "explore-${DATASET_SIZE}.nt"
+wget https://downloads.apache.org/jena/binaries/apache-jena-fuseki-${VERSION}.zip
+unzip apache-jena-fuseki-${VERSION}.zip
+rm apache-jena-fuseki-${VERSION}.zip
+echo "rootLogger.level = ERROR" > ./apache-jena-fuseki-${VERSION}/log4j2.properties
+./apache-jena-fuseki-${VERSION}/fuseki-server --tdb2 --loc=td_data --update /bsbm &
 sleep 10
-curl -f -X POST -H 'Content-Type:text/plain' --data-binary "@explore-${DATASET_SIZE}.nt" http://localhost:3030/bsbm
-sleep 60
-./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.jena.${DATASET_SIZE}.${PARALLELISM}.4.1.0.xml" http://localhost:3030/bsbm/query
-./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${DATASET_SIZE}.${PARALLELISM}.4.1.0.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt"
-#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${DATASET_SIZE}.${PARALLELISM}.4.1.0.xml" http://localhost:3030/bsbm/query
+./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.jena.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://localhost:3030/bsbm/query
+./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt"
+#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://localhost:3030/bsbm/query
 kill $!
 rm "explore-${DATASET_SIZE}.nt"
 rm "explore-update-${DATASET_SIZE}.nt"
 rm -r td_data
 rm -r run
-rm -r apache-jena-fuseki-4.1.0
-rm log4j2.properties
+rm -r apache-jena-${VERSION}
+rm -r apache-jena-fuseki-${VERSION}
--- a/bench/bsbm_oxigraph.sh
+++ b/bench/bsbm_oxigraph.sh
@ -1,17 +1,17 @@
 #!/usr/bin/env bash

-DATASET_SIZE=10000 # number of products in the dataset. There is around 350 triples generated by product.
-PARALLELISM=5
+DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product.
+PARALLELISM=16
 cd bsbm-tools
 ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
 cargo build --release --manifest-path="../../server/Cargo.toml"
+VERSION=$(./../../target/release/oxigraph_server --version | sed 's/oxigraph_server //g')
 ./../../target/release/oxigraph_server --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
 ./../../target/release/oxigraph_server --location oxigraph_data serve --bind 127.0.0.1:7878 &
 sleep 1
-curl -f -X POST -H 'Content-Type:application/n-triples' --data-binary "@explore-${DATASET_SIZE}.nt" http://127.0.0.1:7878/store?default
-./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${DATASET_SIZE}.${PARALLELISM}.main.xml" http://127.0.0.1:7878/query
-./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${DATASET_SIZE}.${PARALLELISM}.main.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"
-#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${DATASET_SIZE}.${PARALLELISM}.main.xml" "http://127.0.0.1:7878/query"
+./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://127.0.0.1:7878/query
+./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"
+#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" "http://127.0.0.1:7878/query"
 kill $!
 rm -r oxigraph_data
 rm "explore-${DATASET_SIZE}.nt"
--- a/bench/bsbm_virtuoso.sh
+++ b/bench/bsbm_virtuoso.sh
@ -1,9 +1,10 @@
 #!/usr/bin/env bash

-DATASET_SIZE=10000
-PARALLELISM=5
+DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product.
+PARALLELISM=16
+VERSION="7.2.5"
 cd bsbm-tools
-./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}"
+./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
 cp ../virtuoso-opensource/database/virtuoso.ini.sample virtuoso.ini
 mkdir ../database
 ../virtuoso-opensource/bin/virtuoso-t -f &
@ -13,8 +14,9 @@ SPARQL CREATE GRAPH <urn:graph:test>;
 ld_dir('$(realpath .)', 'explore-${DATASET_SIZE}.nt', 'urn:graph:test');
 rdf_loader_run();
 EOF
-./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.virtuoso.${DATASET_SIZE}.7.2.5.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test'
-#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${DATASET_SIZE}.7.2.5.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test'
+./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.virtuoso.${DATASET_SIZE}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test'
+# ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.virtuoso.${DATASET_SIZE}.${PARALLELISM}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' -u 'http://dba:dba@localhost:8890/sparql-auth?graph-uri=urn:graph:test' -udataset "explore-update-${DATASET_SIZE}.nt"
+# ./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${DATASET_SIZE}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test'
 kill $!
 rm -r ../database
 rm "explore-${DATASET_SIZE}.nt"