From 728a5dadf4b946f7eb720aba9c63fbdf18e0a9af Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 3 Nov 2021 21:21:30 +0100 Subject: [PATCH] Adds a native SPARQL query and update benchmark based on BSBM --- bench/bsbm_blazegraph.sh | 1 + bench/bsbm_graphdb.sh | 1 + bench/bsbm_jena.sh | 1 + bench/bsbm_oxigraph.sh | 1 + bench/bsbm_virtuoso.sh | 1 + lib/Cargo.toml | 3 +- lib/benches/store.rs | 175 ++++++++++++++++++++++++--------------- testsuite/rdf-star | 2 +- 8 files changed, 114 insertions(+), 71 deletions(-) diff --git a/bench/bsbm_blazegraph.sh b/bench/bsbm_blazegraph.sh index 00acbb94..ec2d1d9d 100755 --- a/bench/bsbm_blazegraph.sh +++ b/bench/bsbm_blazegraph.sh @@ -15,4 +15,5 @@ kill $! rm blazegraph.jar rm blazegraph.jnl rm "explore-${DATASET_SIZE}.nt" +rm "explore-update-${DATASET_SIZE}.nt" rm -r td_data diff --git a/bench/bsbm_graphdb.sh b/bench/bsbm_graphdb.sh index 2a3324ed..cabb1d4d 100755 --- a/bench/bsbm_graphdb.sh +++ b/bench/bsbm_graphdb.sh @@ -18,4 +18,5 @@ kill $! sleep 5 rm -r ../graphdb-free-9.3.3/data rm "explore-${DATASET_SIZE}.nt" +rm "explore-update-${DATASET_SIZE}.nt" rm -r td_data diff --git a/bench/bsbm_jena.sh b/bench/bsbm_jena.sh index d1d76def..aa991dca 100755 --- a/bench/bsbm_jena.sh +++ b/bench/bsbm_jena.sh @@ -17,6 +17,7 @@ sleep 60 #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${DATASET_SIZE}.${PARALLELISM}.4.1.0.xml" http://localhost:3030/bsbm/query kill $! rm "explore-${DATASET_SIZE}.nt" +rm "explore-update-${DATASET_SIZE}.nt" rm -r td_data rm -r run rm -r apache-jena-fuseki-4.1.0 diff --git a/bench/bsbm_oxigraph.sh b/bench/bsbm_oxigraph.sh index 2e190439..30e719c6 100755 --- a/bench/bsbm_oxigraph.sh +++ b/bench/bsbm_oxigraph.sh @@ -14,4 +14,5 @@ curl -f -X POST -H 'Content-Type:application/n-triples' --data-binary "@explore- kill $! 
rm -r oxigraph_data rm "explore-${DATASET_SIZE}.nt" +rm "explore-update-${DATASET_SIZE}.nt" rm -r td_data \ No newline at end of file diff --git a/bench/bsbm_virtuoso.sh b/bench/bsbm_virtuoso.sh index ddbf7d2f..e5f02a94 100755 --- a/bench/bsbm_virtuoso.sh +++ b/bench/bsbm_virtuoso.sh @@ -18,4 +18,5 @@ EOF kill $! rm -r ../database rm "explore-${DATASET_SIZE}.nt" +rm "explore-update-${DATASET_SIZE}.nt" rm -r td_data diff --git a/lib/Cargo.toml b/lib/Cargo.toml index ca65437d..9a151a09 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -51,9 +51,10 @@ js-sys = "0.3" getrandom = {version="0.2", features=["js"]} [dev-dependencies] -rayon = "1" criterion = "0.3" +oxhttp = { version = "^0.1.2", features = ["native-tls"] } sophia_api = { version = "0.7", features = ["test_macro"] } +zstd = "0.9" [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen-test = "0.3" diff --git a/lib/benches/store.rs b/lib/benches/store.rs index 727b5736..815b9590 100644 --- a/lib/benches/store.rs +++ b/lib/benches/store.rs @@ -1,82 +1,119 @@ -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use oxigraph::model::{Dataset, Graph, GraphName, NamedNode, Quad, Triple}; +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use oxhttp::model::{Method, Request, Status}; +use oxigraph::io::GraphFormat; +use oxigraph::model::GraphNameRef; +use oxigraph::sparql::{Query, QueryResults, Update}; use oxigraph::store::Store; -use rand::random; +use std::fs::File; +use std::io::{BufRead, BufReader, Cursor, Read}; +use std::path::Path; -criterion_group!( - store_load, - graph_load_bench, - dataset_load_bench, - sled_load_bench -); +fn store_load(c: &mut Criterion) { + let mut data = Vec::new(); + read_data("explore-1000.nt.zst") + .read_to_end(&mut data) + .unwrap(); -criterion_main!(store_load); - -fn graph_load_bench(c: &mut Criterion) { - let mut group = c.benchmark_group("graph"); - group.nresamples(10); + let mut group = 
c.benchmark_group("store load"); + group.throughput(Throughput::Bytes(data.len() as u64)); group.sample_size(10); - for size in [100, 1_000, 10_000] { - group.throughput(Throughput::Elements(size as u64)); - let triples: Vec<_> = create_quads(size).into_iter().map(Triple::from).collect(); - group.bench_function(BenchmarkId::from_parameter(size), |b| { - b.iter(|| triples.iter().collect::<Graph>()); - }); - } - group.finish(); + group.bench_function("load BSBM explore 1000", |b| { + b.iter(|| { + let store = Store::new().unwrap(); + store + .load_graph( + Cursor::new(&data), + GraphFormat::NTriples, + GraphNameRef::DefaultGraph, + None, + ) + .unwrap(); + }) + }); } -fn dataset_load_bench(c: &mut Criterion) { - let mut group = c.benchmark_group("dataset"); - group.nresamples(10); - group.sample_size(10); - for size in [100, 1_000, 10_000] { - group.throughput(Throughput::Elements(size as u64)); - let quads = create_quads(size); - group.bench_function(BenchmarkId::from_parameter(size), |b| { - b.iter(|| quads.iter().collect::<Dataset>()); - }); - } - group.finish(); -} +fn store_query_and_update(c: &mut Criterion) { + let mut data = Vec::new(); + read_data("explore-1000.nt.zst") + .read_to_end(&mut data) + .unwrap(); + let store = Store::new().unwrap(); + store + .load_graph( + Cursor::new(&data), + GraphFormat::NTriples, + GraphNameRef::DefaultGraph, + None, + ) + .unwrap(); + let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst") + .lines() + .map(|l| { + let l = l.unwrap(); + let mut parts = l.trim().split('\t'); + let kind = parts.next().unwrap(); + let operation = parts.next().unwrap(); + match kind { + "query" => Operation::Query(Query::parse(operation, None).unwrap()), + "update" => Operation::Update(Update::parse(operation, None).unwrap()), + _ => panic!("Unexpected operation kind {}", kind), + } + }) + .collect::<Vec<_>>(); -fn sled_load_bench(c: &mut Criterion) { - let mut group = c.benchmark_group("sled"); - group.nresamples(10); + let mut group = 
c.benchmark_group("store operations"); + group.throughput(Throughput::Elements(operations.len() as u64)); group.sample_size(10); - for size in [100, 1_000, 10_000] { - group.throughput(Throughput::Elements(size as u64)); - let quads = create_quads(size); - group.bench_function(BenchmarkId::from_parameter(size), |b| { - b.iter(|| { - let store = Store::new().unwrap(); - for quad in &quads { - store.insert(quad).unwrap(); + group.bench_function("BSBM explore 1000 queryAndUpdate", |b| { + b.iter(|| { + for operation in &operations { + match operation { + Operation::Query(q) => match store.query(q.clone()).unwrap() { + QueryResults::Boolean(_) => (), + QueryResults::Solutions(s) => { + for s in s { + s.unwrap(); + } + } + QueryResults::Graph(g) => { + for t in g { + t.unwrap(); + } + } + }, + Operation::Update(u) => store.update(u.clone()).unwrap(), } - }); - }); + } + }) + }); +} + +criterion_group!(store, store_query_and_update, store_load); + +criterion_main!(store); + +fn read_data(file: &str) -> impl BufRead { + if !Path::new(file).exists() { + let mut client = oxhttp::Client::new(); + client.set_redirection_limit(5); + let url = format!( + "https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{}", + file + ); + let request = Request::builder(Method::GET, url.parse().unwrap()).build(); + let response = client.request(request).unwrap(); + assert_eq!( + response.status(), + Status::OK, + "{}", + response.into_body().to_string().unwrap() + ); + std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap(); } - group.finish(); + BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap()) } -fn create_quads(size: u64) -> Vec<Quad> { - (0..size) - .map(|_| { - Quad::new( - NamedNode::new_unchecked(format!( - "http://example.com/id/{}", - random::<u64>() % size - )), - NamedNode::new_unchecked(format!( - "http://example.com/id/{}", - random::<u64>() % size - )), - NamedNode::new_unchecked(format!( - "http://example.com/id/{}", - random::<u64>() % size - 
)), - GraphName::DefaultGraph, - ) - }) - .collect() +enum Operation { + Query(Query), + Update(Update), } diff --git a/testsuite/rdf-star b/testsuite/rdf-star index 561ce517..30ac58a9 160000 --- a/testsuite/rdf-star +++ b/testsuite/rdf-star @@ -1 +1 @@ -Subproject commit 561ce517d21f9f17f324ac58ac50be9231804255 +Subproject commit 30ac58a9a2717451242e06ec0ea241b72490183b