//! Criterion benchmarks for the Oxigraph store, based on the Berlin SPARQL Benchmark (BSBM) datasets.

#![allow(clippy::panic)]

use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status};
use oxigraph::io::RdfFormat;
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Read};
use std::path::{Path, PathBuf};

// Benchmarks loading the BSBM explore dataset into in-memory and on-disk stores.
fn store_load(c: &mut Criterion) {
    {
        let mut data = Vec::new();
        read_data("explore-1000.nt.zst")
            .read_to_end(&mut data)
            .unwrap();

        let mut group = c.benchmark_group("store load");
        group.throughput(Throughput::Bytes(data.len() as u64));
        group.sample_size(10);
        group.bench_function("load BSBM explore 1000 in memory", |b| {
            b.iter(|| {
                let store = Store::new().unwrap();
                do_load(&store, &data);
            })
        });
        group.bench_function("load BSBM explore 1000 on disk", |b| {
            b.iter(|| {
                let path = TempDir::default();
                let store = Store::open(&path).unwrap();
                do_load(&store, &data);
            })
        });
        group.bench_function("load BSBM explore 1000 on disk with bulk load", |b| {
            b.iter(|| {
                let path = TempDir::default();
                let store = Store::open(&path).unwrap();
                do_bulk_load(&store, &data);
            })
        });
    }

    {
        let mut data = Vec::new();
        read_data("explore-10000.nt.zst")
            .read_to_end(&mut data)
            .unwrap();

        let mut group = c.benchmark_group("store load large");
        group.throughput(Throughput::Bytes(data.len() as u64));
        group.sample_size(10);
        group.bench_function("load BSBM explore 10000 on disk with bulk load", |b| {
            b.iter(|| {
                let path = TempDir::default();
                let store = Store::open(&path).unwrap();
                do_bulk_load(&store, &data);
            })
        });
    }
}

// Loads the dataset with the regular loader, then optimizes the store.
fn do_load(store: &Store, data: &[u8]) {
    store.load_from_read(RdfFormat::NTriples, data).unwrap();
    store.optimize().unwrap();
}

// Loads the dataset with the bulk loader, then optimizes the store.
fn do_bulk_load(store: &Store, data: &[u8]) {
    store
        .bulk_loader()
        .load_from_read(RdfFormat::NTriples, data)
        .unwrap();
    store.optimize().unwrap();
}

// Benchmarks running the BSBM exploreAndUpdate operation mix against in-memory and on-disk stores.
fn store_query_and_update(c: &mut Criterion) {
    let mut data = Vec::new();
    read_data("explore-1000.nt.zst")
        .read_to_end(&mut data)
        .unwrap();

    // Each line of the operation mix is "<kind>\t<SPARQL operation text>".
    let operations = BufReader::new(read_data("mix-exploreAndUpdate-1000.tsv.zst"))
        .lines()
        .map(|l| {
            let l = l.unwrap();
            let mut parts = l.trim().split('\t');
            let kind = parts.next().unwrap();
            let operation = parts.next().unwrap();
            match kind {
                "query" => Operation::Query(Query::parse(operation, None).unwrap()),
                "update" => Operation::Update(Update::parse(operation, None).unwrap()),
                _ => panic!("Unexpected operation kind {kind}"),
            }
        })
        .collect::<Vec<_>>();
    let query_operations = operations
        .iter()
        .filter(|o| matches!(o, Operation::Query(_)))
        .cloned()
        .collect::<Vec<_>>();

    let mut group = c.benchmark_group("store operations");
    group.throughput(Throughput::Elements(operations.len() as u64));
    group.sample_size(10);

    {
        let memory_store = Store::new().unwrap();
        do_bulk_load(&memory_store, &data);
        group.bench_function("BSBM explore 1000 query in memory", |b| {
            b.iter(|| run_operation(&memory_store, &query_operations))
        });
        group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| {
            b.iter(|| run_operation(&memory_store, &operations))
        });
    }

    {
        let path = TempDir::default();
        let disk_store = Store::open(&path).unwrap();
        do_bulk_load(&disk_store, &data);
        group.bench_function("BSBM explore 1000 query on disk", |b| {
            b.iter(|| run_operation(&disk_store, &query_operations))
        });
        group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| {
            b.iter(|| run_operation(&disk_store, &operations))
        });
    }
}

// Runs each operation against the store and drains all results so they are fully evaluated.
fn run_operation(store: &Store, operations: &[Operation]) {
    for operation in operations {
        match operation {
            Operation::Query(q) => match store.query(q.clone()).unwrap() {
                QueryResults::Boolean(_) => (),
                QueryResults::Solutions(s) => {
                    for s in s {
                        s.unwrap();
                    }
                }
                QueryResults::Graph(g) => {
                    for t in g {
                        t.unwrap();
                    }
                }
            },
            Operation::Update(u) => store.update(u.clone()).unwrap(),
        }
    }
}

// Benchmarks parsing the SPARQL queries and updates of the BSBM operation mix.
fn sparql_parsing(c: &mut Criterion) {
    let mut data = Vec::new();
    read_data("explore-1000.nt.zst")
        .read_to_end(&mut data)
        .unwrap();

    let operations = BufReader::new(read_data("mix-exploreAndUpdate-1000.tsv.zst"))
        .lines()
        .map(|l| {
            let l = l.unwrap();
            let mut parts = l.trim().split('\t');
            let kind = parts.next().unwrap();
            let operation = parts.next().unwrap();
            match kind {
                "query" => RawOperation::Query(operation.to_owned()),
                "update" => RawOperation::Update(operation.to_owned()),
                _ => panic!("Unexpected operation kind {kind}"),
            }
        })
        .collect::<Vec<_>>();

    let mut group = c.benchmark_group("sparql parsing");
    group.sample_size(10);
    group.throughput(Throughput::Bytes(
        operations
            .iter()
            .map(|o| match o {
                RawOperation::Query(q) => q.len(),
                RawOperation::Update(u) => u.len(),
            })
            .sum::<usize>() as u64,
    ));
    group.bench_function("BSBM query and update set", |b| {
        b.iter(|| {
            for operation in &operations {
                match operation {
                    RawOperation::Query(q) => {
                        Query::parse(q, None).unwrap();
                    }
                    RawOperation::Update(u) => {
                        Update::parse(u, None).unwrap();
                    }
                }
            }
        })
    });
}

criterion_group!(store, sparql_parsing, store_query_and_update, store_load);
criterion_main!(store);

// Returns a reader over the decompressed benchmark file, downloading it from the
// bsbm-tools release if it is not already present locally.
fn read_data(file: &str) -> impl Read {
    if !Path::new(file).exists() {
        let client = oxhttp::Client::new().with_redirection_limit(5);
        let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}");
        let request = Request::builder(Method::GET, url.parse().unwrap()).build();
        let response = client.request(request).unwrap();
        assert_eq!(
            response.status(),
            Status::OK,
            "{}",
            response.into_body().to_string().unwrap()
        );
        std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap();
    }
    zstd::Decoder::new(File::open(file).unwrap()).unwrap()
}

#[derive(Clone)]
enum RawOperation {
    Query(String),
    Update(String),
}

#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
enum Operation {
    Query(Query),
    Update(Update),
}

// Random path under the system temporary directory, removed recursively on drop.
struct TempDir(PathBuf);

impl Default for TempDir {
    fn default() -> Self {
        Self(temp_dir().join(format!("oxigraph-bench-{}", random::<u128>())))
    }
}

impl AsRef<Path> for TempDir {
    fn as_ref(&self) -> &Path {
        &self.0
    }
}

impl Drop for TempDir {
    fn drop(&mut self) {
        remove_dir_all(&self.0).unwrap()
    }
}