Set up CodSpeed and improve benchmarks

pull/712/head
Tpt 12 months ago committed by Thomas Tanon
parent bdf5d593ee
commit 790501e1b3
  1. 15
      .github/workflows/tests.yml
  2. 55
      Cargo.lock
  3. 2
      lib/Cargo.toml
  4. 168
      lib/benches/store.rs
  5. 2
      lints/test_debian_compatibility.py
  6. 4
      testsuite/Cargo.toml
  7. 97
      testsuite/benches/parser.rs

@ -429,3 +429,18 @@ jobs:
- uses: actions/checkout@v3
- uses: ./.github/actions/setup-rust
- run: python lints/test_debian_compatibility.py
# CI job that builds the benchmarks with cargo-codspeed and runs them through the CodSpeed action.
codspeed:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
# `|| true` lets this step succeed even when the install command exits with an error.
- run: cargo install cargo-codspeed || true
# Build the benchmarks of the two packages that are run below.
- run: cargo codspeed build -p oxigraph --features http-client-native-tls
- run: cargo codspeed build -p oxigraph-testsuite
- uses: CodSpeedHQ/action@v2
with:
run: cargo codspeed run
# The token is read from the repository secret CODSPEED_TOKEN.
token: ${{ secrets.CODSPEED_TOKEN }}

55
Cargo.lock generated

@ -328,12 +328,44 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "codspeed"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eb4ab4dcb6554eb4f590fb16f99d3b102ab76f5f56554c9a5340518b32c499b"
dependencies = [
"colored",
"libc",
"serde_json",
]
[[package]]
name = "codspeed-criterion-compat"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc07a3d3f7e0c8961d0ffdee149d39b231bafdcdc3d978dc5ad790c615f55f3f"
dependencies = [
"codspeed",
"colored",
"criterion",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "colored"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8"
dependencies = [
"lazy_static",
"windows-sys 0.48.0",
]
[[package]]
name = "console_error_panic_hook"
version = "0.1.7"
@ -1003,7 +1035,7 @@ dependencies = [
name = "oxigraph"
version = "0.4.0-alpha.1-dev"
dependencies = [
"criterion",
"codspeed-criterion-compat",
"digest",
"getrandom",
"hex",
@ -1064,11 +1096,9 @@ version = "0.4.0-alpha.1-dev"
dependencies = [
"anyhow",
"clap",
"criterion",
"codspeed-criterion-compat",
"oxigraph",
"oxttl",
"rio_api",
"rio_turtle",
"spargebra",
"sparopt",
"text-diff",
@ -1500,23 +1530,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "rio_api"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1924fa1f0e6d851f9b73b3c569e607c368a0d92995d99d563ad7bf1414696603"
[[package]]
name = "rio_turtle"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cec59971eafd99b9c7e3544bfcabafea81a7072ac51c9f46985ca0bd7ba6016"
dependencies = [
"oxilangtag",
"oxiri",
"rio_api",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"

@ -54,7 +54,7 @@ getrandom = "0.2.8"
js-sys = { version = "0.3.60", optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.5"
codspeed-criterion-compat = "2.3.3"
oxhttp = "0.2.0-alpha.3"
zstd = ">=0.12, <0.14"

@ -1,65 +1,56 @@
#![allow(clippy::panic)]
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status};
use oxigraph::io::RdfFormat;
use oxigraph::io::{RdfFormat, RdfParser};
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Read};
use std::io::Read;
use std::path::{Path, PathBuf};
use std::str;
/// Registers the "parse" benchmark group, which parses the decompressed
/// `explore-1000.nt.zst` data as N-Triples.
///
/// Throughput is reported in bytes of decompressed input and the group uses
/// 50 samples. Any parser error makes the benchmark panic.
fn parse_nt(c: &mut Criterion) {
    let bytes = read_data("explore-1000.nt.zst");
    let input_len = bytes.len() as u64;

    let mut parse_group = c.benchmark_group("parse");
    parse_group.throughput(Throughput::Bytes(input_len));
    parse_group.sample_size(50);
    parse_group.bench_function("parse BSBM explore 1000", |bencher| {
        bencher.iter(|| {
            let parser = RdfParser::from_format(RdfFormat::NTriples);
            // Drain the parser; every item must be `Ok`.
            for item in parser.parse_read(bytes.as_slice()) {
                item.unwrap();
            }
        })
    });
}
/// Registers the "store load" benchmarks: loading BSBM explore data into a
/// `Store` in memory, on disk with `do_load`, and on disk with `do_bulk_load`.
///
/// NOTE(review): this span shows two bodies one after the other: a variant
/// made of two nested blocks that fill a `Vec` through `read_to_end`, and a
/// flat variant that takes the bytes straight from `read_data`. Confirm which
/// one applies before relying on it.
fn store_load(c: &mut Criterion) {
// Variant 1: nested blocks, one per dataset size.
{
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 1000 in memory", |b| {
b.iter(|| {
let store = Store::new().unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_bulk_load(&store, &data);
})
});
}
{
let mut data = Vec::new();
read_data("explore-10000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load large");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_bulk_load(&store, &data);
})
});
}
// Variant 2: flat body, only the explore-1000 dataset.
let data = read_data("explore-1000.nt.zst");
let mut group = c.benchmark_group("store load");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 1000 in memory", |b| {
b.iter(|| {
let store = Store::new().unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk", |b| {
b.iter(|| {
// A fresh `TempDir` and store are created on every iteration.
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path).unwrap();
do_bulk_load(&store, &data);
})
});
}
fn do_load(store: &Store, data: &[u8]) {
@ -76,23 +67,12 @@ fn do_bulk_load(store: &Store, data: &[u8]) {
}
fn store_query_and_update(c: &mut Criterion) {
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = BufReader::new(read_data("mix-exploreAndUpdate-1000.tsv.zst"))
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => Operation::Query(Query::parse(operation, None).unwrap()),
"update" => Operation::Update(Update::parse(operation, None).unwrap()),
_ => panic!("Unexpected operation kind {kind}"),
}
let data = read_data("explore-1000.nt.zst");
let operations = bsbm_sparql_operation()
.into_iter()
.map(|op| match op {
RawOperation::Query(q) => Operation::Query(Query::parse(&q, None).unwrap()),
RawOperation::Update(q) => Operation::Update(Update::parse(&q, None).unwrap()),
})
.collect::<Vec<_>>();
let query_operations = operations
@ -151,26 +131,7 @@ fn run_operation(store: &Store, operations: &[Operation]) {
}
fn sparql_parsing(c: &mut Criterion) {
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = BufReader::new(read_data("mix-exploreAndUpdate-1000.tsv.zst"))
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => RawOperation::Query(operation.to_owned()),
"update" => RawOperation::Update(operation.to_owned()),
_ => panic!("Unexpected operation kind {kind}"),
}
})
.collect::<Vec<_>>();
let operations = bsbm_sparql_operation();
let mut group = c.benchmark_group("sparql parsing");
group.sample_size(10);
group.throughput(Throughput::Bytes(
@ -198,11 +159,12 @@ fn sparql_parsing(c: &mut Criterion) {
});
}
criterion_group!(parse, parse_nt);
criterion_group!(store, sparql_parsing, store_query_and_update, store_load);
criterion_main!(store);
criterion_main!(parse, store);
fn read_data(file: &str) -> impl Read {
fn read_data(file: &str) -> Vec<u8> {
if !Path::new(file).exists() {
let client = oxhttp::Client::new().with_redirection_limit(5);
let url = format!("https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{file}");
@ -216,7 +178,31 @@ fn read_data(file: &str) -> impl Read {
);
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap();
}
zstd::Decoder::new(File::open(file).unwrap()).unwrap()
let mut buf = Vec::new();
zstd::Decoder::new(File::open(file).unwrap())
.unwrap()
.read_to_end(&mut buf)
.unwrap();
buf
}
/// Reads `mix-exploreAndUpdate-1000.tsv.zst` and returns its last 300 lines,
/// in reverse file order, as unparsed operations.
///
/// Each line is trimmed and split on tabs: the first field is the kind
/// (`query` or `update`), the second is the operation text.
///
/// # Panics
///
/// Panics if the data is not valid UTF-8, if a line has fewer than two
/// tab-separated fields, or if the kind is neither `query` nor `update`.
fn bsbm_sparql_operation() -> Vec<RawOperation> {
    let raw = read_data("mix-exploreAndUpdate-1000.tsv.zst");
    let text = String::from_utf8(raw).unwrap();

    let mut operations = Vec::new();
    // Only the last 300 lines are kept ("10 groups" per the original note).
    for line in text.lines().rev().take(300) {
        let mut fields = line.trim().split('\t');
        let kind = fields.next().unwrap();
        let operation = fields.next().unwrap();
        let parsed = match kind {
            "query" => RawOperation::Query(operation.into()),
            "update" => RawOperation::Update(operation.into()),
            _ => panic!("Unexpected operation kind {kind}"),
        };
        operations.push(parsed);
    }
    operations
}
#[derive(Clone)]

@ -5,7 +5,7 @@ from urllib.request import urlopen
TARGET_DEBIAN_VERSIONS = ["sid"]
IGNORE_PACKAGES = {"oxigraph-js", "oxigraph-testsuite", "pyoxigraph", "sparql-smith"}
ALLOWED_MISSING_PACKAGES = {"escargot", "json-event-parser", "oxhttp", "quick-xml"}
ALLOWED_MISSING_PACKAGES = {"codspeed-criterion-compat", "escargot", "json-event-parser", "oxhttp", "quick-xml"}
base_path = Path(__file__).parent.parent

@ -21,9 +21,7 @@ text-diff = "0.4"
time = { version = "0.3", features = ["formatting"] }
[dev-dependencies]
criterion = "0.5"
rio_api = "0.8"
rio_turtle = "0.8"
codspeed-criterion-compat = "2.3.3"
[lints]
workspace = true

@ -1,11 +1,11 @@
#![allow(clippy::print_stderr)]
use anyhow::Result;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use codspeed_criterion_compat::{
criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use oxigraph_testsuite::files::read_file;
use oxigraph_testsuite::manifest::TestManifest;
use rio_api::parser::*;
use rio_turtle::*;
use std::io::Read;
fn test_data_from_testsuite(manifest_uri: String, include_tests_types: &[&str]) -> Result<Vec<u8>> {
@ -53,7 +53,7 @@ fn parse_bench(
group.finish();
}
fn parse_oxttl_ntriples(c: &mut Criterion, name: &str, data: &[u8]) {
fn parse_ntriples(c: &mut Criterion, name: &str, data: &[u8]) {
parse_bench(c, "oxttl ntriples", name, data, |data| {
let mut parser = oxttl::NTriplesParser::new().parse();
parser.extend_from_slice(data);
@ -64,7 +64,7 @@ fn parse_oxttl_ntriples(c: &mut Criterion, name: &str, data: &[u8]) {
});
}
fn parse_oxttl_turtle(c: &mut Criterion, name: &str, data: &[u8]) {
fn parse_turtle(c: &mut Criterion, name: &str, data: &[u8]) {
parse_bench(c, "oxttl turtle", name, data, |data| {
let mut parser = oxttl::TurtleParser::new().parse();
parser.extend_from_slice(data);
@ -75,74 +75,8 @@ fn parse_oxttl_turtle(c: &mut Criterion, name: &str, data: &[u8]) {
});
}
/// Registers, through `parse_bench`, a "rio ntriples" benchmark that feeds
/// `data` to `NTriplesParser` and counts the triples passed to the callback.
///
/// A parser error makes the benchmark panic.
fn parse_rio_ntriples(c: &mut Criterion, name: &str, data: &[u8]) {
    parse_bench(c, "rio ntriples", name, data, |input| {
        let mut triples: u64 = 0;
        let mut parser = NTriplesParser::new(input);
        parser
            .parse_all::<TurtleError>(&mut |_| {
                triples += 1;
                Ok(())
            })
            .unwrap();
    });
}
/// Registers, through `parse_bench`, a "rio turtle" benchmark that feeds
/// `data` to `TurtleParser` (second constructor argument `None`) and counts
/// the triples passed to the callback.
///
/// A parser error makes the benchmark panic.
fn parse_rio_turtle(c: &mut Criterion, name: &str, data: &[u8]) {
    parse_bench(c, "rio turtle", name, data, |input| {
        let mut triples: u64 = 0;
        let mut parser = TurtleParser::new(input, None);
        parser
            .parse_all::<TurtleError>(&mut |_| {
                triples += 1;
                Ok(())
            })
            .unwrap();
    });
}
/// Runs `parse_oxttl_ntriples` on the data returned by `ntriples_test_data`,
/// under the name "ntriples".
///
/// If the test data cannot be obtained, the error is printed to stderr and
/// nothing is benchmarked.
fn bench_parse_oxttl_ntriples_with_ntriples(c: &mut Criterion) {
    let data = match ntriples_test_data() {
        Ok(data) => data,
        Err(error) => {
            eprintln!("{error}");
            return;
        }
    };
    parse_oxttl_ntriples(c, "ntriples", &data)
}
/// Runs `parse_oxttl_turtle` on the data returned by `ntriples_test_data`,
/// under the name "ntriples".
///
/// If the test data cannot be obtained, the error is printed to stderr and
/// nothing is benchmarked.
fn bench_parse_oxttl_ntriples_with_turtle(c: &mut Criterion) {
    let data = match ntriples_test_data() {
        Ok(data) => data,
        Err(error) => {
            eprintln!("{error}");
            return;
        }
    };
    parse_oxttl_turtle(c, "ntriples", &data)
}
/// Runs `parse_oxttl_turtle` on the data returned by `turtle_test_data`,
/// under the name "turtle".
///
/// If the test data cannot be obtained, the error is printed to stderr and
/// nothing is benchmarked.
fn bench_parse_oxttl_turtle_with_turtle(c: &mut Criterion) {
    let data = match turtle_test_data() {
        Ok(data) => data,
        Err(error) => {
            eprintln!("{error}");
            return;
        }
    };
    parse_oxttl_turtle(c, "turtle", &data)
}
fn bench_parse_rio_ntriples_with_ntriples(c: &mut Criterion) {
parse_rio_ntriples(
fn bench_parse_ntriples_with_ntriples(c: &mut Criterion) {
parse_ntriples(
c,
"ntriples",
&match ntriples_test_data() {
@ -155,8 +89,8 @@ fn bench_parse_rio_ntriples_with_ntriples(c: &mut Criterion) {
)
}
fn bench_parse_rio_ntriples_with_turtle(c: &mut Criterion) {
parse_rio_turtle(
fn bench_parse_ntriples_with_turtle(c: &mut Criterion) {
parse_turtle(
c,
"ntriples",
&match ntriples_test_data() {
@ -169,8 +103,8 @@ fn bench_parse_rio_ntriples_with_turtle(c: &mut Criterion) {
)
}
fn bench_parse_rio_turtle_with_turtle(c: &mut Criterion) {
parse_rio_turtle(
fn bench_parse_turtle_with_turtle(c: &mut Criterion) {
parse_turtle(
c,
"turtle",
&match turtle_test_data() {
@ -185,12 +119,9 @@ fn bench_parse_rio_turtle_with_turtle(c: &mut Criterion) {
criterion_group!(
w3c_testsuite,
bench_parse_rio_ntriples_with_ntriples,
bench_parse_rio_ntriples_with_turtle,
bench_parse_rio_turtle_with_turtle,
bench_parse_oxttl_ntriples_with_ntriples,
bench_parse_oxttl_ntriples_with_turtle,
bench_parse_oxttl_turtle_with_turtle
bench_parse_ntriples_with_ntriples,
bench_parse_ntriples_with_turtle,
bench_parse_turtle_with_turtle
);
criterion_main!(w3c_testsuite);

Loading…
Cancel
Save