Exposes the query profiler to the CLI app

pull/470/head
Authored by Tpt 2 years ago, committed by Thomas Tanon
parent dcd59ac4dd
commit 60ffd99ad8
Changed files (12; lines changed in parentheses):
  1. Cargo.lock (2)
  2. bench/explanation_to_flamegraph.py (56)
  3. bench/explanation_to_trace.py (52)
  4. lib/Cargo.toml (2)
  5. lib/src/sparql/algebra.rs (13)
  6. lib/src/sparql/eval.rs (158)
  7. lib/src/sparql/mod.rs (84)
  8. lib/src/sparql/plan.rs (398)
  9. lib/src/sparql/update.rs (5)
  10. lib/src/store.rs (39)
  11. lib/tests/store.rs (6)
  12. server/src/main.rs (55)

Cargo.lock (generated, 2 lines changed)

@ -932,6 +932,8 @@ dependencies = [
  "digest",
  "getrandom",
  "hex",
+ "js-sys",
+ "json-event-parser",
  "lazy_static",
  "libc",
  "md-5",

@ -0,0 +1,56 @@
"""
Converts a SPARQL query JSON explanation file to a flamegraph.
Usage: python explanation_to_flamegraph.py explanation.json flamegraph.svg
"""
import json
import subprocess
from argparse import ArgumentParser
from pathlib import Path
from shutil import which
from tempfile import NamedTemporaryFile

if which('flamegraph.pl') is None:
    raise Exception(
        'This script requires the flamegraph.pl script from https://github.com/brendangregg/FlameGraph to be installed and be in $PATH.')

parser = ArgumentParser(
    prog='OxigraphFlamegraph',
    description='Builds a flamegraph from the Oxigraph query explanation JSON format')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('flamegraph_svg', type=Path)
args = parser.parse_args()


def trace_line(label: str, value: float):
    return f"{label} {int(value * 1_000_000)}"


with args.json_explanation.open('rt') as fp:
    explanation = json.load(fp)
trace = []
if "parsing duration in seconds" in explanation:
    trace.append(trace_line("parsing", explanation['parsing duration in seconds']))
if "planning duration in seconds" in explanation:
    trace.append(trace_line("planning", explanation['planning duration in seconds']))
already_used_names = {}


def add_to_trace(node, path):
    path = f"{path};{node['name'].replace(' ', '`')}"
    if path in already_used_names:
        already_used_names[path] += 1
        path = f"{path}`{already_used_names[path]}"
    else:
        already_used_names[path] = 0
    samples = node['duration in seconds'] - sum(child['duration in seconds'] for child in node.get("children", ()))
    trace.append(trace_line(path, samples))
    for child in node.get("children", ()):
        add_to_trace(child, path)


add_to_trace(explanation["plan"], 'eval')

with NamedTemporaryFile('w+t') as fp:
    fp.write('\n'.join(trace))
    fp.flush()
    args.flamegraph_svg.write_bytes(subprocess.run(['flamegraph.pl', fp.name], stdout=subprocess.PIPE).stdout)
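For reference, a small sketch (hypothetical plan and timings, not from a real run) of the folded-stack lines this script writes to the temporary file handed to flamegraph.pl: each line is a semicolon-separated path rooted at "eval" followed by a microsecond count, spaces in node names are replaced with backticks, and a node's own count excludes the time spent in its children.

# Hypothetical input: a plan node "Project(?s)" lasting 0.5 s with one child
# "QuadPattern(?s ?p ?o ?g)" lasting 0.25 s, plus 0.001 s of parsing and 0.002 s of planning.
# The temporary file passed to flamegraph.pl would then contain:
expected_folded_stacks = """\
parsing 1000
planning 2000
eval;Project(?s) 250000
eval;Project(?s);QuadPattern(?s`?p`?o`?g) 250000
"""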

@ -0,0 +1,52 @@
"""
Converts a SPARQL query JSON explanation file to a tracing event file compatible with Chrome.
Usage: python explanation_to_trace.py explanation.json trace.json
"""
import json
from argparse import ArgumentParser
from pathlib import Path

parser = ArgumentParser(
    prog='OxigraphTracing',
    description='Builds a Trace Event Format file from the Oxigraph query explanation JSON format')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('json_trace_event', type=Path)
args = parser.parse_args()

with args.json_explanation.open('rt') as fp:
    explanation = json.load(fp)
trace = []


def trace_element(name: str, cat: str, start_s: float, duration_s: float):
    return {
        "name": name,
        "cat": cat,
        "ph": "X",
        "ts": int(start_s * 1_000_000),
        "dur": int(duration_s * 1_000_000),
        "pid": 1
    }


def add_to_trace(node, path, start_time: float):
    path = f"{path};{node['name'].replace(' ', '`')}"
    trace.append(trace_element(node["name"], node["name"].split("(")[0], start_time, node["duration in seconds"]))
    for child in node.get("children", ()):
        add_to_trace(child, path, start_time)
        start_time += child["duration in seconds"]


current_time = 0
if "parsing duration in seconds" in explanation:
    d = explanation["parsing duration in seconds"]
    trace.append(trace_element("parsing", "parsing", current_time, d))
    current_time += d
if "planning duration in seconds" in explanation:
    d = explanation["planning duration in seconds"]
    trace.append(trace_element("planning", "planning", current_time, d))
    current_time += d
add_to_trace(explanation["plan"], 'eval', current_time)

with args.json_trace_event.open("wt") as fp:
    json.dump(trace, fp)
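For reference, a sketch of a single event this script would emit for a hypothetical plan node (names and numbers are illustrative). Each event is a Trace Event Format "complete" event, and the resulting trace.json can be loaded in chrome://tracing or Perfetto.

# Hypothetical event for a node "QuadPattern(?s ?p ?o ?g)" that starts 0.003 s into the
# evaluation (after parsing and planning) and lasts 0.25 s:
example_event = {
    "name": "QuadPattern(?s ?p ?o ?g)",
    "cat": "QuadPattern",  # the node name up to the first '(' is used as the category
    "ph": "X",             # "complete" event: has both a start timestamp and a duration
    "ts": 3000,            # start, in microseconds
    "dur": 250000,         # duration, in microseconds
    "pid": 1,
}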

@ -36,6 +36,7 @@ rio_xml = "0.8"
 hex = "0.4"
 siphasher = "0.3"
 lazy_static = "1"
+json-event-parser = "0.1"
 oxrdf = { version = "0.1.5", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] }
 oxsdatatypes = { version = "0.1.1", path="oxsdatatypes" }
 spargebra = { version = "0.2.7", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
@ -48,6 +49,7 @@ oxhttp = { version = "0.1", optional = true }
 [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
 getrandom = { version = "0.2", features = ["js"] }
+js-sys = "0.3"
 [target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
 criterion = "0.4"

@ -5,9 +5,11 @@
//! Warning: this implementation is an unstable work in progress //! Warning: this implementation is an unstable work in progress
use crate::model::*; use crate::model::*;
use crate::sparql::eval::Timer;
use spargebra::GraphUpdateOperation; use spargebra::GraphUpdateOperation;
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
use std::time::Duration;
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/). /// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
/// ///
@ -30,13 +32,19 @@ use std::str::FromStr;
pub struct Query { pub struct Query {
pub(super) inner: spargebra::Query, pub(super) inner: spargebra::Query,
pub(super) dataset: QueryDataset, pub(super) dataset: QueryDataset,
pub(super) parsing_duration: Option<Duration>,
} }
impl Query { impl Query {
/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query. /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> { pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> {
let query = spargebra::Query::parse(query, base_iri)?; let start = Timer::now();
Ok(query.into()) let query = Self::from(spargebra::Query::parse(query, base_iri)?);
Ok(Self {
dataset: query.dataset,
inner: query.inner,
parsing_duration: Some(start.elapsed()),
})
} }
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
@ -90,6 +98,7 @@ impl From<spargebra::Query> for Query {
| spargebra::Query::Ask { dataset, .. } => dataset, | spargebra::Query::Ask { dataset, .. } => dataset,
}), }),
inner: query, inner: query,
parsing_duration: None,
} }
} }
} }

@ -28,6 +28,8 @@ use std::iter::Iterator;
use std::iter::{empty, once}; use std::iter::{empty, once};
use std::rc::Rc; use std::rc::Rc;
use std::str; use std::str;
use std::time::Duration as StdDuration;
#[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
use std::time::Instant; use std::time::Instant;
const REGEX_SIZE_LIMIT: usize = 1_000_000; const REGEX_SIZE_LIMIT: usize = 1_000_000;
@ -42,6 +44,7 @@ pub struct SimpleEvaluator {
now: DateTime, now: DateTime,
service_handler: Rc<dyn ServiceHandler<Error = EvaluationError>>, service_handler: Rc<dyn ServiceHandler<Error = EvaluationError>>,
custom_functions: Rc<CustomFunctionRegistry>, custom_functions: Rc<CustomFunctionRegistry>,
run_stats: bool,
} }
impl SimpleEvaluator { impl SimpleEvaluator {
@ -50,6 +53,7 @@ impl SimpleEvaluator {
base_iri: Option<Rc<Iri<String>>>, base_iri: Option<Rc<Iri<String>>>,
service_handler: Rc<dyn ServiceHandler<Error = EvaluationError>>, service_handler: Rc<dyn ServiceHandler<Error = EvaluationError>>,
custom_functions: Rc<CustomFunctionRegistry>, custom_functions: Rc<CustomFunctionRegistry>,
run_stats: bool,
) -> Self { ) -> Self {
Self { Self {
dataset, dataset,
@ -57,6 +61,7 @@ impl SimpleEvaluator {
now: DateTime::now().unwrap(), now: DateTime::now().unwrap(),
service_handler, service_handler,
custom_functions, custom_functions,
run_stats,
} }
} }
@ -64,69 +69,76 @@ impl SimpleEvaluator {
&self, &self,
plan: Rc<PlanNode>, plan: Rc<PlanNode>,
variables: Rc<Vec<Variable>>, variables: Rc<Vec<Variable>>,
) -> QueryResults { ) -> (QueryResults, Rc<PlanNodeWithStats>) {
let iter = self.plan_evaluator(plan)(EncodedTuple::with_capacity(variables.len())); let (eval, stats) = self.plan_evaluator(plan);
QueryResults::Solutions(decode_bindings(self.dataset.clone(), iter, variables)) (
QueryResults::Solutions(decode_bindings(
self.dataset.clone(),
eval(EncodedTuple::with_capacity(variables.len())),
variables,
)),
stats,
)
} }
pub fn evaluate_ask_plan(&self, plan: Rc<PlanNode>) -> Result<QueryResults, EvaluationError> { pub fn evaluate_ask_plan(
&self,
plan: Rc<PlanNode>,
) -> (Result<QueryResults, EvaluationError>, Rc<PlanNodeWithStats>) {
let from = EncodedTuple::with_capacity(plan.used_variables().len()); let from = EncodedTuple::with_capacity(plan.used_variables().len());
match self.plan_evaluator(plan)(from).next() { let (eval, stats) = self.plan_evaluator(plan);
(
match eval(from).next() {
Some(Ok(_)) => Ok(QueryResults::Boolean(true)), Some(Ok(_)) => Ok(QueryResults::Boolean(true)),
Some(Err(error)) => Err(error), Some(Err(error)) => Err(error),
None => Ok(QueryResults::Boolean(false)), None => Ok(QueryResults::Boolean(false)),
} },
stats,
)
} }
pub fn evaluate_construct_plan( pub fn evaluate_construct_plan(
&self, &self,
plan: Rc<PlanNode>, plan: Rc<PlanNode>,
template: Vec<TripleTemplate>, template: Vec<TripleTemplate>,
) -> QueryResults { ) -> (QueryResults, Rc<PlanNodeWithStats>) {
let from = EncodedTuple::with_capacity(plan.used_variables().len()); let from = EncodedTuple::with_capacity(plan.used_variables().len());
let (eval, stats) = self.plan_evaluator(plan);
(
QueryResults::Graph(QueryTripleIter { QueryResults::Graph(QueryTripleIter {
iter: Box::new(ConstructIterator { iter: Box::new(ConstructIterator {
eval: self.clone(), eval: self.clone(),
iter: self.plan_evaluator(plan)(from), iter: eval(from),
template, template,
buffered_results: Vec::default(), buffered_results: Vec::default(),
bnodes: Vec::default(), bnodes: Vec::default(),
}), }),
}) }),
stats,
)
} }
pub fn evaluate_describe_plan(&self, plan: Rc<PlanNode>) -> QueryResults { pub fn evaluate_describe_plan(
&self,
plan: Rc<PlanNode>,
) -> (QueryResults, Rc<PlanNodeWithStats>) {
let from = EncodedTuple::with_capacity(plan.used_variables().len()); let from = EncodedTuple::with_capacity(plan.used_variables().len());
let (eval, stats) = self.plan_evaluator(plan);
(
QueryResults::Graph(QueryTripleIter { QueryResults::Graph(QueryTripleIter {
iter: Box::new(DescribeIterator { iter: Box::new(DescribeIterator {
eval: self.clone(), eval: self.clone(),
iter: self.plan_evaluator(plan)(from), iter: eval(from),
quads: Box::new(empty()), quads: Box::new(empty()),
}), }),
}) }),
stats,
)
} }
pub fn plan_evaluator( pub fn plan_evaluator(
&self, &self,
node: Rc<PlanNode>, node: Rc<PlanNode>,
) -> Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator> {
self.build_plan_evaluator(node, false).0
}
pub fn plan_evaluator_with_stats(
&self,
node: Rc<PlanNode>,
) -> (
Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator>,
Rc<PlanNodeWithStats>,
) {
self.build_plan_evaluator(node, true)
}
fn build_plan_evaluator(
&self,
node: Rc<PlanNode>,
run_stats: bool,
) -> ( ) -> (
Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator>, Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator>,
Rc<PlanNodeWithStats>, Rc<PlanNodeWithStats>,
@ -372,9 +384,9 @@ impl SimpleEvaluator {
.intersection(&right.always_bound_variables()) .intersection(&right.always_bound_variables())
.copied() .copied()
.collect(); .collect();
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats); stat_children.push(right_stats);
if join_keys.is_empty() { if join_keys.is_empty() {
// Cartesian product // Cartesian product
@ -418,9 +430,9 @@ impl SimpleEvaluator {
} }
} }
PlanNode::ForLoopJoin { left, right } => { PlanNode::ForLoopJoin { left, right } => {
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats); stat_children.push(right_stats);
Rc::new(move |from| { Rc::new(move |from| {
let right = right.clone(); let right = right.clone();
@ -436,9 +448,9 @@ impl SimpleEvaluator {
.intersection(&right.always_bound_variables()) .intersection(&right.always_bound_variables())
.copied() .copied()
.collect(); .collect();
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats); stat_children.push(right_stats);
if join_keys.is_empty() { if join_keys.is_empty() {
Rc::new(move |from| { Rc::new(move |from| {
@ -479,9 +491,9 @@ impl SimpleEvaluator {
.intersection(&right.always_bound_variables()) .intersection(&right.always_bound_variables())
.copied() .copied()
.collect(); .collect();
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats); stat_children.push(right_stats);
let expression = self.expression_evaluator(expression); let expression = self.expression_evaluator(expression);
// Real hash join // Real hash join
@ -508,9 +520,9 @@ impl SimpleEvaluator {
right, right,
possible_problem_vars, possible_problem_vars,
} => { } => {
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats); stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats); stat_children.push(right_stats);
let possible_problem_vars = possible_problem_vars.clone(); let possible_problem_vars = possible_problem_vars.clone();
Rc::new(move |from| { Rc::new(move |from| {
@ -533,7 +545,7 @@ impl SimpleEvaluator {
}) })
} }
PlanNode::Filter { child, expression } => { PlanNode::Filter { child, expression } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let expression = self.expression_evaluator(expression); let expression = self.expression_evaluator(expression);
Rc::new(move |from| { Rc::new(move |from| {
@ -552,8 +564,7 @@ impl SimpleEvaluator {
let children: Vec<_> = children let children: Vec<_> = children
.iter() .iter()
.map(|child| { .map(|child| {
let (child, child_stats) = let (child, child_stats) = self.plan_evaluator(child.clone());
self.build_plan_evaluator(child.clone(), run_stats);
stat_children.push(child_stats); stat_children.push(child_stats);
child child
}) })
@ -572,7 +583,7 @@ impl SimpleEvaluator {
variable, variable,
expression, expression,
} => { } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let position = variable.encoded; let position = variable.encoded;
let expression = self.expression_evaluator(expression); let expression = self.expression_evaluator(expression);
@ -588,7 +599,7 @@ impl SimpleEvaluator {
}) })
} }
PlanNode::Sort { child, by } => { PlanNode::Sort { child, by } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let by: Vec<_> = by let by: Vec<_> = by
.iter() .iter()
@ -646,12 +657,12 @@ impl SimpleEvaluator {
}) })
} }
PlanNode::HashDeduplicate { child } => { PlanNode::HashDeduplicate { child } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
Rc::new(move |from| Box::new(hash_deduplicate(child(from)))) Rc::new(move |from| Box::new(hash_deduplicate(child(from))))
} }
PlanNode::Reduced { child } => { PlanNode::Reduced { child } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
Rc::new(move |from| { Rc::new(move |from| {
Box::new(ConsecutiveDeduplication { Box::new(ConsecutiveDeduplication {
@ -661,19 +672,19 @@ impl SimpleEvaluator {
}) })
} }
PlanNode::Skip { child, count } => { PlanNode::Skip { child, count } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let count = *count; let count = *count;
Rc::new(move |from| Box::new(child(from).skip(count))) Rc::new(move |from| Box::new(child(from).skip(count)))
} }
PlanNode::Limit { child, count } => { PlanNode::Limit { child, count } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let count = *count; let count = *count;
Rc::new(move |from| Box::new(child(from).take(count))) Rc::new(move |from| Box::new(child(from).take(count)))
} }
PlanNode::Project { child, mapping } => { PlanNode::Project { child, mapping } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let mapping = mapping.clone(); let mapping = mapping.clone();
Rc::new(move |from| { Rc::new(move |from| {
@ -713,7 +724,7 @@ impl SimpleEvaluator {
key_variables, key_variables,
aggregates, aggregates,
} => { } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats); stat_children.push(child_stats);
let key_variables = key_variables.clone(); let key_variables = key_variables.clone();
let aggregate_input_expressions: Vec<_> = aggregates let aggregate_input_expressions: Vec<_> = aggregates
@ -810,10 +821,10 @@ impl SimpleEvaluator {
exec_count: Cell::new(0), exec_count: Cell::new(0),
exec_duration: Cell::new(std::time::Duration::from_secs(0)), exec_duration: Cell::new(std::time::Duration::from_secs(0)),
}); });
if run_stats { if self.run_stats {
let stats = stats.clone(); let stats = stats.clone();
evaluator = Rc::new(move |tuple| { evaluator = Rc::new(move |tuple| {
let start = Instant::now(); let start = Timer::now();
let inner = evaluator(tuple); let inner = evaluator(tuple);
stats stats
.exec_duration .exec_duration
@ -845,6 +856,7 @@ impl SimpleEvaluator {
base_iri: self.base_iri.as_ref().map(|iri| iri.as_ref().clone()), base_iri: self.base_iri.as_ref().map(|iri| iri.as_ref().clone()),
}, },
dataset: QueryDataset::new(), dataset: QueryDataset::new(),
parsing_duration: None,
}, },
)? { )? {
Ok(encode_bindings(self.dataset.clone(), variables, iter)) Ok(encode_bindings(self.dataset.clone(), variables, iter))
@ -933,7 +945,7 @@ impl SimpleEvaluator {
Rc::new(move |tuple| tuple.get(v).cloned()) Rc::new(move |tuple| tuple.get(v).cloned())
} }
PlanExpression::Exists(plan) => { PlanExpression::Exists(plan) => {
let eval = self.plan_evaluator(plan.clone()); let (eval, _) = self.plan_evaluator(plan.clone()); //TODO: stats
Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into())) Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into()))
} }
PlanExpression::Or(a, b) => { PlanExpression::Or(a, b) => {
@ -4775,7 +4787,7 @@ impl Iterator for StatsIterator {
type Item = Result<EncodedTuple, EvaluationError>; type Item = Result<EncodedTuple, EvaluationError>;
fn next(&mut self) -> Option<Result<EncodedTuple, EvaluationError>> { fn next(&mut self) -> Option<Result<EncodedTuple, EvaluationError>> {
let start = Instant::now(); let start = Timer::now();
let result = self.inner.next(); let result = self.inner.next();
self.stats self.stats
.exec_duration .exec_duration
@ -4787,6 +4799,42 @@ impl Iterator for StatsIterator {
} }
} }
#[cfg(all(target_family = "wasm", target_os = "unknown"))]
pub struct Timer {
timestamp_ms: f64,
}
#[cfg(all(target_family = "wasm", target_os = "unknown"))]
impl Timer {
pub fn now() -> Self {
Self {
timestamp_ms: js_sys::Date::now(),
}
}
pub fn elapsed(&self) -> StdDuration {
StdDuration::from_secs_f64((js_sys::Date::now() - self.timestamp_ms) / 1000.)
}
}
#[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
pub struct Timer {
instant: Instant,
}
#[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
impl Timer {
pub fn now() -> Self {
Self {
instant: Instant::now(),
}
}
pub fn elapsed(&self) -> StdDuration {
self.instant.elapsed()
}
}
#[test] #[test]
fn uuid() { fn uuid() {
let mut buffer = String::default(); let mut buffer = String::default();

@ -17,29 +17,34 @@ use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update}; pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView; use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::{EvaluationError, QueryError}; pub use crate::sparql::error::{EvaluationError, QueryError};
use crate::sparql::eval::SimpleEvaluator; use crate::sparql::eval::{SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter}; pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
use crate::sparql::plan::PlanNodeWithStats;
use crate::sparql::plan_builder::PlanBuilder; use crate::sparql::plan_builder::PlanBuilder;
pub use crate::sparql::service::ServiceHandler; pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update; pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader; use crate::storage::StorageReader;
use json_event_parser::{JsonEvent, JsonWriter};
pub use oxrdf::{Variable, VariableNameParseError}; pub use oxrdf::{Variable, VariableNameParseError};
pub use sparesults::QueryResultsFormat; pub use sparesults::QueryResultsFormat;
pub use spargebra::ParseError; pub use spargebra::ParseError;
use std::collections::HashMap; use std::collections::HashMap;
use std::rc::Rc; use std::rc::Rc;
use std::time::Duration; use std::time::Duration;
use std::{fmt, io};
#[allow(clippy::needless_pass_by_value)] #[allow(clippy::needless_pass_by_value)]
pub(crate) fn evaluate_query( pub(crate) fn evaluate_query(
reader: StorageReader, reader: StorageReader,
query: impl TryInto<Query, Error = impl Into<EvaluationError>>, query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions, options: QueryOptions,
) -> Result<QueryResults, EvaluationError> { run_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
let query = query.try_into().map_err(Into::into)?; let query = query.try_into().map_err(Into::into)?;
let dataset = DatasetView::new(reader, &query.dataset); let dataset = DatasetView::new(reader, &query.dataset);
match query.inner { let start_planning = Timer::now();
let (results, plan_node_with_stats, planning_duration) = match query.inner {
spargebra::Query::Select { spargebra::Query::Select {
pattern, base_iri, .. pattern, base_iri, ..
} => { } => {
@ -50,13 +55,16 @@ pub(crate) fn evaluate_query(
&options.custom_functions, &options.custom_functions,
options.without_optimizations, options.without_optimizations,
)?; )?;
Ok(SimpleEvaluator::new( let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset), Rc::new(dataset),
base_iri.map(Rc::new), base_iri.map(Rc::new),
options.service_handler(), options.service_handler(),
Rc::new(options.custom_functions), Rc::new(options.custom_functions),
run_stats,
) )
.evaluate_select_plan(Rc::new(plan), Rc::new(variables))) .evaluate_select_plan(Rc::new(plan), Rc::new(variables));
(Ok(results), explanation, planning_duration)
} }
spargebra::Query::Ask { spargebra::Query::Ask {
pattern, base_iri, .. pattern, base_iri, ..
@ -68,13 +76,16 @@ pub(crate) fn evaluate_query(
&options.custom_functions, &options.custom_functions,
options.without_optimizations, options.without_optimizations,
)?; )?;
SimpleEvaluator::new( let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset), Rc::new(dataset),
base_iri.map(Rc::new), base_iri.map(Rc::new),
options.service_handler(), options.service_handler(),
Rc::new(options.custom_functions), Rc::new(options.custom_functions),
run_stats,
) )
.evaluate_ask_plan(Rc::new(plan)) .evaluate_ask_plan(Rc::new(plan));
(results, explanation, planning_duration)
} }
spargebra::Query::Construct { spargebra::Query::Construct {
template, template,
@ -96,13 +107,16 @@ pub(crate) fn evaluate_query(
&options.custom_functions, &options.custom_functions,
options.without_optimizations, options.without_optimizations,
); );
Ok(SimpleEvaluator::new( let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset), Rc::new(dataset),
base_iri.map(Rc::new), base_iri.map(Rc::new),
options.service_handler(), options.service_handler(),
Rc::new(options.custom_functions), Rc::new(options.custom_functions),
run_stats,
) )
.evaluate_construct_plan(Rc::new(plan), construct)) .evaluate_construct_plan(Rc::new(plan), construct);
(Ok(results), explanation, planning_duration)
} }
spargebra::Query::Describe { spargebra::Query::Describe {
pattern, base_iri, .. pattern, base_iri, ..
@ -114,15 +128,25 @@ pub(crate) fn evaluate_query(
&options.custom_functions, &options.custom_functions,
options.without_optimizations, options.without_optimizations,
)?; )?;
Ok(SimpleEvaluator::new( let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset), Rc::new(dataset),
base_iri.map(Rc::new), base_iri.map(Rc::new),
options.service_handler(), options.service_handler(),
Rc::new(options.custom_functions), Rc::new(options.custom_functions),
run_stats,
) )
.evaluate_describe_plan(Rc::new(plan))) .evaluate_describe_plan(Rc::new(plan));
} (Ok(results), explanation, planning_duration)
} }
};
let explanation = QueryExplanation {
inner: plan_node_with_stats,
with_stats: run_stats,
parsing_duration: query.parsing_duration,
planning_duration,
};
Ok((results, explanation))
} }
/// Options for SPARQL query evaluation. /// Options for SPARQL query evaluation.
@ -257,3 +281,39 @@ impl From<QueryOptions> for UpdateOptions {
Self { query_options } Self { query_options }
} }
} }
/// The explanation of a query.
#[derive(Clone)]
pub struct QueryExplanation {
inner: Rc<PlanNodeWithStats>,
with_stats: bool,
parsing_duration: Option<Duration>,
planning_duration: Duration,
}
impl QueryExplanation {
/// Writes the explanation as JSON.
pub fn write_in_json(&self, output: impl io::Write) -> io::Result<()> {
let mut writer = JsonWriter::from_writer(output);
writer.write_event(JsonEvent::StartObject)?;
if let Some(parsing_duration) = self.parsing_duration {
writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds"))?;
writer.write_event(JsonEvent::Number(
&parsing_duration.as_secs_f32().to_string(),
))?;
}
writer.write_event(JsonEvent::ObjectKey("planning duration in seconds"))?;
writer.write_event(JsonEvent::Number(
&self.planning_duration.as_secs_f32().to_string(),
))?;
writer.write_event(JsonEvent::ObjectKey("plan"))?;
self.inner.json_node(&mut writer, self.with_stats)?;
writer.write_event(JsonEvent::EndObject)
}
}
impl fmt::Debug for QueryExplanation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}", self.inner)
}
}
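For reference, a sketch of the JSON document that write_in_json produces and that the bench/explanation_to_*.py scripts consume, shown as a Python literal in the same style as those scripts (node names and numbers are hypothetical). "parsing duration in seconds" is only present when the query was parsed from a string, and "number of results" / "duration in seconds" on plan nodes are only present when statistics were enabled.

explanation_example = {
    "parsing duration in seconds": 0.00012,
    "planning duration in seconds": 0.00034,
    "plan": {
        "name": "Project(?s)",
        "number of results": 3,
        "duration in seconds": 0.0021,
        "children": [
            {
                "name": "StaticBindings(?s)",
                "number of results": 3,
                "duration in seconds": 0.0019,
                "children": []
            }
        ]
    }
}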

@ -1,6 +1,7 @@
use crate::model::{BlankNode, Literal, NamedNode, Term, Triple}; use crate::model::{BlankNode, Literal, NamedNode, Term, Triple};
use crate::sparql::Variable; use crate::sparql::Variable;
use crate::storage::numeric_encoder::EncodedTerm; use crate::storage::numeric_encoder::EncodedTerm;
use json_event_parser::{JsonEvent, JsonWriter};
use regex::Regex; use regex::Regex;
use spargebra::algebra::GraphPattern; use spargebra::algebra::GraphPattern;
use spargebra::term::GroundTerm; use spargebra::term::GroundTerm;
@ -10,6 +11,7 @@ use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::rc::Rc; use std::rc::Rc;
use std::time::Duration; use std::time::Duration;
use std::{fmt, io};
#[derive(Debug)] #[derive(Debug)]
pub enum PlanNode { pub enum PlanNode {
@ -387,6 +389,12 @@ pub struct PlanTerm<T> {
pub plain: T, pub plain: T,
} }
impl<T: fmt::Display> fmt::Display for PlanTerm<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.plain)
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum PatternValue { pub enum PatternValue {
Constant(PlanTerm<PatternValueConstant>), Constant(PlanTerm<PatternValueConstant>),
@ -394,6 +402,16 @@ pub enum PatternValue {
TriplePattern(Box<TriplePatternValue>), TriplePattern(Box<TriplePatternValue>),
} }
impl fmt::Display for PatternValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Constant(c) => write!(f, "{c}"),
Self::Variable(v) => write!(f, "{v}"),
Self::TriplePattern(p) => write!(f, "{p}"),
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum PatternValueConstant { pub enum PatternValueConstant {
NamedNode(NamedNode), NamedNode(NamedNode),
@ -402,6 +420,17 @@ pub enum PatternValueConstant {
DefaultGraph, DefaultGraph,
} }
impl fmt::Display for PatternValueConstant {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::NamedNode(n) => write!(f, "{n}"),
Self::Literal(l) => write!(f, "{l}"),
Self::Triple(t) => write!(f, "<< {t} >>"),
Self::DefaultGraph => f.write_str("DEFAULT"),
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct TriplePatternValue { pub struct TriplePatternValue {
pub subject: PatternValue, pub subject: PatternValue,
@ -409,12 +438,24 @@ pub struct TriplePatternValue {
pub object: PatternValue, pub object: PatternValue,
} }
impl fmt::Display for TriplePatternValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} {} {}", self.subject, self.predicate, self.object)
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct PlanVariable<P = Variable> { pub struct PlanVariable<P = Variable> {
pub encoded: usize, pub encoded: usize,
pub plain: P, pub plain: P,
} }
impl<P: fmt::Display> fmt::Display for PlanVariable<P> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.plain)
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum PlanExpression { pub enum PlanExpression {
NamedNode(PlanTerm<NamedNode>), NamedNode(PlanTerm<NamedNode>),
@ -625,6 +666,144 @@ impl PlanExpression {
} }
} }
impl fmt::Display for PlanExpression {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Variable(v) => {
write!(f, "{v}")
}
Self::Bound(v) => {
write!(f, "Bound({v})")
}
Self::NamedNode(n) => write!(f, "{n}"),
Self::Literal(l) => write!(f, "{l}"),
Self::Rand => write!(f, "Rand()"),
Self::Now => write!(f, "Now()"),
Self::Uuid => write!(f, "Uuid()"),
Self::StrUuid => write!(f, "StrUuid()"),
Self::UnaryPlus(e) => write!(f, "UnaryPlus({e})"),
Self::UnaryMinus(e) => write!(f, "UnaryMinus({e})"),
Self::Not(e) => write!(f, "Not({e})"),
Self::BNode(e) => {
if let Some(e) = e {
write!(f, "BNode({e})")
} else {
write!(f, "BNode()")
}
}
Self::Str(e) => write!(f, "Str({e})"),
Self::Lang(e) => write!(f, "Lang({e})"),
Self::Datatype(e) => write!(f, "Datatype({e})"),
Self::Iri(e) => write!(f, "Iri({e})"),
Self::Abs(e) => write!(f, "Abs({e})"),
Self::Ceil(e) => write!(f, "Ceil({e})"),
Self::Floor(e) => write!(f, "Floor({e})"),
Self::Round(e) => write!(f, "Round({e})"),
Self::UCase(e) => write!(f, "UCase({e})"),
Self::LCase(e) => write!(f, "LCase({e})"),
Self::StrLen(e) => write!(f, "StrLen({e})"),
Self::EncodeForUri(e) => write!(f, "EncodeForUri({e})"),
Self::StaticRegex(e, r) => write!(f, "StaticRegex({e}, {r})"),
Self::Year(e) => write!(f, "Year({e})"),
Self::Month(e) => write!(f, "Month({e})"),
Self::Day(e) => write!(f, "Day({e})"),
Self::Hours(e) => write!(f, "Hours({e})"),
Self::Minutes(e) => write!(f, "Minutes({e})"),
Self::Seconds(e) => write!(f, "Seconds({e})"),
Self::Timezone(e) => write!(f, "Timezone({e})"),
Self::Tz(e) => write!(f, "Tz({e})"),
Self::Md5(e) => write!(f, "Md5({e})"),
Self::Sha1(e) => write!(f, "Sha1({e})"),
Self::Sha256(e) => write!(f, "Sha256({e})"),
Self::Sha384(e) => write!(f, "Sha384({e})"),
Self::Sha512(e) => write!(f, "Sha512({e})"),
Self::IsIri(e) => write!(f, "IsIri({e})"),
Self::IsBlank(e) => write!(f, "IsBlank({e})"),
Self::IsLiteral(e) => write!(f, "IsLiteral({e})"),
Self::IsNumeric(e) => write!(f, "IsNumeric({e})"),
Self::IsTriple(e) => write!(f, "IsTriple({e})"),
Self::Subject(e) => write!(f, "Subject({e})"),
Self::Predicate(e) => write!(f, "Predicate({e})"),
Self::Object(e) => write!(f, "Object({e})"),
Self::BooleanCast(e) => write!(f, "BooleanCast({e})"),
Self::DoubleCast(e) => write!(f, "DoubleCast({e})"),
Self::FloatCast(e) => write!(f, "FloatCast({e})"),
Self::DecimalCast(e) => write!(f, "DecimalCast({e})"),
Self::IntegerCast(e) => write!(f, "IntegerCast({e})"),
Self::DateCast(e) => write!(f, "DateCast({e})"),
Self::TimeCast(e) => write!(f, "TimeCast({e})"),
Self::DateTimeCast(e) => write!(f, "DateTimeCast({e})"),
Self::DurationCast(e) => write!(f, "DurationCast({e})"),
Self::YearMonthDurationCast(e) => write!(f, "YearMonthDurationCast({e})"),
Self::DayTimeDurationCast(e) => write!(f, "DayTimeDurationCast({e})"),
Self::StringCast(e) => write!(f, "StringCast({e})"),
Self::Or(a, b) => write!(f, "Or({a}, {b})"),
Self::And(a, b) => write!(f, "And({a}, {b})"),
Self::Equal(a, b) => write!(f, "Equal({a}, {b})"),
Self::Greater(a, b) => write!(f, "Greater({a}, {b})"),
Self::GreaterOrEqual(a, b) => write!(f, "GreaterOrEqual({a}, {b})"),
Self::Less(a, b) => write!(f, "Less({a}, {b})"),
Self::LessOrEqual(a, b) => write!(f, "LessOrEqual({a}, {b})"),
Self::Add(a, b) => write!(f, "Add({a}, {b})"),
Self::Subtract(a, b) => write!(f, "Subtract({a}, {b})"),
Self::Multiply(a, b) => write!(f, "Multiply({a}, {b})"),
Self::Divide(a, b) => write!(f, "Divide({a}, {b})"),
Self::LangMatches(a, b) => write!(f, "LangMatches({a}, {b})"),
Self::Contains(a, b) => write!(f, "Contains({a}, {b})"),
Self::StaticReplace(a, b, c) => write!(f, "StaticReplace({a}, {b}, {c})"),
Self::StrStarts(a, b) => write!(f, "StrStarts({a}, {b})"),
Self::StrEnds(a, b) => write!(f, "StrEnds({a}, {b})"),
Self::StrBefore(a, b) => write!(f, "StrBefore({a}, {b})"),
Self::StrAfter(a, b) => write!(f, "StrAfter({a}, {b})"),
Self::StrLang(a, b) => write!(f, "StrLang({a}, {b})"),
Self::StrDt(a, b) => write!(f, "StrDt({a}, {b})"),
Self::SameTerm(a, b) => write!(f, "SameTerm({a}, {b})"),
Self::SubStr(a, b, None) => write!(f, "SubStr({a}, {b})"),
Self::DynamicRegex(a, b, None) => write!(f, "DynamicRegex({a}, {b})"),
Self::Adjust(a, b) => write!(f, "Adjust({a}, {b})"),
Self::If(a, b, c) => write!(f, "If({a}, {b}, {c})"),
Self::SubStr(a, b, Some(c)) => write!(f, "SubStr({a}, {b}, {c})"),
Self::DynamicRegex(a, b, Some(c)) => write!(f, "DynamicRegex({a}, {b}, {c})"),
Self::DynamicReplace(a, b, c, None) => write!(f, "DynamicReplace({a}, {b}, {c})"),
Self::Triple(a, b, c) => write!(f, "Triple({a}, {b}, {c})"),
Self::DynamicReplace(a, b, c, Some(d)) => {
write!(f, "DynamicReplace({a}, {b}, {c}, {d})")
}
Self::Concat(es) => {
write!(f, "Concat(")?;
for (i, e) in es.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{e}")?;
}
write!(f, ")")
}
Self::Coalesce(es) => {
write!(f, "Coalesce(")?;
for (i, e) in es.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{e}")?;
}
write!(f, ")")
}
Self::CustomFunction(name, es) => {
write!(f, "{name}(")?;
for (i, e) in es.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{e}")?;
}
write!(f, ")")
}
Self::Exists(_) => write!(f, "Exists()"), //TODO
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct PlanAggregation { pub struct PlanAggregation {
pub function: PlanAggregationFunction, pub function: PlanAggregationFunction,
@ -632,6 +811,43 @@ pub struct PlanAggregation {
pub distinct: bool, pub distinct: bool,
} }
impl fmt::Display for PlanAggregation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.function {
PlanAggregationFunction::Count => {
write!(f, "Count")
}
PlanAggregationFunction::Sum => {
write!(f, "Sum")
}
PlanAggregationFunction::Min => {
write!(f, "Min")
}
PlanAggregationFunction::Max => {
write!(f, "Max")
}
PlanAggregationFunction::Avg => {
write!(f, "Avg")
}
PlanAggregationFunction::GroupConcat { .. } => {
write!(f, "GroupConcat")
}
PlanAggregationFunction::Sample => write!(f, "Sample"),
}?;
if self.distinct {
write!(f, "Distinct")?;
}
write!(f, "(")?;
if let Some(expr) = &self.parameter {
write!(f, "{expr}")?;
}
if let PlanAggregationFunction::GroupConcat { separator } = &self.function {
write!(f, "; separator={separator}")?;
}
write!(f, ")")
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum PlanAggregationFunction { pub enum PlanAggregationFunction {
Count, Count,
@ -655,12 +871,45 @@ pub enum PlanPropertyPath {
NegatedPropertySet(Rc<Vec<PlanTerm<NamedNode>>>), NegatedPropertySet(Rc<Vec<PlanTerm<NamedNode>>>),
} }
impl fmt::Display for PlanPropertyPath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Path(p) => write!(f, "{p}"),
Self::Reverse(p) => write!(f, "Reverse({p})"),
Self::Sequence(a, b) => write!(f, "Sequence({a}, {b})"),
Self::Alternative(a, b) => write!(f, "Alternative({a}, {b})"),
Self::ZeroOrMore(p) => write!(f, "ZeroOrMore({p})"),
Self::OneOrMore(p) => write!(f, "OneOrMore({p})"),
Self::ZeroOrOne(p) => write!(f, "ZeroOrOne({p})"),
Self::NegatedPropertySet(ps) => {
write!(f, "NegatedPropertySet(")?;
for (i, p) in ps.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{p}")?;
}
write!(f, ")")
}
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Comparator { pub enum Comparator {
Asc(PlanExpression), Asc(PlanExpression),
Desc(PlanExpression), Desc(PlanExpression),
} }
impl fmt::Display for Comparator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Asc(c) => write!(f, "Asc({c})"),
Self::Desc(c) => write!(f, "Desc({c})"),
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct TripleTemplate { pub struct TripleTemplate {
pub subject: TripleTemplateValue, pub subject: TripleTemplateValue,
@ -668,6 +917,12 @@ pub struct TripleTemplate {
pub object: TripleTemplateValue, pub object: TripleTemplateValue,
} }
impl fmt::Display for TripleTemplate {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} {} {}", self.subject, self.predicate, self.object)
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum TripleTemplateValue { pub enum TripleTemplateValue {
Constant(PlanTerm<Term>), Constant(PlanTerm<Term>),
@ -676,6 +931,17 @@ pub enum TripleTemplateValue {
Triple(Box<TripleTemplate>), Triple(Box<TripleTemplate>),
} }
impl fmt::Display for TripleTemplateValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Constant(c) => write!(f, "{c}"),
Self::BlankNode(bn) => write!(f, "{bn}"),
Self::Variable(v) => write!(f, "{v}"),
Self::Triple(t) => write!(f, "<< {t} >>"),
}
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTuple { pub struct EncodedTuple {
inner: Vec<Option<EncodedTerm>>, inner: Vec<Option<EncodedTerm>>,
@ -761,3 +1027,135 @@ pub struct PlanNodeWithStats {
pub exec_count: Cell<usize>, pub exec_count: Cell<usize>,
pub exec_duration: Cell<Duration>, pub exec_duration: Cell<Duration>,
} }
impl PlanNodeWithStats {
pub fn json_node(
&self,
writer: &mut JsonWriter<impl io::Write>,
with_stats: bool,
) -> io::Result<()> {
writer.write_event(JsonEvent::StartObject)?;
writer.write_event(JsonEvent::ObjectKey("name"))?;
writer.write_event(JsonEvent::String(&self.node_label()))?;
if with_stats {
writer.write_event(JsonEvent::ObjectKey("number of results"))?;
writer.write_event(JsonEvent::Number(&self.exec_count.get().to_string()))?;
writer.write_event(JsonEvent::ObjectKey("duration in seconds"))?;
writer.write_event(JsonEvent::Number(
&self.exec_duration.get().as_secs_f32().to_string(),
))?;
}
writer.write_event(JsonEvent::ObjectKey("children"))?;
writer.write_event(JsonEvent::StartArray)?;
for child in &self.children {
child.json_node(writer, with_stats)?;
}
writer.write_event(JsonEvent::EndArray)?;
writer.write_event(JsonEvent::EndObject)
}
fn node_label(&self) -> String {
match self.node.as_ref() {
PlanNode::Aggregate {
key_variables,
aggregates,
..
} => format!(
"Aggregate({})",
key_variables
.iter()
.map(|c| c.to_string())
.chain(aggregates.iter().map(|(agg, v)| format!("{agg} -> {v}")))
.collect::<Vec<_>>()
.join(", ")
),
PlanNode::AntiJoin { .. } => "AntiJoin".to_owned(),
PlanNode::Extend {
expression,
variable,
..
} => format!("Extend({expression} -> {variable})"),
PlanNode::Filter { expression, .. } => format!("Filter({expression})"),
PlanNode::ForLoopJoin { .. } => "ForLoopJoin".to_owned(),
PlanNode::ForLoopLeftJoin { .. } => "ForLoopLeftJoin".to_owned(),
PlanNode::HashDeduplicate { .. } => "HashDeduplicate".to_owned(),
PlanNode::HashJoin { .. } => "HashJoin".to_owned(),
PlanNode::HashLeftJoin { expression, .. } => format!("HashLeftJoin({expression})"),
PlanNode::Limit { count, .. } => format!("Limit({count})"),
PlanNode::PathPattern {
subject,
path,
object,
graph_name,
} => format!("PathPattern({subject} {path} {object} {graph_name})"),
PlanNode::Project { mapping, .. } => {
format!(
"Project({})",
mapping
.iter()
.map(|(f, t)| if f.plain == t.plain {
f.to_string()
} else {
format!("{f} -> {t}")
})
.collect::<Vec<_>>()
.join(", ")
)
}
PlanNode::QuadPattern {
subject,
predicate,
object,
graph_name,
} => format!("QuadPattern({subject} {predicate} {object} {graph_name})"),
PlanNode::Reduced { .. } => "Reduced".to_owned(),
PlanNode::Service {
service_name,
silent,
..
} => {
if *silent {
format!("SilentService({service_name})")
} else {
format!("Service({service_name})")
}
}
PlanNode::Skip { count, .. } => format!("Skip({count})"),
PlanNode::Sort { by, .. } => {
format!(
"Sort({})",
by.iter()
.map(|c| c.to_string())
.collect::<Vec<_>>()
.join(", ")
)
}
PlanNode::StaticBindings { variables, .. } => {
format!(
"StaticBindings({})",
variables
.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(", ")
)
}
PlanNode::Union { .. } => "Union".to_owned(),
}
}
}
impl fmt::Debug for PlanNodeWithStats {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut obj = f.debug_struct("Node");
obj.field("name", &self.node_label());
if self.exec_duration.get() > Duration::default() {
obj.field("number of results", &self.exec_count.get());
obj.field("duration in seconds", &self.exec_duration.get());
}
if !self.children.is_empty() {
obj.field("children", &self.children);
}
obj.finish()
}
}

@ -130,11 +130,12 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
self.base_iri.clone(), self.base_iri.clone(),
self.options.query_options.service_handler(), self.options.query_options.service_handler(),
Rc::new(self.options.query_options.custom_functions.clone()), Rc::new(self.options.query_options.custom_functions.clone()),
false,
); );
let mut bnodes = HashMap::new(); let mut bnodes = HashMap::new();
let (eval, _) = evaluator.plan_evaluator(Rc::new(plan));
let tuples = let tuples =
evaluator.plan_evaluator(Rc::new(plan))(EncodedTuple::with_capacity(variables.len())) eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; //TODO: would be much better to stream
.collect::<Result<Vec<_>, _>>()?; //TODO: would be much better to stream
for tuple in tuples { for tuple in tuples {
for quad in delete { for quad in delete {
if let Some(quad) = if let Some(quad) =

@ -29,8 +29,8 @@ use crate::io::{
}; };
use crate::model::*; use crate::model::*;
use crate::sparql::{ use crate::sparql::{
evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update, evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions,
UpdateOptions, QueryResults, Update, UpdateOptions,
}; };
use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm};
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
@ -207,7 +207,37 @@ impl Store {
query: impl TryInto<Query, Error = impl Into<EvaluationError>>, query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions, options: QueryOptions,
) -> Result<QueryResults, EvaluationError> { ) -> Result<QueryResults, EvaluationError> {
evaluate_query(self.storage.snapshot(), query, options) let (results, _) = self.explain_query_opt(query, options, false)?;
results
}
/// Executes a [SPARQL 1.1 query](https://www.w3.org/TR/sparql11-query/) with some options and
/// returns a query explanation with some statistics (if enabled with the `with_stats` parameter).
///
/// Beware: if you want to compute statistics you need to exhaust the results iterator before having a look at them.
///
/// Usage example serialising the explanation with statistics in JSON:
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::sparql::{QueryOptions, QueryResults};
///
/// let store = Store::new()?;
/// if let (Ok(QueryResults::Solutions(solutions)), explanation) = store.explain_query_opt("SELECT ?s WHERE { VALUES ?s { 1 2 3 } }", QueryOptions::default(), true)? {
/// // We make sure to have read all the solutions
/// for _ in solutions {
/// }
/// let mut buf = Vec::new();
/// explanation.write_in_json(&mut buf)?;
/// }
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn explain_query_opt(
&self,
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
with_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
evaluate_query(self.storage.snapshot(), query, options, with_stats)
} }
/// Retrieves quads with a filter on each quad component /// Retrieves quads with a filter on each quad component
@ -918,7 +948,8 @@ impl<'a> Transaction<'a> {
query: impl TryInto<Query, Error = impl Into<EvaluationError>>, query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions, options: QueryOptions,
) -> Result<QueryResults, EvaluationError> { ) -> Result<QueryResults, EvaluationError> {
evaluate_query(self.writer.reader(), query, options) let (results, _) = evaluate_query(self.writer.reader(), query, options, false)?;
results
} }
/// Retrieves quads with a filter on each quad component. /// Retrieves quads with a filter on each quad component.

@ -12,11 +12,11 @@ use std::fs::{create_dir, remove_dir_all, File};
use std::io::Cursor; use std::io::Cursor;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::io::Write; use std::io::Write;
#[cfg(not(target_family = "wasm"))] #[cfg(target_os = "linux")]
use std::iter::once; use std::iter::once;
#[cfg(not(target_family = "wasm"))] #[cfg(not(target_family = "wasm"))]
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
#[cfg(not(target_family = "wasm"))] #[cfg(target_os = "linux")]
use std::process::Command; use std::process::Command;
const DATA: &str = r#" const DATA: &str = r#"
@prefix schema: <http://schema.org/> . @prefix schema: <http://schema.org/> .
@ -501,7 +501,7 @@ fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> {
Ok(()) Ok(())
} }
#[cfg(not(target_family = "wasm"))] #[cfg(target_os = "linux")]
fn reset_dir(dir: &str) -> Result<(), Box<dyn Error>> { fn reset_dir(dir: &str) -> Result<(), Box<dyn Error>> {
assert!(Command::new("git") assert!(Command::new("git")
.args(["clean", "-fX", dir]) .args(["clean", "-fX", dir])

@ -7,7 +7,7 @@ use oxigraph::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerialize
use oxigraph::model::{ use oxigraph::model::{
GraphName, GraphNameRef, IriParseError, NamedNode, NamedNodeRef, NamedOrBlankNode, GraphName, GraphNameRef, IriParseError, NamedNode, NamedNodeRef, NamedOrBlankNode,
}; };
use oxigraph::sparql::{Query, QueryResults, Update}; use oxigraph::sparql::{Query, QueryOptions, QueryResults, Update};
use oxigraph::store::{BulkLoader, LoaderError, Store}; use oxigraph::store::{BulkLoader, LoaderError, Store};
use oxiri::Iri; use oxiri::Iri;
use rand::random; use rand::random;
@ -175,6 +175,23 @@ enum Command {
/// By default the format is guessed from the results file extension. /// By default the format is guessed from the results file extension.
#[arg(long, required_unless_present = "results_file")] #[arg(long, required_unless_present = "results_file")]
results_format: Option<String>, results_format: Option<String>,
/// Prints to stderr a human-readable explanation of the query evaluation.
///
/// Use the stats option to print also query evaluation statistics.
#[arg(long, conflicts_with = "explain_file")]
explain: bool,
/// Write to the given file an explanation of the query evaluation.
///
/// If the file extension is .json the JSON format is used, if .txt a human readable format is used.
///
/// Use the stats option to print also query evaluation statistics.
#[arg(long, conflicts_with = "explain")]
explain_file: Option<PathBuf>,
/// Computes some evaluation statistics to print as part of the query explanations.
///
/// Beware, computing the statistics adds some overhead to the evaluation runtime.
#[arg(long)]
stats: bool,
}, },
/// Executes a SPARQL update against the store. /// Executes a SPARQL update against the store.
Update { Update {
@ -424,6 +441,9 @@ pub fn main() -> anyhow::Result<()> {
query_base, query_base,
results_file, results_file,
results_format, results_format,
explain,
explain_file,
stats,
} => { } => {
let query = if let Some(query) = query { let query = if let Some(query) = query {
query query
@ -443,12 +463,16 @@ pub fn main() -> anyhow::Result<()> {
.location .location
.ok_or_else(|| anyhow!("The --location argument is required"))?, .ok_or_else(|| anyhow!("The --location argument is required"))?,
)?; )?;
match store.query(query)? { let (results, explanation) =
store.explain_query_opt(query, QueryOptions::default(), stats)?;
let print_result = (|| {
match results? {
QueryResults::Solutions(solutions) => { QueryResults::Solutions(solutions) => {
let format = if let Some(name) = results_format { let format = if let Some(name) = results_format {
if let Some(format) = QueryResultsFormat::from_extension(&name) { if let Some(format) = QueryResultsFormat::from_extension(&name) {
format format
} else if let Some(format) = QueryResultsFormat::from_media_type(&name) { } else if let Some(format) = QueryResultsFormat::from_media_type(&name)
{
format format
} else { } else {
bail!("The file format '{name}' is unknown") bail!("The file format '{name}' is unknown")
@ -485,7 +509,8 @@ pub fn main() -> anyhow::Result<()> {
let format = if let Some(name) = results_format { let format = if let Some(name) = results_format {
if let Some(format) = QueryResultsFormat::from_extension(&name) { if let Some(format) = QueryResultsFormat::from_extension(&name) {
format format
} else if let Some(format) = QueryResultsFormat::from_media_type(&name) { } else if let Some(format) = QueryResultsFormat::from_media_type(&name)
{
format format
} else { } else {
bail!("The file format '{name}' is unknown") bail!("The file format '{name}' is unknown")
@ -534,8 +559,8 @@ pub fn main() -> anyhow::Result<()> {
writer.finish()?; writer.finish()?;
} else { } else {
let stdout = stdout(); // Not needed in Rust 1.61 let stdout = stdout(); // Not needed in Rust 1.61
let mut writer = let mut writer = GraphSerializer::from_format(format)
GraphSerializer::from_format(format).triple_writer(stdout.lock())?; .triple_writer(stdout.lock())?;
for triple in triples { for triple in triples {
writer.write(triple?.as_ref())?; writer.write(triple?.as_ref())?;
} }
@ -544,6 +569,24 @@ pub fn main() -> anyhow::Result<()> {
} }
} }
Ok(()) Ok(())
})();
if let Some(explain_file) = explain_file {
let mut file = BufWriter::new(File::create(&explain_file)?);
match explain_file
.extension()
.and_then(|e| e.to_str()) {
Some("json") => {
explanation.write_in_json(file)?;
},
Some("txt") => {
write!(file, "{:?}", explanation)?;
},
_ => bail!("The given explanation file {} must have an extension that is .json or .txt", explain_file.display())
}
} else if explain || stats {
eprintln!("{:?}", explanation);
}
print_result
} }
Command::Update { Command::Update {
update, update,
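Taken together, the new --explain-file and --stats options and the bench scripts above enable a profile-then-visualize workflow. Below is a minimal sketch in Python, mirroring the subprocess style of the bench scripts; --explain-file and --stats come from the options added in this commit, but the binary name oxigraph_server, the exact flag spelling and placement produced by the clap derive, and the paths and query are assumptions, so check the CLI help for the real syntax.

import subprocess

# Run a query with statistics enabled and write the explanation to a JSON file
# (exact CLI syntax is assumed from the clap definitions above; check --help).
subprocess.run([
    "oxigraph_server", "--location", "data", "query",
    "--query", "SELECT ?s WHERE { ?s ?p ?o }",
    "--results-file", "results.tsv",
    "--explain-file", "explanation.json",
    "--stats",
], check=True)

# Visualize the explanation with the bench scripts added by this commit.
subprocess.run(["python", "bench/explanation_to_flamegraph.py", "explanation.json", "flamegraph.svg"], check=True)
subprocess.run(["python", "bench/explanation_to_trace.py", "explanation.json", "trace.json"], check=True)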
