From 60ffd99ad8f9dca53c519282a19947d046ccf4df Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 11 Apr 2023 13:57:58 +0200 Subject: [PATCH] Exposes the query profiler to the CLI app --- Cargo.lock | 2 + bench/explanation_to_flamegraph.py | 56 ++++ bench/explanation_to_trace.py | 52 ++++ lib/Cargo.toml | 2 + lib/src/sparql/algebra.rs | 13 +- lib/src/sparql/eval.rs | 184 ++++++++----- lib/src/sparql/mod.rs | 84 +++++- lib/src/sparql/plan.rs | 398 +++++++++++++++++++++++++++++ lib/src/sparql/update.rs | 5 +- lib/src/store.rs | 39 ++- lib/tests/store.rs | 6 +- server/src/main.rs | 223 +++++++++------- 12 files changed, 883 insertions(+), 181 deletions(-) create mode 100644 bench/explanation_to_flamegraph.py create mode 100644 bench/explanation_to_trace.py diff --git a/Cargo.lock b/Cargo.lock index 7f54277d..2309ef2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -932,6 +932,8 @@ dependencies = [ "digest", "getrandom", "hex", + "js-sys", + "json-event-parser", "lazy_static", "libc", "md-5", diff --git a/bench/explanation_to_flamegraph.py b/bench/explanation_to_flamegraph.py new file mode 100644 index 00000000..c65fde30 --- /dev/null +++ b/bench/explanation_to_flamegraph.py @@ -0,0 +1,56 @@ +""" +Converts a SPARQL query JSON explanation file to a flamegraph. +Usage: python explanation_to_flamegraph.py explanation.json flamegraph.svg +""" +import json +import subprocess +from argparse import ArgumentParser +from pathlib import Path +from shutil import which +from tempfile import NamedTemporaryFile + +if which('flamegraph.pl') is None: + raise Exception( + 'This script requires the flamegraph.pl script from https://github.com/brendangregg/FlameGraph to be installed and be in $PATH.') + +parser = ArgumentParser( + prog='OxigraphFlamegraph', + description='Builds a flamegraph from the Oxigraph query explanation JSON format', + epilog='Text at the bottom of help') +parser.add_argument('json_explanation', type=Path) +parser.add_argument('flamegraph_svg', type=Path) +args = parser.parse_args() + + +def trace_line(label: str, value: float): + return f"{label} {int(value * 1_000_000)}" + + +with args.json_explanation.open('rt') as fp: + explanation = json.load(fp) +trace = [] +if "parsing duration in seconds" in explanation: + trace.append(trace_line("parsing", explanation['parsing duration in seconds'])) +if "planning duration in seconds" in explanation: + trace.append(trace_line("planning", explanation['planning duration in seconds'])) +already_used_names = {} + + +def add_to_trace(node, path): + path = f"{path};{node['name'].replace(' ', '`')}" + if path in already_used_names: + already_used_names[path] += 1 + path = f"{path}`{already_used_names[path]}" + else: + already_used_names[path] = 0 + samples = node['duration in seconds'] - sum(child['duration in seconds'] for child in node.get("children", ())) + trace.append(trace_line(path, samples)) + for i, child in enumerate(node.get("children", ())): + add_to_trace(child, path) + + +add_to_trace(explanation["plan"], 'eval') +with NamedTemporaryFile('w+t') as fp: + fp.write('\n'.join(trace)) + fp.flush() + args.flamegraph_svg.write_bytes(subprocess.run(['flamegraph.pl', fp.name], stdout=subprocess.PIPE).stdout) diff --git a/bench/explanation_to_trace.py b/bench/explanation_to_trace.py new file mode 100644 index 00000000..e8f7a239 --- /dev/null +++ b/bench/explanation_to_trace.py @@ -0,0 +1,52 @@ +""" +Converts a SPARQL query JSON explanation file to a tracing event file compatible with Chrome. +Usage: python explanation_to_trace.py explanation.json trace.json +""" +import json +from argparse import ArgumentParser +from pathlib import Path + +parser = ArgumentParser( + prog='OxigraphTracing', + description='Builds a Trace Event Format file from the Oxigraph query explanation JSON format') +parser.add_argument('json_explanation', type=Path) +parser.add_argument('json_trace_event', type=Path) +args = parser.parse_args() + +with args.json_explanation.open('rt') as fp: + explanation = json.load(fp) +trace = [] + + +def trace_element(name: str, cat: str, start_s: float, duration_s: float): + return { + "name": name, + "cat": cat, + "ph": "X", + "ts": int(start_s * 1_000_000), + "dur": int(duration_s * 1_000_000), + "pid": 1 + } + + +def add_to_trace(node, path, start_time: float): + path = f"{path};{node['name'].replace(' ', '`')}" + trace.append(trace_element(node["name"], node["name"].split("(")[0], start_time, node["duration in seconds"])) + for child in node.get("children", ()): + add_to_trace(child, path, start_time) + start_time += child["duration in seconds"] + + +current_time = 0 +if "parsing duration in seconds" in explanation: + d = explanation["parsing duration in seconds"] + trace.append(trace_element(f"parsing", "parsing", current_time, d)) + current_time += d +if "planning duration in seconds" in explanation: + d = explanation["planning duration in seconds"] + trace.append(trace_element(f"planning", "planning", current_time, d)) + current_time += d +add_to_trace(explanation["plan"], 'eval', current_time) + +with args.json_trace_event.open("wt") as fp: + json.dump(trace, fp) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 83c65895..2f9b8d95 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -36,6 +36,7 @@ rio_xml = "0.8" hex = "0.4" siphasher = "0.3" lazy_static = "1" +json-event-parser = "0.1" oxrdf = { version = "0.1.5", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } oxsdatatypes = { version = "0.1.1", path="oxsdatatypes" } spargebra = { version = "0.2.7", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } @@ -48,6 +49,7 @@ oxhttp = { version = "0.1", optional = true } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] getrandom = { version = "0.2", features = ["js"] } +js-sys = "0.3" [target.'cfg(not(target_family = "wasm"))'.dev-dependencies] criterion = "0.4" diff --git a/lib/src/sparql/algebra.rs b/lib/src/sparql/algebra.rs index dee08ec6..41fad114 100644 --- a/lib/src/sparql/algebra.rs +++ b/lib/src/sparql/algebra.rs @@ -5,9 +5,11 @@ //! Warning: this implementation is an unstable work in progress use crate::model::*; +use crate::sparql::eval::Timer; use spargebra::GraphUpdateOperation; use std::fmt; use std::str::FromStr; +use std::time::Duration; /// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/). /// @@ -30,13 +32,19 @@ use std::str::FromStr; pub struct Query { pub(super) inner: spargebra::Query, pub(super) dataset: QueryDataset, + pub(super) parsing_duration: Option, } impl Query { /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query. pub fn parse(query: &str, base_iri: Option<&str>) -> Result { - let query = spargebra::Query::parse(query, base_iri)?; - Ok(query.into()) + let start = Timer::now(); + let query = Self::from(spargebra::Query::parse(query, base_iri)?); + Ok(Self { + dataset: query.dataset, + inner: query.inner, + parsing_duration: Some(start.elapsed()), + }) } /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) @@ -90,6 +98,7 @@ impl From for Query { | spargebra::Query::Ask { dataset, .. } => dataset, }), inner: query, + parsing_duration: None, } } } diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 67de2aac..9b01b45c 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -28,6 +28,8 @@ use std::iter::Iterator; use std::iter::{empty, once}; use std::rc::Rc; use std::str; +use std::time::Duration as StdDuration; +#[cfg(not(all(target_family = "wasm", target_os = "unknown")))] use std::time::Instant; const REGEX_SIZE_LIMIT: usize = 1_000_000; @@ -42,6 +44,7 @@ pub struct SimpleEvaluator { now: DateTime, service_handler: Rc>, custom_functions: Rc, + run_stats: bool, } impl SimpleEvaluator { @@ -50,6 +53,7 @@ impl SimpleEvaluator { base_iri: Option>>, service_handler: Rc>, custom_functions: Rc, + run_stats: bool, ) -> Self { Self { dataset, @@ -57,6 +61,7 @@ impl SimpleEvaluator { now: DateTime::now().unwrap(), service_handler, custom_functions, + run_stats, } } @@ -64,69 +69,76 @@ impl SimpleEvaluator { &self, plan: Rc, variables: Rc>, - ) -> QueryResults { - let iter = self.plan_evaluator(plan)(EncodedTuple::with_capacity(variables.len())); - QueryResults::Solutions(decode_bindings(self.dataset.clone(), iter, variables)) + ) -> (QueryResults, Rc) { + let (eval, stats) = self.plan_evaluator(plan); + ( + QueryResults::Solutions(decode_bindings( + self.dataset.clone(), + eval(EncodedTuple::with_capacity(variables.len())), + variables, + )), + stats, + ) } - pub fn evaluate_ask_plan(&self, plan: Rc) -> Result { + pub fn evaluate_ask_plan( + &self, + plan: Rc, + ) -> (Result, Rc) { let from = EncodedTuple::with_capacity(plan.used_variables().len()); - match self.plan_evaluator(plan)(from).next() { - Some(Ok(_)) => Ok(QueryResults::Boolean(true)), - Some(Err(error)) => Err(error), - None => Ok(QueryResults::Boolean(false)), - } + let (eval, stats) = self.plan_evaluator(plan); + ( + match eval(from).next() { + Some(Ok(_)) => Ok(QueryResults::Boolean(true)), + Some(Err(error)) => Err(error), + None => Ok(QueryResults::Boolean(false)), + }, + stats, + ) } pub fn evaluate_construct_plan( &self, plan: Rc, template: Vec, - ) -> QueryResults { + ) -> (QueryResults, Rc) { let from = EncodedTuple::with_capacity(plan.used_variables().len()); - QueryResults::Graph(QueryTripleIter { - iter: Box::new(ConstructIterator { - eval: self.clone(), - iter: self.plan_evaluator(plan)(from), - template, - buffered_results: Vec::default(), - bnodes: Vec::default(), + let (eval, stats) = self.plan_evaluator(plan); + ( + QueryResults::Graph(QueryTripleIter { + iter: Box::new(ConstructIterator { + eval: self.clone(), + iter: eval(from), + template, + buffered_results: Vec::default(), + bnodes: Vec::default(), + }), }), - }) + stats, + ) } - pub fn evaluate_describe_plan(&self, plan: Rc) -> QueryResults { + pub fn evaluate_describe_plan( + &self, + plan: Rc, + ) -> (QueryResults, Rc) { let from = EncodedTuple::with_capacity(plan.used_variables().len()); - QueryResults::Graph(QueryTripleIter { - iter: Box::new(DescribeIterator { - eval: self.clone(), - iter: self.plan_evaluator(plan)(from), - quads: Box::new(empty()), + let (eval, stats) = self.plan_evaluator(plan); + ( + QueryResults::Graph(QueryTripleIter { + iter: Box::new(DescribeIterator { + eval: self.clone(), + iter: eval(from), + quads: Box::new(empty()), + }), }), - }) + stats, + ) } pub fn plan_evaluator( &self, node: Rc, - ) -> Rc EncodedTuplesIterator> { - self.build_plan_evaluator(node, false).0 - } - - pub fn plan_evaluator_with_stats( - &self, - node: Rc, - ) -> ( - Rc EncodedTuplesIterator>, - Rc, - ) { - self.build_plan_evaluator(node, true) - } - - fn build_plan_evaluator( - &self, - node: Rc, - run_stats: bool, ) -> ( Rc EncodedTuplesIterator>, Rc, @@ -372,9 +384,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); + let (left, left_stats) = self.plan_evaluator(left.clone()); stat_children.push(left_stats); - let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); + let (right, right_stats) = self.plan_evaluator(right.clone()); stat_children.push(right_stats); if join_keys.is_empty() { // Cartesian product @@ -418,9 +430,9 @@ impl SimpleEvaluator { } } PlanNode::ForLoopJoin { left, right } => { - let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); + let (left, left_stats) = self.plan_evaluator(left.clone()); stat_children.push(left_stats); - let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); + let (right, right_stats) = self.plan_evaluator(right.clone()); stat_children.push(right_stats); Rc::new(move |from| { let right = right.clone(); @@ -436,9 +448,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); + let (left, left_stats) = self.plan_evaluator(left.clone()); stat_children.push(left_stats); - let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); + let (right, right_stats) = self.plan_evaluator(right.clone()); stat_children.push(right_stats); if join_keys.is_empty() { Rc::new(move |from| { @@ -479,9 +491,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); + let (left, left_stats) = self.plan_evaluator(left.clone()); stat_children.push(left_stats); - let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); + let (right, right_stats) = self.plan_evaluator(right.clone()); stat_children.push(right_stats); let expression = self.expression_evaluator(expression); // Real hash join @@ -508,9 +520,9 @@ impl SimpleEvaluator { right, possible_problem_vars, } => { - let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats); + let (left, left_stats) = self.plan_evaluator(left.clone()); stat_children.push(left_stats); - let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats); + let (right, right_stats) = self.plan_evaluator(right.clone()); stat_children.push(right_stats); let possible_problem_vars = possible_problem_vars.clone(); Rc::new(move |from| { @@ -533,7 +545,7 @@ impl SimpleEvaluator { }) } PlanNode::Filter { child, expression } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let expression = self.expression_evaluator(expression); Rc::new(move |from| { @@ -552,8 +564,7 @@ impl SimpleEvaluator { let children: Vec<_> = children .iter() .map(|child| { - let (child, child_stats) = - self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); child }) @@ -572,7 +583,7 @@ impl SimpleEvaluator { variable, expression, } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let position = variable.encoded; let expression = self.expression_evaluator(expression); @@ -588,7 +599,7 @@ impl SimpleEvaluator { }) } PlanNode::Sort { child, by } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let by: Vec<_> = by .iter() @@ -646,12 +657,12 @@ impl SimpleEvaluator { }) } PlanNode::HashDeduplicate { child } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); Rc::new(move |from| Box::new(hash_deduplicate(child(from)))) } PlanNode::Reduced { child } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); Rc::new(move |from| { Box::new(ConsecutiveDeduplication { @@ -661,19 +672,19 @@ impl SimpleEvaluator { }) } PlanNode::Skip { child, count } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let count = *count; Rc::new(move |from| Box::new(child(from).skip(count))) } PlanNode::Limit { child, count } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let count = *count; Rc::new(move |from| Box::new(child(from).take(count))) } PlanNode::Project { child, mapping } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let mapping = mapping.clone(); Rc::new(move |from| { @@ -713,7 +724,7 @@ impl SimpleEvaluator { key_variables, aggregates, } => { - let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats); + let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let key_variables = key_variables.clone(); let aggregate_input_expressions: Vec<_> = aggregates @@ -810,10 +821,10 @@ impl SimpleEvaluator { exec_count: Cell::new(0), exec_duration: Cell::new(std::time::Duration::from_secs(0)), }); - if run_stats { + if self.run_stats { let stats = stats.clone(); evaluator = Rc::new(move |tuple| { - let start = Instant::now(); + let start = Timer::now(); let inner = evaluator(tuple); stats .exec_duration @@ -845,6 +856,7 @@ impl SimpleEvaluator { base_iri: self.base_iri.as_ref().map(|iri| iri.as_ref().clone()), }, dataset: QueryDataset::new(), + parsing_duration: None, }, )? { Ok(encode_bindings(self.dataset.clone(), variables, iter)) @@ -933,7 +945,7 @@ impl SimpleEvaluator { Rc::new(move |tuple| tuple.get(v).cloned()) } PlanExpression::Exists(plan) => { - let eval = self.plan_evaluator(plan.clone()); + let (eval, _) = self.plan_evaluator(plan.clone()); //TODO: stats Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into())) } PlanExpression::Or(a, b) => { @@ -4775,7 +4787,7 @@ impl Iterator for StatsIterator { type Item = Result; fn next(&mut self) -> Option> { - let start = Instant::now(); + let start = Timer::now(); let result = self.inner.next(); self.stats .exec_duration @@ -4787,6 +4799,42 @@ impl Iterator for StatsIterator { } } +#[cfg(all(target_family = "wasm", target_os = "unknown"))] +pub struct Timer { + timestamp_ms: f64, +} + +#[cfg(all(target_family = "wasm", target_os = "unknown"))] +impl Timer { + pub fn now() -> Self { + Self { + timestamp_ms: js_sys::Date::now(), + } + } + + pub fn elapsed(&self) -> StdDuration { + StdDuration::from_secs_f64((js_sys::Date::now() - self.timestamp_ms) / 1000.) + } +} + +#[cfg(not(all(target_family = "wasm", target_os = "unknown")))] +pub struct Timer { + instant: Instant, +} + +#[cfg(not(all(target_family = "wasm", target_os = "unknown")))] +impl Timer { + pub fn now() -> Self { + Self { + instant: Instant::now(), + } + } + + pub fn elapsed(&self) -> StdDuration { + self.instant.elapsed() + } +} + #[test] fn uuid() { let mut buffer = String::default(); diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index 89fa2e94..93fda294 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -17,29 +17,34 @@ use crate::model::{NamedNode, Term}; pub use crate::sparql::algebra::{Query, QueryDataset, Update}; use crate::sparql::dataset::DatasetView; pub use crate::sparql::error::{EvaluationError, QueryError}; -use crate::sparql::eval::SimpleEvaluator; +use crate::sparql::eval::{SimpleEvaluator, Timer}; pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter}; +use crate::sparql::plan::PlanNodeWithStats; use crate::sparql::plan_builder::PlanBuilder; pub use crate::sparql::service::ServiceHandler; use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; pub(crate) use crate::sparql::update::evaluate_update; use crate::storage::StorageReader; +use json_event_parser::{JsonEvent, JsonWriter}; pub use oxrdf::{Variable, VariableNameParseError}; pub use sparesults::QueryResultsFormat; pub use spargebra::ParseError; use std::collections::HashMap; use std::rc::Rc; use std::time::Duration; +use std::{fmt, io}; #[allow(clippy::needless_pass_by_value)] pub(crate) fn evaluate_query( reader: StorageReader, query: impl TryInto>, options: QueryOptions, -) -> Result { + run_stats: bool, +) -> Result<(Result, QueryExplanation), EvaluationError> { let query = query.try_into().map_err(Into::into)?; let dataset = DatasetView::new(reader, &query.dataset); - match query.inner { + let start_planning = Timer::now(); + let (results, plan_node_with_stats, planning_duration) = match query.inner { spargebra::Query::Select { pattern, base_iri, .. } => { @@ -50,13 +55,16 @@ pub(crate) fn evaluate_query( &options.custom_functions, options.without_optimizations, )?; - Ok(SimpleEvaluator::new( + let planning_duration = start_planning.elapsed(); + let (results, explanation) = SimpleEvaluator::new( Rc::new(dataset), base_iri.map(Rc::new), options.service_handler(), Rc::new(options.custom_functions), + run_stats, ) - .evaluate_select_plan(Rc::new(plan), Rc::new(variables))) + .evaluate_select_plan(Rc::new(plan), Rc::new(variables)); + (Ok(results), explanation, planning_duration) } spargebra::Query::Ask { pattern, base_iri, .. @@ -68,13 +76,16 @@ pub(crate) fn evaluate_query( &options.custom_functions, options.without_optimizations, )?; - SimpleEvaluator::new( + let planning_duration = start_planning.elapsed(); + let (results, explanation) = SimpleEvaluator::new( Rc::new(dataset), base_iri.map(Rc::new), options.service_handler(), Rc::new(options.custom_functions), + run_stats, ) - .evaluate_ask_plan(Rc::new(plan)) + .evaluate_ask_plan(Rc::new(plan)); + (results, explanation, planning_duration) } spargebra::Query::Construct { template, @@ -96,13 +107,16 @@ pub(crate) fn evaluate_query( &options.custom_functions, options.without_optimizations, ); - Ok(SimpleEvaluator::new( + let planning_duration = start_planning.elapsed(); + let (results, explanation) = SimpleEvaluator::new( Rc::new(dataset), base_iri.map(Rc::new), options.service_handler(), Rc::new(options.custom_functions), + run_stats, ) - .evaluate_construct_plan(Rc::new(plan), construct)) + .evaluate_construct_plan(Rc::new(plan), construct); + (Ok(results), explanation, planning_duration) } spargebra::Query::Describe { pattern, base_iri, .. @@ -114,15 +128,25 @@ pub(crate) fn evaluate_query( &options.custom_functions, options.without_optimizations, )?; - Ok(SimpleEvaluator::new( + let planning_duration = start_planning.elapsed(); + let (results, explanation) = SimpleEvaluator::new( Rc::new(dataset), base_iri.map(Rc::new), options.service_handler(), Rc::new(options.custom_functions), + run_stats, ) - .evaluate_describe_plan(Rc::new(plan))) + .evaluate_describe_plan(Rc::new(plan)); + (Ok(results), explanation, planning_duration) } - } + }; + let explanation = QueryExplanation { + inner: plan_node_with_stats, + with_stats: run_stats, + parsing_duration: query.parsing_duration, + planning_duration, + }; + Ok((results, explanation)) } /// Options for SPARQL query evaluation. @@ -257,3 +281,39 @@ impl From for UpdateOptions { Self { query_options } } } + +/// The explanation of a query. +#[derive(Clone)] +pub struct QueryExplanation { + inner: Rc, + with_stats: bool, + parsing_duration: Option, + planning_duration: Duration, +} + +impl QueryExplanation { + /// Writes the explanation as JSON. + pub fn write_in_json(&self, output: impl io::Write) -> io::Result<()> { + let mut writer = JsonWriter::from_writer(output); + writer.write_event(JsonEvent::StartObject)?; + if let Some(parsing_duration) = self.parsing_duration { + writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds"))?; + writer.write_event(JsonEvent::Number( + &parsing_duration.as_secs_f32().to_string(), + ))?; + } + writer.write_event(JsonEvent::ObjectKey("planning duration in seconds"))?; + writer.write_event(JsonEvent::Number( + &self.planning_duration.as_secs_f32().to_string(), + ))?; + writer.write_event(JsonEvent::ObjectKey("plan"))?; + self.inner.json_node(&mut writer, self.with_stats)?; + writer.write_event(JsonEvent::EndObject) + } +} + +impl fmt::Debug for QueryExplanation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.inner) + } +} diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 9db35492..a4791ccf 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -1,6 +1,7 @@ use crate::model::{BlankNode, Literal, NamedNode, Term, Triple}; use crate::sparql::Variable; use crate::storage::numeric_encoder::EncodedTerm; +use json_event_parser::{JsonEvent, JsonWriter}; use regex::Regex; use spargebra::algebra::GraphPattern; use spargebra::term::GroundTerm; @@ -10,6 +11,7 @@ use std::collections::btree_map::Entry; use std::collections::{BTreeMap, BTreeSet}; use std::rc::Rc; use std::time::Duration; +use std::{fmt, io}; #[derive(Debug)] pub enum PlanNode { @@ -387,6 +389,12 @@ pub struct PlanTerm { pub plain: T, } +impl fmt::Display for PlanTerm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.plain) + } +} + #[derive(Debug, Clone)] pub enum PatternValue { Constant(PlanTerm), @@ -394,6 +402,16 @@ pub enum PatternValue { TriplePattern(Box), } +impl fmt::Display for PatternValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Constant(c) => write!(f, "{c}"), + Self::Variable(v) => write!(f, "{v}"), + Self::TriplePattern(p) => write!(f, "{p}"), + } + } +} + #[derive(Debug, Clone)] pub enum PatternValueConstant { NamedNode(NamedNode), @@ -402,6 +420,17 @@ pub enum PatternValueConstant { DefaultGraph, } +impl fmt::Display for PatternValueConstant { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NamedNode(n) => write!(f, "{n}"), + Self::Literal(l) => write!(f, "{l}"), + Self::Triple(t) => write!(f, "<< {t} >>"), + Self::DefaultGraph => f.write_str("DEFAULT"), + } + } +} + #[derive(Debug, Clone)] pub struct TriplePatternValue { pub subject: PatternValue, @@ -409,12 +438,24 @@ pub struct TriplePatternValue { pub object: PatternValue, } +impl fmt::Display for TriplePatternValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + #[derive(Debug, Clone)] pub struct PlanVariable

{ pub encoded: usize, pub plain: P, } +impl fmt::Display for PlanVariable

{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.plain) + } +} + #[derive(Debug, Clone)] pub enum PlanExpression { NamedNode(PlanTerm), @@ -625,6 +666,144 @@ impl PlanExpression { } } +impl fmt::Display for PlanExpression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Variable(v) => { + write!(f, "{v}") + } + Self::Bound(v) => { + write!(f, "Bound({v})") + } + Self::NamedNode(n) => write!(f, "{n}"), + Self::Literal(l) => write!(f, "{l}"), + Self::Rand => write!(f, "Rand()"), + Self::Now => write!(f, "Now()"), + Self::Uuid => write!(f, "Uuid()"), + Self::StrUuid => write!(f, "StrUuid()"), + Self::UnaryPlus(e) => write!(f, "UnaryPlus({e})"), + Self::UnaryMinus(e) => write!(f, "UnaryMinus({e})"), + Self::Not(e) => write!(f, "Not({e})"), + Self::BNode(e) => { + if let Some(e) = e { + write!(f, "BNode({e})") + } else { + write!(f, "BNode()") + } + } + Self::Str(e) => write!(f, "Str({e})"), + Self::Lang(e) => write!(f, "Lang({e})"), + Self::Datatype(e) => write!(f, "Datatype({e})"), + Self::Iri(e) => write!(f, "Iri({e})"), + Self::Abs(e) => write!(f, "Abs({e})"), + Self::Ceil(e) => write!(f, "Ceil({e})"), + Self::Floor(e) => write!(f, "Floor({e})"), + Self::Round(e) => write!(f, "Round({e})"), + Self::UCase(e) => write!(f, "UCase({e})"), + Self::LCase(e) => write!(f, "LCase({e})"), + Self::StrLen(e) => write!(f, "StrLen({e})"), + Self::EncodeForUri(e) => write!(f, "EncodeForUri({e})"), + Self::StaticRegex(e, r) => write!(f, "StaticRegex({e}, {r})"), + Self::Year(e) => write!(f, "Year({e})"), + Self::Month(e) => write!(f, "Month({e})"), + Self::Day(e) => write!(f, "Day({e})"), + Self::Hours(e) => write!(f, "Hours({e})"), + Self::Minutes(e) => write!(f, "Minutes({e})"), + Self::Seconds(e) => write!(f, "Seconds({e})"), + Self::Timezone(e) => write!(f, "Timezone({e})"), + Self::Tz(e) => write!(f, "Tz({e})"), + Self::Md5(e) => write!(f, "Md5({e})"), + Self::Sha1(e) => write!(f, "Sha1({e})"), + Self::Sha256(e) => write!(f, "Sha256({e})"), + Self::Sha384(e) => write!(f, "Sha384({e})"), + Self::Sha512(e) => write!(f, "Sha512({e})"), + Self::IsIri(e) => write!(f, "IsIri({e})"), + Self::IsBlank(e) => write!(f, "IsBlank({e})"), + Self::IsLiteral(e) => write!(f, "IsLiteral({e})"), + Self::IsNumeric(e) => write!(f, "IsNumeric({e})"), + Self::IsTriple(e) => write!(f, "IsTriple({e})"), + Self::Subject(e) => write!(f, "Subject({e})"), + Self::Predicate(e) => write!(f, "Predicate({e})"), + Self::Object(e) => write!(f, "Object({e})"), + Self::BooleanCast(e) => write!(f, "BooleanCast({e})"), + Self::DoubleCast(e) => write!(f, "DoubleCast({e})"), + Self::FloatCast(e) => write!(f, "FloatCast({e})"), + Self::DecimalCast(e) => write!(f, "DecimalCast({e})"), + Self::IntegerCast(e) => write!(f, "IntegerCast({e})"), + Self::DateCast(e) => write!(f, "DateCast({e})"), + Self::TimeCast(e) => write!(f, "TimeCast({e})"), + Self::DateTimeCast(e) => write!(f, "DateTimeCast({e})"), + Self::DurationCast(e) => write!(f, "DurationCast({e})"), + Self::YearMonthDurationCast(e) => write!(f, "YearMonthDurationCast({e})"), + Self::DayTimeDurationCast(e) => write!(f, "DayTimeDurationCast({e})"), + Self::StringCast(e) => write!(f, "StringCast({e})"), + Self::Or(a, b) => write!(f, "Or({a}, {b})"), + Self::And(a, b) => write!(f, "And({a}, {b})"), + Self::Equal(a, b) => write!(f, "Equal({a}, {b})"), + Self::Greater(a, b) => write!(f, "Greater({a}, {b})"), + Self::GreaterOrEqual(a, b) => write!(f, "GreaterOrEqual({a}, {b})"), + Self::Less(a, b) => write!(f, "Less({a}, {b})"), + Self::LessOrEqual(a, b) => write!(f, "LessOrEqual({a}, {b})"), + Self::Add(a, b) => write!(f, "Add({a}, {b})"), + Self::Subtract(a, b) => write!(f, "Subtract({a}, {b})"), + Self::Multiply(a, b) => write!(f, "Multiply({a}, {b})"), + Self::Divide(a, b) => write!(f, "Divide({a}, {b})"), + Self::LangMatches(a, b) => write!(f, "LangMatches({a}, {b})"), + Self::Contains(a, b) => write!(f, "Contains({a}, {b})"), + Self::StaticReplace(a, b, c) => write!(f, "StaticReplace({a}, {b}, {c})"), + Self::StrStarts(a, b) => write!(f, "StrStarts({a}, {b})"), + Self::StrEnds(a, b) => write!(f, "StrEnds({a}, {b})"), + Self::StrBefore(a, b) => write!(f, "StrBefore({a}, {b})"), + Self::StrAfter(a, b) => write!(f, "StrAfter({a}, {b})"), + Self::StrLang(a, b) => write!(f, "StrLang({a}, {b})"), + Self::StrDt(a, b) => write!(f, "StrDt({a}, {b})"), + Self::SameTerm(a, b) => write!(f, "SameTerm({a}, {b})"), + Self::SubStr(a, b, None) => write!(f, "SubStr({a}, {b})"), + Self::DynamicRegex(a, b, None) => write!(f, "DynamicRegex({a}, {b})"), + Self::Adjust(a, b) => write!(f, "Adjust({a}, {b})"), + Self::If(a, b, c) => write!(f, "If({a}, {b}, {c})"), + Self::SubStr(a, b, Some(c)) => write!(f, "SubStr({a}, {b}, {c})"), + Self::DynamicRegex(a, b, Some(c)) => write!(f, "DynamicRegex({a}, {b}, {c})"), + Self::DynamicReplace(a, b, c, None) => write!(f, "DynamicReplace({a}, {b}, {c})"), + Self::Triple(a, b, c) => write!(f, "Triple({a}, {b}, {c})"), + Self::DynamicReplace(a, b, c, Some(d)) => { + write!(f, "DynamicReplace({a}, {b}, {c}, {d})") + } + Self::Concat(es) => { + write!(f, "Concat(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } + Self::Coalesce(es) => { + write!(f, "Coalesce(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } + Self::CustomFunction(name, es) => { + write!(f, "{name}(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } + Self::Exists(_) => write!(f, "Exists()"), //TODO + } + } +} + #[derive(Debug, Clone)] pub struct PlanAggregation { pub function: PlanAggregationFunction, @@ -632,6 +811,43 @@ pub struct PlanAggregation { pub distinct: bool, } +impl fmt::Display for PlanAggregation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.function { + PlanAggregationFunction::Count => { + write!(f, "Count") + } + PlanAggregationFunction::Sum => { + write!(f, "Sum") + } + PlanAggregationFunction::Min => { + write!(f, "Min") + } + PlanAggregationFunction::Max => { + write!(f, "Max") + } + PlanAggregationFunction::Avg => { + write!(f, "Avg") + } + PlanAggregationFunction::GroupConcat { .. } => { + write!(f, "GroupConcat") + } + PlanAggregationFunction::Sample => write!(f, "Sample"), + }?; + if self.distinct { + write!(f, "Distinct")?; + } + write!(f, "(")?; + if let Some(expr) = &self.parameter { + write!(f, "{expr}")?; + } + if let PlanAggregationFunction::GroupConcat { separator } = &self.function { + write!(f, "; separator={separator}")?; + } + write!(f, ")") + } +} + #[derive(Debug, Clone)] pub enum PlanAggregationFunction { Count, @@ -655,12 +871,45 @@ pub enum PlanPropertyPath { NegatedPropertySet(Rc>>), } +impl fmt::Display for PlanPropertyPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Path(p) => write!(f, "{p}"), + Self::Reverse(p) => write!(f, "Reverse({p})"), + Self::Sequence(a, b) => write!(f, "Sequence{a}, {b}"), + Self::Alternative(a, b) => write!(f, "Alternative{a}, {b}"), + Self::ZeroOrMore(p) => write!(f, "ZeroOrMore({p})"), + Self::OneOrMore(p) => write!(f, "OneOrMore({p})"), + Self::ZeroOrOne(p) => write!(f, "ZeroOrOne({p})"), + Self::NegatedPropertySet(ps) => { + write!(f, "NegatedPropertySet(")?; + for (i, p) in ps.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{p}")?; + } + write!(f, ")") + } + } + } +} + #[derive(Debug, Clone)] pub enum Comparator { Asc(PlanExpression), Desc(PlanExpression), } +impl fmt::Display for Comparator { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Asc(c) => write!(f, "Asc({c})"), + Self::Desc(c) => write!(f, "Desc({c})"), + } + } +} + #[derive(Debug, Clone)] pub struct TripleTemplate { pub subject: TripleTemplateValue, @@ -668,6 +917,12 @@ pub struct TripleTemplate { pub object: TripleTemplateValue, } +impl fmt::Display for TripleTemplate { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.subject, self.predicate, self.object) + } +} + #[derive(Debug, Clone)] pub enum TripleTemplateValue { Constant(PlanTerm), @@ -676,6 +931,17 @@ pub enum TripleTemplateValue { Triple(Box), } +impl fmt::Display for TripleTemplateValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Constant(c) => write!(f, "{c}"), + Self::BlankNode(bn) => write!(f, "{bn}"), + Self::Variable(v) => write!(f, "{v}"), + Self::Triple(t) => write!(f, "<< {t} >>"), + } + } +} + #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct EncodedTuple { inner: Vec>, @@ -761,3 +1027,135 @@ pub struct PlanNodeWithStats { pub exec_count: Cell, pub exec_duration: Cell, } + +impl PlanNodeWithStats { + pub fn json_node( + &self, + writer: &mut JsonWriter, + with_stats: bool, + ) -> io::Result<()> { + writer.write_event(JsonEvent::StartObject)?; + writer.write_event(JsonEvent::ObjectKey("name"))?; + writer.write_event(JsonEvent::String(&self.node_label()))?; + if with_stats { + writer.write_event(JsonEvent::ObjectKey("number of results"))?; + writer.write_event(JsonEvent::Number(&self.exec_count.get().to_string()))?; + writer.write_event(JsonEvent::ObjectKey("duration in seconds"))?; + writer.write_event(JsonEvent::Number( + &self.exec_duration.get().as_secs_f32().to_string(), + ))?; + } + writer.write_event(JsonEvent::ObjectKey("children"))?; + writer.write_event(JsonEvent::StartArray)?; + for child in &self.children { + child.json_node(writer, with_stats)?; + } + writer.write_event(JsonEvent::EndArray)?; + writer.write_event(JsonEvent::EndObject) + } + + fn node_label(&self) -> String { + match self.node.as_ref() { + PlanNode::Aggregate { + key_variables, + aggregates, + .. + } => format!( + "Aggregate({})", + key_variables + .iter() + .map(|c| c.to_string()) + .chain(aggregates.iter().map(|(agg, v)| format!("{agg} -> {v}"))) + .collect::>() + .join(", ") + ), + PlanNode::AntiJoin { .. } => "AntiJoin".to_owned(), + PlanNode::Extend { + expression, + variable, + .. + } => format!("Extend({expression} -> {variable})"), + PlanNode::Filter { expression, .. } => format!("Filter({expression})"), + PlanNode::ForLoopJoin { .. } => "ForLoopJoin".to_owned(), + PlanNode::ForLoopLeftJoin { .. } => "ForLoopLeftJoin".to_owned(), + PlanNode::HashDeduplicate { .. } => "HashDeduplicate".to_owned(), + PlanNode::HashJoin { .. } => "HashJoin".to_owned(), + PlanNode::HashLeftJoin { expression, .. } => format!("HashLeftJoin({expression})"), + PlanNode::Limit { count, .. } => format!("Limit({count})"), + PlanNode::PathPattern { + subject, + path, + object, + graph_name, + } => format!("PathPattern({subject} {path} {object} {graph_name})"), + PlanNode::Project { mapping, .. } => { + format!( + "Project({})", + mapping + .iter() + .map(|(f, t)| if f.plain == t.plain { + f.to_string() + } else { + format!("{f} -> {t}") + }) + .collect::>() + .join(", ") + ) + } + PlanNode::QuadPattern { + subject, + predicate, + object, + graph_name, + } => format!("QuadPattern({subject} {predicate} {object} {graph_name})"), + PlanNode::Reduced { .. } => "Reduced".to_owned(), + PlanNode::Service { + service_name, + silent, + .. + } => { + if *silent { + format!("SilentService({service_name})") + } else { + format!("Service({service_name})") + } + } + PlanNode::Skip { count, .. } => format!("Skip({count})"), + PlanNode::Sort { by, .. } => { + format!( + "Sort({})", + by.iter() + .map(|c| c.to_string()) + .collect::>() + .join(", ") + ) + } + PlanNode::StaticBindings { variables, .. } => { + format!( + "StaticBindings({})", + variables + .iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ) + } + PlanNode::Union { .. } => "Union".to_owned(), + } + } +} + +impl fmt::Debug for PlanNodeWithStats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut obj = f.debug_struct("Node"); + obj.field("name", &self.node_label()); + if self.exec_duration.get() > Duration::default() { + obj.field("number of results", &self.exec_count.get()); + obj.field("duration in seconds", &self.exec_duration.get()); + } + if !self.children.is_empty() { + obj.field("children", &self.children); + } + obj.finish() + } +} diff --git a/lib/src/sparql/update.rs b/lib/src/sparql/update.rs index 121cb027..dff7d02c 100644 --- a/lib/src/sparql/update.rs +++ b/lib/src/sparql/update.rs @@ -130,11 +130,12 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { self.base_iri.clone(), self.options.query_options.service_handler(), Rc::new(self.options.query_options.custom_functions.clone()), + false, ); let mut bnodes = HashMap::new(); + let (eval, _) = evaluator.plan_evaluator(Rc::new(plan)); let tuples = - evaluator.plan_evaluator(Rc::new(plan))(EncodedTuple::with_capacity(variables.len())) - .collect::, _>>()?; //TODO: would be much better to stream + eval(EncodedTuple::with_capacity(variables.len())).collect::, _>>()?; //TODO: would be much better to stream for tuple in tuples { for quad in delete { if let Some(quad) = diff --git a/lib/src/store.rs b/lib/src/store.rs index 2fce58c4..95234498 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -29,8 +29,8 @@ use crate::io::{ }; use crate::model::*; use crate::sparql::{ - evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update, - UpdateOptions, + evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions, + QueryResults, Update, UpdateOptions, }; use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; #[cfg(not(target_family = "wasm"))] @@ -207,7 +207,37 @@ impl Store { query: impl TryInto>, options: QueryOptions, ) -> Result { - evaluate_query(self.storage.snapshot(), query, options) + let (results, _) = self.explain_query_opt(query, options, false)?; + results + } + + /// Executes a [SPARQL 1.1 query](https://www.w3.org/TR/sparql11-query/) with some options and + /// returns a query explanation with some statistics (if enabled with the `with_stats` parameter). + /// + /// Beware: if you want to compute statistics you need to exhaust the results iterator before having a look at them. + /// + /// Usage example serialising the explanation with statistics in JSON: + /// ``` + /// use oxigraph::store::Store; + /// use oxigraph::sparql::{QueryOptions, QueryResults}; + /// + /// let store = Store::new()?; + /// if let (Ok(QueryResults::Solutions(solutions)), explanation) = store.explain_query_opt("SELECT ?s WHERE { VALUES ?s { 1 2 3 } }", QueryOptions::default(), true)? { + /// // We make sure to have read all the solutions + /// for _ in solutions { + /// } + /// let mut buf = Vec::new(); + /// explanation.write_in_json(&mut buf)?; + /// } + /// # Result::<_, Box>::Ok(()) + /// ``` + pub fn explain_query_opt( + &self, + query: impl TryInto>, + options: QueryOptions, + with_stats: bool, + ) -> Result<(Result, QueryExplanation), EvaluationError> { + evaluate_query(self.storage.snapshot(), query, options, with_stats) } /// Retrieves quads with a filter on each quad component @@ -918,7 +948,8 @@ impl<'a> Transaction<'a> { query: impl TryInto>, options: QueryOptions, ) -> Result { - evaluate_query(self.writer.reader(), query, options) + let (results, _) = evaluate_query(self.writer.reader(), query, options, false)?; + results } /// Retrieves quads with a filter on each quad component. diff --git a/lib/tests/store.rs b/lib/tests/store.rs index f80065ed..1851732d 100644 --- a/lib/tests/store.rs +++ b/lib/tests/store.rs @@ -12,11 +12,11 @@ use std::fs::{create_dir, remove_dir_all, File}; use std::io::Cursor; #[cfg(not(target_family = "wasm"))] use std::io::Write; -#[cfg(not(target_family = "wasm"))] +#[cfg(target_os = "linux")] use std::iter::once; #[cfg(not(target_family = "wasm"))] use std::path::{Path, PathBuf}; -#[cfg(not(target_family = "wasm"))] +#[cfg(target_os = "linux")] use std::process::Command; const DATA: &str = r#" @prefix schema: . @@ -501,7 +501,7 @@ fn test_open_read_only_bad_dir() -> Result<(), Box> { Ok(()) } -#[cfg(not(target_family = "wasm"))] +#[cfg(target_os = "linux")] fn reset_dir(dir: &str) -> Result<(), Box> { assert!(Command::new("git") .args(["clean", "-fX", dir]) diff --git a/server/src/main.rs b/server/src/main.rs index cec53526..7d5b5235 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -7,7 +7,7 @@ use oxigraph::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerialize use oxigraph::model::{ GraphName, GraphNameRef, IriParseError, NamedNode, NamedNodeRef, NamedOrBlankNode, }; -use oxigraph::sparql::{Query, QueryResults, Update}; +use oxigraph::sparql::{Query, QueryOptions, QueryResults, Update}; use oxigraph::store::{BulkLoader, LoaderError, Store}; use oxiri::Iri; use rand::random; @@ -175,6 +175,23 @@ enum Command { /// By default the format is guessed from the results file extension. #[arg(long, required_unless_present = "results_file")] results_format: Option, + /// Prints to stderr a human-readable explanation of the query evaluation. + /// + /// Use the stats option to print also query evaluation statistics. + #[arg(long, conflicts_with = "explain_file")] + explain: bool, + /// Write to the given file an explanation of the query evaluation. + /// + /// If the file extension is .json the JSON format is used, if .txt a human readable format is used. + /// + /// Use the stats option to print also query evaluation statistics. + #[arg(long, conflicts_with = "explain")] + explain_file: Option, + /// Computes some evaluation statistics to print as part of the query explanations. + /// + /// Beware, computing the statistics adds some overhead to the evaluation runtime. + #[arg(long)] + stats: bool, }, /// Executes a SPARQL update against the store. Update { @@ -424,6 +441,9 @@ pub fn main() -> anyhow::Result<()> { query_base, results_file, results_format, + explain, + explain_file, + stats, } => { let query = if let Some(query) = query { query @@ -443,107 +463,130 @@ pub fn main() -> anyhow::Result<()> { .location .ok_or_else(|| anyhow!("The --location argument is required"))?, )?; - match store.query(query)? { - QueryResults::Solutions(solutions) => { - let format = if let Some(name) = results_format { - if let Some(format) = QueryResultsFormat::from_extension(&name) { - format - } else if let Some(format) = QueryResultsFormat::from_media_type(&name) { - format + let (results, explanation) = + store.explain_query_opt(query, QueryOptions::default(), stats)?; + let print_result = (|| { + match results? { + QueryResults::Solutions(solutions) => { + let format = if let Some(name) = results_format { + if let Some(format) = QueryResultsFormat::from_extension(&name) { + format + } else if let Some(format) = QueryResultsFormat::from_media_type(&name) + { + format + } else { + bail!("The file format '{name}' is unknown") + } + } else if let Some(results_file) = &results_file { + format_from_path(results_file, |ext| { + QueryResultsFormat::from_extension(ext) + .ok_or_else(|| anyhow!("The file extension '{ext}' is unknown")) + })? } else { - bail!("The file format '{name}' is unknown") + bail!("The --results-format option must be set when writing to stdout") + }; + if let Some(results_file) = results_file { + let mut writer = QueryResultsSerializer::from_format(format) + .solutions_writer( + BufWriter::new(File::create(results_file)?), + solutions.variables().to_vec(), + )?; + for solution in solutions { + writer.write(&solution?)?; + } + writer.finish()?; + } else { + let stdout = stdout(); // Not needed in Rust 1.61 + let mut writer = QueryResultsSerializer::from_format(format) + .solutions_writer(stdout.lock(), solutions.variables().to_vec())?; + for solution in solutions { + writer.write(&solution?)?; + } + let _ = writer.finish()?; } - } else if let Some(results_file) = &results_file { - format_from_path(results_file, |ext| { - QueryResultsFormat::from_extension(ext) - .ok_or_else(|| anyhow!("The file extension '{ext}' is unknown")) - })? - } else { - bail!("The --results-format option must be set when writing to stdout") - }; - if let Some(results_file) = results_file { - let mut writer = QueryResultsSerializer::from_format(format) - .solutions_writer( + } + QueryResults::Boolean(result) => { + let format = if let Some(name) = results_format { + if let Some(format) = QueryResultsFormat::from_extension(&name) { + format + } else if let Some(format) = QueryResultsFormat::from_media_type(&name) + { + format + } else { + bail!("The file format '{name}' is unknown") + } + } else if let Some(results_file) = &results_file { + format_from_path(results_file, |ext| { + QueryResultsFormat::from_extension(ext) + .ok_or_else(|| anyhow!("The file extension '{ext}' is unknown")) + })? + } else { + bail!("The --results-format option must be set when writing to stdout") + }; + if let Some(results_file) = results_file { + QueryResultsSerializer::from_format(format).write_boolean_result( BufWriter::new(File::create(results_file)?), - solutions.variables().to_vec(), + result, )?; - for solution in solutions { - writer.write(&solution?)?; - } - writer.finish()?; - } else { - let stdout = stdout(); // Not needed in Rust 1.61 - let mut writer = QueryResultsSerializer::from_format(format) - .solutions_writer(stdout.lock(), solutions.variables().to_vec())?; - for solution in solutions { - writer.write(&solution?)?; - } - let _ = writer.finish()?; - } - } - QueryResults::Boolean(result) => { - let format = if let Some(name) = results_format { - if let Some(format) = QueryResultsFormat::from_extension(&name) { - format - } else if let Some(format) = QueryResultsFormat::from_media_type(&name) { - format } else { - bail!("The file format '{name}' is unknown") + let _ = QueryResultsSerializer::from_format(format) + .write_boolean_result(stdout().lock(), result)?; } - } else if let Some(results_file) = &results_file { - format_from_path(results_file, |ext| { - QueryResultsFormat::from_extension(ext) - .ok_or_else(|| anyhow!("The file extension '{ext}' is unknown")) - })? - } else { - bail!("The --results-format option must be set when writing to stdout") - }; - if let Some(results_file) = results_file { - QueryResultsSerializer::from_format(format).write_boolean_result( - BufWriter::new(File::create(results_file)?), - result, - )?; - } else { - let _ = QueryResultsSerializer::from_format(format) - .write_boolean_result(stdout().lock(), result)?; } - } - QueryResults::Graph(triples) => { - let format = if let Some(name) = results_format { - if let Some(format) = GraphFormat::from_extension(&name) { - format - } else if let Some(format) = GraphFormat::from_media_type(&name) { - format + QueryResults::Graph(triples) => { + let format = if let Some(name) = results_format { + if let Some(format) = GraphFormat::from_extension(&name) { + format + } else if let Some(format) = GraphFormat::from_media_type(&name) { + format + } else { + bail!("The file format '{name}' is unknown") + } + } else if let Some(results_file) = &results_file { + format_from_path(results_file, |ext| { + GraphFormat::from_extension(ext) + .ok_or_else(|| anyhow!("The file extension '{ext}' is unknown")) + })? } else { - bail!("The file format '{name}' is unknown") - } - } else if let Some(results_file) = &results_file { - format_from_path(results_file, |ext| { - GraphFormat::from_extension(ext) - .ok_or_else(|| anyhow!("The file extension '{ext}' is unknown")) - })? - } else { - bail!("The --results-format option must be set when writing to stdout") - }; - if let Some(results_file) = results_file { - let mut writer = GraphSerializer::from_format(format) - .triple_writer(BufWriter::new(File::create(results_file)?))?; - for triple in triples { - writer.write(triple?.as_ref())?; - } - writer.finish()?; - } else { - let stdout = stdout(); // Not needed in Rust 1.61 - let mut writer = - GraphSerializer::from_format(format).triple_writer(stdout.lock())?; - for triple in triples { - writer.write(triple?.as_ref())?; + bail!("The --results-format option must be set when writing to stdout") + }; + if let Some(results_file) = results_file { + let mut writer = GraphSerializer::from_format(format) + .triple_writer(BufWriter::new(File::create(results_file)?))?; + for triple in triples { + writer.write(triple?.as_ref())?; + } + writer.finish()?; + } else { + let stdout = stdout(); // Not needed in Rust 1.61 + let mut writer = GraphSerializer::from_format(format) + .triple_writer(stdout.lock())?; + for triple in triples { + writer.write(triple?.as_ref())?; + } + writer.finish()?; } - writer.finish()?; } } + Ok(()) + })(); + if let Some(explain_file) = explain_file { + let mut file = BufWriter::new(File::create(&explain_file)?); + match explain_file + .extension() + .and_then(|e| e.to_str()) { + Some("json") => { + explanation.write_in_json(file)?; + }, + Some("txt") => { + write!(file, "{:?}", explanation)?; + }, + _ => bail!("The given explanation file {} must have an extension that is .json or .txt", explain_file.display()) + } + } else if explain || stats { + eprintln!("{:?}", explanation); } - Ok(()) + print_result } Command::Update { update,