Exposes the query profiler to the CLI app

pull/470/head
Tpt 2 years ago committed by Thomas Tanon
parent dcd59ac4dd
commit 60ffd99ad8
  1. 2
      Cargo.lock
  2. 56
      bench/explanation_to_flamegraph.py
  3. 52
      bench/explanation_to_trace.py
  4. 2
      lib/Cargo.toml
  5. 13
      lib/src/sparql/algebra.rs
  6. 184
      lib/src/sparql/eval.rs
  7. 84
      lib/src/sparql/mod.rs
  8. 398
      lib/src/sparql/plan.rs
  9. 5
      lib/src/sparql/update.rs
  10. 39
      lib/src/store.rs
  11. 6
      lib/tests/store.rs
  12. 223
      server/src/main.rs

2
Cargo.lock generated

@ -932,6 +932,8 @@ dependencies = [
"digest",
"getrandom",
"hex",
"js-sys",
"json-event-parser",
"lazy_static",
"libc",
"md-5",

@ -0,0 +1,56 @@
"""
Converts a SPARQL query JSON explanation file to a flamegraph.
Usage: python explanation_to_flamegraph.py explanation.json flamegraph.svg
"""
import json
import subprocess
from argparse import ArgumentParser
from pathlib import Path
from shutil import which
from tempfile import NamedTemporaryFile
if which('flamegraph.pl') is None:
raise Exception(
'This script requires the flamegraph.pl script from https://github.com/brendangregg/FlameGraph to be installed and be in $PATH.')
parser = ArgumentParser(
prog='OxigraphFlamegraph',
description='Builds a flamegraph from the Oxigraph query explanation JSON format',
epilog='Text at the bottom of help')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('flamegraph_svg', type=Path)
args = parser.parse_args()
def trace_line(label: str, value: float):
return f"{label} {int(value * 1_000_000)}"
with args.json_explanation.open('rt') as fp:
explanation = json.load(fp)
trace = []
if "parsing duration in seconds" in explanation:
trace.append(trace_line("parsing", explanation['parsing duration in seconds']))
if "planning duration in seconds" in explanation:
trace.append(trace_line("planning", explanation['planning duration in seconds']))
already_used_names = {}
def add_to_trace(node, path):
path = f"{path};{node['name'].replace(' ', '`')}"
if path in already_used_names:
already_used_names[path] += 1
path = f"{path}`{already_used_names[path]}"
else:
already_used_names[path] = 0
samples = node['duration in seconds'] - sum(child['duration in seconds'] for child in node.get("children", ()))
trace.append(trace_line(path, samples))
for i, child in enumerate(node.get("children", ())):
add_to_trace(child, path)
add_to_trace(explanation["plan"], 'eval')
with NamedTemporaryFile('w+t') as fp:
fp.write('\n'.join(trace))
fp.flush()
args.flamegraph_svg.write_bytes(subprocess.run(['flamegraph.pl', fp.name], stdout=subprocess.PIPE).stdout)

@ -0,0 +1,52 @@
"""
Converts a SPARQL query JSON explanation file to a tracing event file compatible with Chrome.
Usage: python explanation_to_trace.py explanation.json trace.json
"""
import json
from argparse import ArgumentParser
from pathlib import Path
parser = ArgumentParser(
prog='OxigraphTracing',
description='Builds a Trace Event Format file from the Oxigraph query explanation JSON format')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('json_trace_event', type=Path)
args = parser.parse_args()
with args.json_explanation.open('rt') as fp:
explanation = json.load(fp)
trace = []
def trace_element(name: str, cat: str, start_s: float, duration_s: float):
return {
"name": name,
"cat": cat,
"ph": "X",
"ts": int(start_s * 1_000_000),
"dur": int(duration_s * 1_000_000),
"pid": 1
}
def add_to_trace(node, path, start_time: float):
path = f"{path};{node['name'].replace(' ', '`')}"
trace.append(trace_element(node["name"], node["name"].split("(")[0], start_time, node["duration in seconds"]))
for child in node.get("children", ()):
add_to_trace(child, path, start_time)
start_time += child["duration in seconds"]
current_time = 0
if "parsing duration in seconds" in explanation:
d = explanation["parsing duration in seconds"]
trace.append(trace_element(f"parsing", "parsing", current_time, d))
current_time += d
if "planning duration in seconds" in explanation:
d = explanation["planning duration in seconds"]
trace.append(trace_element(f"planning", "planning", current_time, d))
current_time += d
add_to_trace(explanation["plan"], 'eval', current_time)
with args.json_trace_event.open("wt") as fp:
json.dump(trace, fp)

@ -36,6 +36,7 @@ rio_xml = "0.8"
hex = "0.4"
siphasher = "0.3"
lazy_static = "1"
json-event-parser = "0.1"
oxrdf = { version = "0.1.5", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxsdatatypes = { version = "0.1.1", path="oxsdatatypes" }
spargebra = { version = "0.2.7", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }
@ -48,6 +49,7 @@ oxhttp = { version = "0.1", optional = true }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom = { version = "0.2", features = ["js"] }
js-sys = "0.3"
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.4"

@ -5,9 +5,11 @@
//! Warning: this implementation is an unstable work in progress
use crate::model::*;
use crate::sparql::eval::Timer;
use spargebra::GraphUpdateOperation;
use std::fmt;
use std::str::FromStr;
use std::time::Duration;
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
///
@ -30,13 +32,19 @@ use std::str::FromStr;
pub struct Query {
pub(super) inner: spargebra::Query,
pub(super) dataset: QueryDataset,
pub(super) parsing_duration: Option<Duration>,
}
impl Query {
/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, spargebra::ParseError> {
let query = spargebra::Query::parse(query, base_iri)?;
Ok(query.into())
let start = Timer::now();
let query = Self::from(spargebra::Query::parse(query, base_iri)?);
Ok(Self {
dataset: query.dataset,
inner: query.inner,
parsing_duration: Some(start.elapsed()),
})
}
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
@ -90,6 +98,7 @@ impl From<spargebra::Query> for Query {
| spargebra::Query::Ask { dataset, .. } => dataset,
}),
inner: query,
parsing_duration: None,
}
}
}

@ -28,6 +28,8 @@ use std::iter::Iterator;
use std::iter::{empty, once};
use std::rc::Rc;
use std::str;
use std::time::Duration as StdDuration;
#[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
use std::time::Instant;
const REGEX_SIZE_LIMIT: usize = 1_000_000;
@ -42,6 +44,7 @@ pub struct SimpleEvaluator {
now: DateTime,
service_handler: Rc<dyn ServiceHandler<Error = EvaluationError>>,
custom_functions: Rc<CustomFunctionRegistry>,
run_stats: bool,
}
impl SimpleEvaluator {
@ -50,6 +53,7 @@ impl SimpleEvaluator {
base_iri: Option<Rc<Iri<String>>>,
service_handler: Rc<dyn ServiceHandler<Error = EvaluationError>>,
custom_functions: Rc<CustomFunctionRegistry>,
run_stats: bool,
) -> Self {
Self {
dataset,
@ -57,6 +61,7 @@ impl SimpleEvaluator {
now: DateTime::now().unwrap(),
service_handler,
custom_functions,
run_stats,
}
}
@ -64,69 +69,76 @@ impl SimpleEvaluator {
&self,
plan: Rc<PlanNode>,
variables: Rc<Vec<Variable>>,
) -> QueryResults {
let iter = self.plan_evaluator(plan)(EncodedTuple::with_capacity(variables.len()));
QueryResults::Solutions(decode_bindings(self.dataset.clone(), iter, variables))
) -> (QueryResults, Rc<PlanNodeWithStats>) {
let (eval, stats) = self.plan_evaluator(plan);
(
QueryResults::Solutions(decode_bindings(
self.dataset.clone(),
eval(EncodedTuple::with_capacity(variables.len())),
variables,
)),
stats,
)
}
pub fn evaluate_ask_plan(&self, plan: Rc<PlanNode>) -> Result<QueryResults, EvaluationError> {
pub fn evaluate_ask_plan(
&self,
plan: Rc<PlanNode>,
) -> (Result<QueryResults, EvaluationError>, Rc<PlanNodeWithStats>) {
let from = EncodedTuple::with_capacity(plan.used_variables().len());
match self.plan_evaluator(plan)(from).next() {
Some(Ok(_)) => Ok(QueryResults::Boolean(true)),
Some(Err(error)) => Err(error),
None => Ok(QueryResults::Boolean(false)),
}
let (eval, stats) = self.plan_evaluator(plan);
(
match eval(from).next() {
Some(Ok(_)) => Ok(QueryResults::Boolean(true)),
Some(Err(error)) => Err(error),
None => Ok(QueryResults::Boolean(false)),
},
stats,
)
}
pub fn evaluate_construct_plan(
&self,
plan: Rc<PlanNode>,
template: Vec<TripleTemplate>,
) -> QueryResults {
) -> (QueryResults, Rc<PlanNodeWithStats>) {
let from = EncodedTuple::with_capacity(plan.used_variables().len());
QueryResults::Graph(QueryTripleIter {
iter: Box::new(ConstructIterator {
eval: self.clone(),
iter: self.plan_evaluator(plan)(from),
template,
buffered_results: Vec::default(),
bnodes: Vec::default(),
let (eval, stats) = self.plan_evaluator(plan);
(
QueryResults::Graph(QueryTripleIter {
iter: Box::new(ConstructIterator {
eval: self.clone(),
iter: eval(from),
template,
buffered_results: Vec::default(),
bnodes: Vec::default(),
}),
}),
})
stats,
)
}
pub fn evaluate_describe_plan(&self, plan: Rc<PlanNode>) -> QueryResults {
pub fn evaluate_describe_plan(
&self,
plan: Rc<PlanNode>,
) -> (QueryResults, Rc<PlanNodeWithStats>) {
let from = EncodedTuple::with_capacity(plan.used_variables().len());
QueryResults::Graph(QueryTripleIter {
iter: Box::new(DescribeIterator {
eval: self.clone(),
iter: self.plan_evaluator(plan)(from),
quads: Box::new(empty()),
let (eval, stats) = self.plan_evaluator(plan);
(
QueryResults::Graph(QueryTripleIter {
iter: Box::new(DescribeIterator {
eval: self.clone(),
iter: eval(from),
quads: Box::new(empty()),
}),
}),
})
stats,
)
}
pub fn plan_evaluator(
&self,
node: Rc<PlanNode>,
) -> Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator> {
self.build_plan_evaluator(node, false).0
}
pub fn plan_evaluator_with_stats(
&self,
node: Rc<PlanNode>,
) -> (
Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator>,
Rc<PlanNodeWithStats>,
) {
self.build_plan_evaluator(node, true)
}
fn build_plan_evaluator(
&self,
node: Rc<PlanNode>,
run_stats: bool,
) -> (
Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator>,
Rc<PlanNodeWithStats>,
@ -372,9 +384,9 @@ impl SimpleEvaluator {
.intersection(&right.always_bound_variables())
.copied()
.collect();
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats);
let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats);
let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats);
if join_keys.is_empty() {
// Cartesian product
@ -418,9 +430,9 @@ impl SimpleEvaluator {
}
}
PlanNode::ForLoopJoin { left, right } => {
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats);
let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats);
let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats);
Rc::new(move |from| {
let right = right.clone();
@ -436,9 +448,9 @@ impl SimpleEvaluator {
.intersection(&right.always_bound_variables())
.copied()
.collect();
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats);
let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats);
let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats);
if join_keys.is_empty() {
Rc::new(move |from| {
@ -479,9 +491,9 @@ impl SimpleEvaluator {
.intersection(&right.always_bound_variables())
.copied()
.collect();
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats);
let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats);
let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats);
let expression = self.expression_evaluator(expression);
// Real hash join
@ -508,9 +520,9 @@ impl SimpleEvaluator {
right,
possible_problem_vars,
} => {
let (left, left_stats) = self.build_plan_evaluator(left.clone(), run_stats);
let (left, left_stats) = self.plan_evaluator(left.clone());
stat_children.push(left_stats);
let (right, right_stats) = self.build_plan_evaluator(right.clone(), run_stats);
let (right, right_stats) = self.plan_evaluator(right.clone());
stat_children.push(right_stats);
let possible_problem_vars = possible_problem_vars.clone();
Rc::new(move |from| {
@ -533,7 +545,7 @@ impl SimpleEvaluator {
})
}
PlanNode::Filter { child, expression } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let expression = self.expression_evaluator(expression);
Rc::new(move |from| {
@ -552,8 +564,7 @@ impl SimpleEvaluator {
let children: Vec<_> = children
.iter()
.map(|child| {
let (child, child_stats) =
self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
child
})
@ -572,7 +583,7 @@ impl SimpleEvaluator {
variable,
expression,
} => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let position = variable.encoded;
let expression = self.expression_evaluator(expression);
@ -588,7 +599,7 @@ impl SimpleEvaluator {
})
}
PlanNode::Sort { child, by } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let by: Vec<_> = by
.iter()
@ -646,12 +657,12 @@ impl SimpleEvaluator {
})
}
PlanNode::HashDeduplicate { child } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
Rc::new(move |from| Box::new(hash_deduplicate(child(from))))
}
PlanNode::Reduced { child } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
Rc::new(move |from| {
Box::new(ConsecutiveDeduplication {
@ -661,19 +672,19 @@ impl SimpleEvaluator {
})
}
PlanNode::Skip { child, count } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let count = *count;
Rc::new(move |from| Box::new(child(from).skip(count)))
}
PlanNode::Limit { child, count } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let count = *count;
Rc::new(move |from| Box::new(child(from).take(count)))
}
PlanNode::Project { child, mapping } => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let mapping = mapping.clone();
Rc::new(move |from| {
@ -713,7 +724,7 @@ impl SimpleEvaluator {
key_variables,
aggregates,
} => {
let (child, child_stats) = self.build_plan_evaluator(child.clone(), run_stats);
let (child, child_stats) = self.plan_evaluator(child.clone());
stat_children.push(child_stats);
let key_variables = key_variables.clone();
let aggregate_input_expressions: Vec<_> = aggregates
@ -810,10 +821,10 @@ impl SimpleEvaluator {
exec_count: Cell::new(0),
exec_duration: Cell::new(std::time::Duration::from_secs(0)),
});
if run_stats {
if self.run_stats {
let stats = stats.clone();
evaluator = Rc::new(move |tuple| {
let start = Instant::now();
let start = Timer::now();
let inner = evaluator(tuple);
stats
.exec_duration
@ -845,6 +856,7 @@ impl SimpleEvaluator {
base_iri: self.base_iri.as_ref().map(|iri| iri.as_ref().clone()),
},
dataset: QueryDataset::new(),
parsing_duration: None,
},
)? {
Ok(encode_bindings(self.dataset.clone(), variables, iter))
@ -933,7 +945,7 @@ impl SimpleEvaluator {
Rc::new(move |tuple| tuple.get(v).cloned())
}
PlanExpression::Exists(plan) => {
let eval = self.plan_evaluator(plan.clone());
let (eval, _) = self.plan_evaluator(plan.clone()); //TODO: stats
Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into()))
}
PlanExpression::Or(a, b) => {
@ -4775,7 +4787,7 @@ impl Iterator for StatsIterator {
type Item = Result<EncodedTuple, EvaluationError>;
fn next(&mut self) -> Option<Result<EncodedTuple, EvaluationError>> {
let start = Instant::now();
let start = Timer::now();
let result = self.inner.next();
self.stats
.exec_duration
@ -4787,6 +4799,42 @@ impl Iterator for StatsIterator {
}
}
#[cfg(all(target_family = "wasm", target_os = "unknown"))]
pub struct Timer {
timestamp_ms: f64,
}
#[cfg(all(target_family = "wasm", target_os = "unknown"))]
impl Timer {
pub fn now() -> Self {
Self {
timestamp_ms: js_sys::Date::now(),
}
}
pub fn elapsed(&self) -> StdDuration {
StdDuration::from_secs_f64((js_sys::Date::now() - self.timestamp_ms) / 1000.)
}
}
/// Monotonic timer for non-wasm targets, backed by [`std::time::Instant`].
#[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
pub struct Timer {
    // Instant captured when the timer was started.
    start: Instant,
}

#[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
impl Timer {
    /// Starts the timer at the current instant.
    pub fn now() -> Self {
        Self {
            start: Instant::now(),
        }
    }

    /// Returns the time elapsed since [`Timer::now`] was called.
    pub fn elapsed(&self) -> StdDuration {
        self.start.elapsed()
    }
}
#[test]
fn uuid() {
let mut buffer = String::default();

@ -17,29 +17,34 @@ use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::{EvaluationError, QueryError};
use crate::sparql::eval::SimpleEvaluator;
use crate::sparql::eval::{SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
use crate::sparql::plan::PlanNodeWithStats;
use crate::sparql::plan_builder::PlanBuilder;
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader;
use json_event_parser::{JsonEvent, JsonWriter};
pub use oxrdf::{Variable, VariableNameParseError};
pub use sparesults::QueryResultsFormat;
pub use spargebra::ParseError;
use std::collections::HashMap;
use std::rc::Rc;
use std::time::Duration;
use std::{fmt, io};
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn evaluate_query(
reader: StorageReader,
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
) -> Result<QueryResults, EvaluationError> {
run_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
let query = query.try_into().map_err(Into::into)?;
let dataset = DatasetView::new(reader, &query.dataset);
match query.inner {
let start_planning = Timer::now();
let (results, plan_node_with_stats, planning_duration) = match query.inner {
spargebra::Query::Select {
pattern, base_iri, ..
} => {
@ -50,13 +55,16 @@ pub(crate) fn evaluate_query(
&options.custom_functions,
options.without_optimizations,
)?;
Ok(SimpleEvaluator::new(
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
run_stats,
)
.evaluate_select_plan(Rc::new(plan), Rc::new(variables)))
.evaluate_select_plan(Rc::new(plan), Rc::new(variables));
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Ask {
pattern, base_iri, ..
@ -68,13 +76,16 @@ pub(crate) fn evaluate_query(
&options.custom_functions,
options.without_optimizations,
)?;
SimpleEvaluator::new(
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
run_stats,
)
.evaluate_ask_plan(Rc::new(plan))
.evaluate_ask_plan(Rc::new(plan));
(results, explanation, planning_duration)
}
spargebra::Query::Construct {
template,
@ -96,13 +107,16 @@ pub(crate) fn evaluate_query(
&options.custom_functions,
options.without_optimizations,
);
Ok(SimpleEvaluator::new(
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
run_stats,
)
.evaluate_construct_plan(Rc::new(plan), construct))
.evaluate_construct_plan(Rc::new(plan), construct);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Describe {
pattern, base_iri, ..
@ -114,15 +128,25 @@ pub(crate) fn evaluate_query(
&options.custom_functions,
options.without_optimizations,
)?;
Ok(SimpleEvaluator::new(
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Rc::new(options.custom_functions),
run_stats,
)
.evaluate_describe_plan(Rc::new(plan)))
.evaluate_describe_plan(Rc::new(plan));
(Ok(results), explanation, planning_duration)
}
}
};
let explanation = QueryExplanation {
inner: plan_node_with_stats,
with_stats: run_stats,
parsing_duration: query.parsing_duration,
planning_duration,
};
Ok((results, explanation))
}
/// Options for SPARQL query evaluation.
@ -257,3 +281,39 @@ impl From<QueryOptions> for UpdateOptions {
Self { query_options }
}
}
/// The explanation of a query.
#[derive(Clone)]
pub struct QueryExplanation {
    // Root of the evaluation plan tree, with per-node execution counters.
    inner: Rc<PlanNodeWithStats>,
    // If true, per-node statistics are included when serializing to JSON.
    with_stats: bool,
    // Only present when the query was built through `Query::parse`.
    parsing_duration: Option<Duration>,
    planning_duration: Duration,
}
impl QueryExplanation {
    /// Writes the explanation as JSON.
    ///
    /// The output object contains the optional parsing and planning durations
    /// (in seconds) followed by the `"plan"` tree serialized by
    /// `PlanNodeWithStats::json_node`.
    pub fn write_in_json(&self, output: impl io::Write) -> io::Result<()> {
        let mut writer = JsonWriter::from_writer(output);
        writer.write_event(JsonEvent::StartObject)?;
        // The parsing duration is only recorded when the query went through
        // `Query::parse`; a pre-built query has no parsing time.
        if let Some(parsing_duration) = self.parsing_duration {
            writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds"))?;
            writer.write_event(JsonEvent::Number(
                &parsing_duration.as_secs_f32().to_string(),
            ))?;
        }
        writer.write_event(JsonEvent::ObjectKey("planning duration in seconds"))?;
        writer.write_event(JsonEvent::Number(
            &self.planning_duration.as_secs_f32().to_string(),
        ))?;
        writer.write_event(JsonEvent::ObjectKey("plan"))?;
        // Per-node stats are emitted only if the query ran with stats enabled.
        self.inner.json_node(&mut writer, self.with_stats)?;
        writer.write_event(JsonEvent::EndObject)
    }
}
impl fmt::Debug for QueryExplanation {
    /// Delegates to the debug representation of the root plan node.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.inner, f)
    }
}

@ -1,6 +1,7 @@
use crate::model::{BlankNode, Literal, NamedNode, Term, Triple};
use crate::sparql::Variable;
use crate::storage::numeric_encoder::EncodedTerm;
use json_event_parser::{JsonEvent, JsonWriter};
use regex::Regex;
use spargebra::algebra::GraphPattern;
use spargebra::term::GroundTerm;
@ -10,6 +11,7 @@ use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, BTreeSet};
use std::rc::Rc;
use std::time::Duration;
use std::{fmt, io};
#[derive(Debug)]
pub enum PlanNode {
@ -387,6 +389,12 @@ pub struct PlanTerm<T> {
pub plain: T,
}
impl<T: fmt::Display> fmt::Display for PlanTerm<T> {
    /// A plan term displays as its plain (non-encoded) form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.plain, f)
    }
}
#[derive(Debug, Clone)]
pub enum PatternValue {
Constant(PlanTerm<PatternValueConstant>),
@ -394,6 +402,16 @@ pub enum PatternValue {
TriplePattern(Box<TriplePatternValue>),
}
impl fmt::Display for PatternValue {
    /// Displays the pattern value by delegating to whichever variant it holds.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Constant(constant) => fmt::Display::fmt(constant, f),
            Self::Variable(variable) => fmt::Display::fmt(variable, f),
            Self::TriplePattern(pattern) => fmt::Display::fmt(pattern, f),
        }
    }
}
#[derive(Debug, Clone)]
pub enum PatternValueConstant {
NamedNode(NamedNode),
@ -402,6 +420,17 @@ pub enum PatternValueConstant {
DefaultGraph,
}
impl fmt::Display for PatternValueConstant {
    /// Displays the constant: terms as themselves, quoted triples in `<< … >>`,
    /// and the default graph as the literal text `DEFAULT`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NamedNode(node) => fmt::Display::fmt(node, f),
            Self::Literal(literal) => fmt::Display::fmt(literal, f),
            Self::Triple(t) => write!(f, "<< {t} >>"),
            Self::DefaultGraph => f.write_str("DEFAULT"),
        }
    }
}
#[derive(Debug, Clone)]
pub struct TriplePatternValue {
pub subject: PatternValue,
@ -409,12 +438,24 @@ pub struct TriplePatternValue {
pub object: PatternValue,
}
impl fmt::Display for TriplePatternValue {
    /// Displays the pattern as the space-separated `subject predicate object`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            subject,
            predicate,
            object,
        } = self;
        write!(f, "{subject} {predicate} {object}")
    }
}
#[derive(Debug, Clone)]
pub struct PlanVariable<P = Variable> {
pub encoded: usize,
pub plain: P,
}
impl<P: fmt::Display> fmt::Display for PlanVariable<P> {
    /// A plan variable displays as its plain (non-encoded) form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.plain, f)
    }
}
#[derive(Debug, Clone)]
pub enum PlanExpression {
NamedNode(PlanTerm<NamedNode>),
@ -625,6 +666,144 @@ impl PlanExpression {
}
}
impl fmt::Display for PlanExpression {
    /// Renders the expression tree as nested `Name(arg, …)` calls for the
    /// plan explanation output; terms and variables display as themselves.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Variable(v) => {
                write!(f, "{v}")
            }
            Self::Bound(v) => {
                write!(f, "Bound({v})")
            }
            Self::NamedNode(n) => write!(f, "{n}"),
            Self::Literal(l) => write!(f, "{l}"),
            // Nullary functions.
            Self::Rand => write!(f, "Rand()"),
            Self::Now => write!(f, "Now()"),
            Self::Uuid => write!(f, "Uuid()"),
            Self::StrUuid => write!(f, "StrUuid()"),
            // Unary functions.
            Self::UnaryPlus(e) => write!(f, "UnaryPlus({e})"),
            Self::UnaryMinus(e) => write!(f, "UnaryMinus({e})"),
            Self::Not(e) => write!(f, "Not({e})"),
            Self::BNode(e) => {
                // BNode takes an optional label expression.
                if let Some(e) = e {
                    write!(f, "BNode({e})")
                } else {
                    write!(f, "BNode()")
                }
            }
            Self::Str(e) => write!(f, "Str({e})"),
            Self::Lang(e) => write!(f, "Lang({e})"),
            Self::Datatype(e) => write!(f, "Datatype({e})"),
            Self::Iri(e) => write!(f, "Iri({e})"),
            Self::Abs(e) => write!(f, "Abs({e})"),
            Self::Ceil(e) => write!(f, "Ceil({e})"),
            Self::Floor(e) => write!(f, "Floor({e})"),
            Self::Round(e) => write!(f, "Round({e})"),
            Self::UCase(e) => write!(f, "UCase({e})"),
            Self::LCase(e) => write!(f, "LCase({e})"),
            Self::StrLen(e) => write!(f, "StrLen({e})"),
            Self::EncodeForUri(e) => write!(f, "EncodeForUri({e})"),
            Self::StaticRegex(e, r) => write!(f, "StaticRegex({e}, {r})"),
            Self::Year(e) => write!(f, "Year({e})"),
            Self::Month(e) => write!(f, "Month({e})"),
            Self::Day(e) => write!(f, "Day({e})"),
            Self::Hours(e) => write!(f, "Hours({e})"),
            Self::Minutes(e) => write!(f, "Minutes({e})"),
            Self::Seconds(e) => write!(f, "Seconds({e})"),
            Self::Timezone(e) => write!(f, "Timezone({e})"),
            Self::Tz(e) => write!(f, "Tz({e})"),
            Self::Md5(e) => write!(f, "Md5({e})"),
            Self::Sha1(e) => write!(f, "Sha1({e})"),
            Self::Sha256(e) => write!(f, "Sha256({e})"),
            Self::Sha384(e) => write!(f, "Sha384({e})"),
            Self::Sha512(e) => write!(f, "Sha512({e})"),
            Self::IsIri(e) => write!(f, "IsIri({e})"),
            Self::IsBlank(e) => write!(f, "IsBlank({e})"),
            Self::IsLiteral(e) => write!(f, "IsLiteral({e})"),
            Self::IsNumeric(e) => write!(f, "IsNumeric({e})"),
            Self::IsTriple(e) => write!(f, "IsTriple({e})"),
            Self::Subject(e) => write!(f, "Subject({e})"),
            Self::Predicate(e) => write!(f, "Predicate({e})"),
            Self::Object(e) => write!(f, "Object({e})"),
            Self::BooleanCast(e) => write!(f, "BooleanCast({e})"),
            Self::DoubleCast(e) => write!(f, "DoubleCast({e})"),
            Self::FloatCast(e) => write!(f, "FloatCast({e})"),
            Self::DecimalCast(e) => write!(f, "DecimalCast({e})"),
            Self::IntegerCast(e) => write!(f, "IntegerCast({e})"),
            Self::DateCast(e) => write!(f, "DateCast({e})"),
            Self::TimeCast(e) => write!(f, "TimeCast({e})"),
            Self::DateTimeCast(e) => write!(f, "DateTimeCast({e})"),
            Self::DurationCast(e) => write!(f, "DurationCast({e})"),
            Self::YearMonthDurationCast(e) => write!(f, "YearMonthDurationCast({e})"),
            Self::DayTimeDurationCast(e) => write!(f, "DayTimeDurationCast({e})"),
            Self::StringCast(e) => write!(f, "StringCast({e})"),
            // Binary functions and operators.
            Self::Or(a, b) => write!(f, "Or({a}, {b})"),
            Self::And(a, b) => write!(f, "And({a}, {b})"),
            Self::Equal(a, b) => write!(f, "Equal({a}, {b})"),
            Self::Greater(a, b) => write!(f, "Greater({a}, {b})"),
            Self::GreaterOrEqual(a, b) => write!(f, "GreaterOrEqual({a}, {b})"),
            Self::Less(a, b) => write!(f, "Less({a}, {b})"),
            Self::LessOrEqual(a, b) => write!(f, "LessOrEqual({a}, {b})"),
            Self::Add(a, b) => write!(f, "Add({a}, {b})"),
            Self::Subtract(a, b) => write!(f, "Subtract({a}, {b})"),
            Self::Multiply(a, b) => write!(f, "Multiply({a}, {b})"),
            Self::Divide(a, b) => write!(f, "Divide({a}, {b})"),
            Self::LangMatches(a, b) => write!(f, "LangMatches({a}, {b})"),
            Self::Contains(a, b) => write!(f, "Contains({a}, {b})"),
            Self::StaticReplace(a, b, c) => write!(f, "StaticReplace({a}, {b}, {c})"),
            Self::StrStarts(a, b) => write!(f, "StrStarts({a}, {b})"),
            Self::StrEnds(a, b) => write!(f, "StrEnds({a}, {b})"),
            Self::StrBefore(a, b) => write!(f, "StrBefore({a}, {b})"),
            Self::StrAfter(a, b) => write!(f, "StrAfter({a}, {b})"),
            Self::StrLang(a, b) => write!(f, "StrLang({a}, {b})"),
            Self::StrDt(a, b) => write!(f, "StrDt({a}, {b})"),
            Self::SameTerm(a, b) => write!(f, "SameTerm({a}, {b})"),
            // The optional last argument selects the 2- or 3-ary display form.
            Self::SubStr(a, b, None) => write!(f, "SubStr({a}, {b})"),
            Self::DynamicRegex(a, b, None) => write!(f, "DynamicRegex({a}, {b})"),
            Self::Adjust(a, b) => write!(f, "Adjust({a}, {b})"),
            Self::If(a, b, c) => write!(f, "If({a}, {b}, {c})"),
            Self::SubStr(a, b, Some(c)) => write!(f, "SubStr({a}, {b}, {c})"),
            Self::DynamicRegex(a, b, Some(c)) => write!(f, "DynamicRegex({a}, {b}, {c})"),
            Self::DynamicReplace(a, b, c, None) => write!(f, "DynamicReplace({a}, {b}, {c})"),
            Self::Triple(a, b, c) => write!(f, "Triple({a}, {b}, {c})"),
            Self::DynamicReplace(a, b, c, Some(d)) => {
                write!(f, "DynamicReplace({a}, {b}, {c}, {d})")
            }
            // Variadic functions: comma-separated argument lists.
            Self::Concat(es) => {
                write!(f, "Concat(")?;
                for (i, e) in es.iter().enumerate() {
                    if i > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{e}")?;
                }
                write!(f, ")")
            }
            Self::Coalesce(es) => {
                write!(f, "Coalesce(")?;
                for (i, e) in es.iter().enumerate() {
                    if i > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{e}")?;
                }
                write!(f, ")")
            }
            Self::CustomFunction(name, es) => {
                write!(f, "{name}(")?;
                for (i, e) in es.iter().enumerate() {
                    if i > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{e}")?;
                }
                write!(f, ")")
            }
            Self::Exists(_) => write!(f, "Exists()"), // TODO: display the inner existence plan
        }
    }
}
#[derive(Debug, Clone)]
pub struct PlanAggregation {
pub function: PlanAggregationFunction,
@ -632,6 +811,43 @@ pub struct PlanAggregation {
pub distinct: bool,
}
impl fmt::Display for PlanAggregation {
    /// Displays the aggregation as `Name[Distinct](parameter[; separator=…])`,
    /// e.g. `CountDistinct(?x)` or `GroupConcat(?x; separator=,)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match &self.function {
            PlanAggregationFunction::Count => "Count",
            PlanAggregationFunction::Sum => "Sum",
            PlanAggregationFunction::Min => "Min",
            PlanAggregationFunction::Max => "Max",
            PlanAggregationFunction::Avg => "Avg",
            PlanAggregationFunction::GroupConcat { .. } => "GroupConcat",
            PlanAggregationFunction::Sample => "Sample",
        };
        f.write_str(name)?;
        if self.distinct {
            f.write_str("Distinct")?;
        }
        f.write_str("(")?;
        if let Some(expr) = &self.parameter {
            write!(f, "{expr}")?;
        }
        if let PlanAggregationFunction::GroupConcat { separator } = &self.function {
            write!(f, "; separator={separator}")?;
        }
        f.write_str(")")
    }
}
#[derive(Debug, Clone)]
pub enum PlanAggregationFunction {
Count,
@ -655,12 +871,45 @@ pub enum PlanPropertyPath {
NegatedPropertySet(Rc<Vec<PlanTerm<NamedNode>>>),
}
impl fmt::Display for PlanPropertyPath {
    /// Displays the property path as nested `Name(args…)` calls, matching the
    /// style of the other plan `Display` impls.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Path(p) => write!(f, "{p}"),
            Self::Reverse(p) => write!(f, "Reverse({p})"),
            // Bug fix: the arguments were previously printed without the
            // surrounding parentheses ("Sequence{a}, {b}"), unlike every other
            // variant, producing malformed output such as `Sequencea, b`.
            Self::Sequence(a, b) => write!(f, "Sequence({a}, {b})"),
            Self::Alternative(a, b) => write!(f, "Alternative({a}, {b})"),
            Self::ZeroOrMore(p) => write!(f, "ZeroOrMore({p})"),
            Self::OneOrMore(p) => write!(f, "OneOrMore({p})"),
            Self::ZeroOrOne(p) => write!(f, "ZeroOrOne({p})"),
            Self::NegatedPropertySet(ps) => {
                write!(f, "NegatedPropertySet(")?;
                for (i, p) in ps.iter().enumerate() {
                    if i > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{p}")?;
                }
                write!(f, ")")
            }
        }
    }
}
#[derive(Debug, Clone)]
pub enum Comparator {
Asc(PlanExpression),
Desc(PlanExpression),
}
impl fmt::Display for Comparator {
    /// Displays the sort comparator as `Asc(expr)` or `Desc(expr)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (direction, expression) = match self {
            Self::Asc(c) => ("Asc", c),
            Self::Desc(c) => ("Desc", c),
        };
        write!(f, "{direction}({expression})")
    }
}
#[derive(Debug, Clone)]
pub struct TripleTemplate {
pub subject: TripleTemplateValue,
@ -668,6 +917,12 @@ pub struct TripleTemplate {
pub object: TripleTemplateValue,
}
impl fmt::Display for TripleTemplate {
    /// Renders the template as a space-separated `subject predicate object`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.subject, f)?;
        f.write_str(" ")?;
        fmt::Display::fmt(&self.predicate, f)?;
        f.write_str(" ")?;
        fmt::Display::fmt(&self.object, f)
    }
}
#[derive(Debug, Clone)]
pub enum TripleTemplateValue {
Constant(PlanTerm<Term>),
@ -676,6 +931,17 @@ pub enum TripleTemplateValue {
Triple(Box<TripleTemplate>),
}
impl fmt::Display for TripleTemplateValue {
    /// Delegates to the inner value's own `Display`; nested triples are
    /// wrapped in the `<< ... >>` quoted-triple syntax.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Constant(term) => fmt::Display::fmt(term, f),
            Self::BlankNode(node) => fmt::Display::fmt(node, f),
            Self::Variable(variable) => fmt::Display::fmt(variable, f),
            Self::Triple(template) => write!(f, "<< {template} >>"),
        }
    }
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTuple {
inner: Vec<Option<EncodedTerm>>,
@ -761,3 +1027,135 @@ pub struct PlanNodeWithStats {
pub exec_count: Cell<usize>,
pub exec_duration: Cell<Duration>,
}
impl PlanNodeWithStats {
    /// Serializes this node and, recursively, its children as a JSON object
    /// tree: `{"name": ..., "children": [...]}`.
    ///
    /// When `with_stats` is true the collected execution counters are also
    /// emitted ("number of results" and "duration in seconds"). The duration
    /// is written through `as_secs_f32`, so it carries ~7 significant digits.
    pub fn json_node(
        &self,
        writer: &mut JsonWriter<impl io::Write>,
        with_stats: bool,
    ) -> io::Result<()> {
        writer.write_event(JsonEvent::StartObject)?;
        writer.write_event(JsonEvent::ObjectKey("name"))?;
        writer.write_event(JsonEvent::String(&self.node_label()))?;
        if with_stats {
            writer.write_event(JsonEvent::ObjectKey("number of results"))?;
            writer.write_event(JsonEvent::Number(&self.exec_count.get().to_string()))?;
            writer.write_event(JsonEvent::ObjectKey("duration in seconds"))?;
            writer.write_event(JsonEvent::Number(
                &self.exec_duration.get().as_secs_f32().to_string(),
            ))?;
        }
        // "children" is always present, possibly as an empty array.
        writer.write_event(JsonEvent::ObjectKey("children"))?;
        writer.write_event(JsonEvent::StartArray)?;
        for child in &self.children {
            child.json_node(writer, with_stats)?;
        }
        writer.write_event(JsonEvent::EndArray)?;
        writer.write_event(JsonEvent::EndObject)
    }

    /// Builds a human-readable label for the wrapped plan node, embedding the
    /// node-specific details (variables, expressions, patterns...). Used both
    /// for the JSON "name" field and the `Debug` output.
    fn node_label(&self) -> String {
        match self.node.as_ref() {
            // Group-by keys followed by "aggregate -> target variable" pairs.
            PlanNode::Aggregate {
                key_variables,
                aggregates,
                ..
            } => format!(
                "Aggregate({})",
                key_variables
                    .iter()
                    .map(|c| c.to_string())
                    .chain(aggregates.iter().map(|(agg, v)| format!("{agg} -> {v}")))
                    .collect::<Vec<_>>()
                    .join(", ")
            ),
            PlanNode::AntiJoin { .. } => "AntiJoin".to_owned(),
            PlanNode::Extend {
                expression,
                variable,
                ..
            } => format!("Extend({expression} -> {variable})"),
            PlanNode::Filter { expression, .. } => format!("Filter({expression})"),
            PlanNode::ForLoopJoin { .. } => "ForLoopJoin".to_owned(),
            PlanNode::ForLoopLeftJoin { .. } => "ForLoopLeftJoin".to_owned(),
            PlanNode::HashDeduplicate { .. } => "HashDeduplicate".to_owned(),
            PlanNode::HashJoin { .. } => "HashJoin".to_owned(),
            PlanNode::HashLeftJoin { expression, .. } => format!("HashLeftJoin({expression})"),
            PlanNode::Limit { count, .. } => format!("Limit({count})"),
            PlanNode::PathPattern {
                subject,
                path,
                object,
                graph_name,
            } => format!("PathPattern({subject} {path} {object} {graph_name})"),
            // Renamed variables are shown as "from -> to"; unchanged ones once.
            PlanNode::Project { mapping, .. } => {
                format!(
                    "Project({})",
                    mapping
                        .iter()
                        .map(|(f, t)| if f.plain == t.plain {
                            f.to_string()
                        } else {
                            format!("{f} -> {t}")
                        })
                        .collect::<Vec<_>>()
                        .join(", ")
                )
            }
            PlanNode::QuadPattern {
                subject,
                predicate,
                object,
                graph_name,
            } => format!("QuadPattern({subject} {predicate} {object} {graph_name})"),
            PlanNode::Reduced { .. } => "Reduced".to_owned(),
            // SERVICE calls get a distinct label when SILENT is set.
            PlanNode::Service {
                service_name,
                silent,
                ..
            } => {
                if *silent {
                    format!("SilentService({service_name})")
                } else {
                    format!("Service({service_name})")
                }
            }
            PlanNode::Skip { count, .. } => format!("Skip({count})"),
            PlanNode::Sort { by, .. } => {
                format!(
                    "Sort({})",
                    by.iter()
                        .map(|c| c.to_string())
                        .collect::<Vec<_>>()
                        .join(", ")
                )
            }
            PlanNode::StaticBindings { variables, .. } => {
                format!(
                    "StaticBindings({})",
                    variables
                        .iter()
                        .map(|v| v.to_string())
                        .collect::<Vec<_>>()
                        .join(", ")
                )
            }
            PlanNode::Union { .. } => "Union".to_owned(),
        }
    }
}
impl fmt::Debug for PlanNodeWithStats {
    /// Renders the node as a `Node { ... }` debug struct. Execution counters
    /// are included only once some execution time has actually been recorded,
    /// and the children list only when it is non-empty.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("Node");
        builder.field("name", &self.node_label());
        let duration = self.exec_duration.get();
        if duration > Duration::default() {
            builder.field("number of results", &self.exec_count.get());
            builder.field("duration in seconds", &duration);
        }
        if !self.children.is_empty() {
            builder.field("children", &self.children);
        }
        builder.finish()
    }
}

@ -130,11 +130,12 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
self.base_iri.clone(),
self.options.query_options.service_handler(),
Rc::new(self.options.query_options.custom_functions.clone()),
false,
);
let mut bnodes = HashMap::new();
let (eval, _) = evaluator.plan_evaluator(Rc::new(plan));
let tuples =
evaluator.plan_evaluator(Rc::new(plan))(EncodedTuple::with_capacity(variables.len()))
.collect::<Result<Vec<_>, _>>()?; //TODO: would be much better to stream
eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; //TODO: would be much better to stream
for tuple in tuples {
for quad in delete {
if let Some(quad) =

@ -29,8 +29,8 @@ use crate::io::{
};
use crate::model::*;
use crate::sparql::{
evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update,
UpdateOptions,
evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions,
QueryResults, Update, UpdateOptions,
};
use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm};
#[cfg(not(target_family = "wasm"))]
@ -207,7 +207,37 @@ impl Store {
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
) -> Result<QueryResults, EvaluationError> {
evaluate_query(self.storage.snapshot(), query, options)
let (results, _) = self.explain_query_opt(query, options, false)?;
results
}
/// Executes a [SPARQL 1.1 query](https://www.w3.org/TR/sparql11-query/) with some options and
/// returns a query explanation with some statistics (if enabled with the `with_stats` parameter).
///
/// Beware: if you want to compute statistics you need to exhaust the results iterator before having a look at them.
///
/// Usage example serialising the explanation with statistics in JSON:
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::sparql::{QueryOptions, QueryResults};
///
/// let store = Store::new()?;
/// if let (Ok(QueryResults::Solutions(solutions)), explanation) = store.explain_query_opt("SELECT ?s WHERE { VALUES ?s { 1 2 3 } }", QueryOptions::default(), true)? {
/// // We make sure to have read all the solutions
/// for _ in solutions {
/// }
/// let mut buf = Vec::new();
/// explanation.write_in_json(&mut buf)?;
/// }
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn explain_query_opt(
    &self,
    query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
    options: QueryOptions,
    with_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
    // Evaluates against self.storage.snapshot() — presumably an isolated,
    // consistent view of the store; confirm against the storage layer docs.
    // `with_stats` is forwarded so the evaluator records per-node counters.
    evaluate_query(self.storage.snapshot(), query, options, with_stats)
}
/// Retrieves quads with a filter on each quad component
@ -918,7 +948,8 @@ impl<'a> Transaction<'a> {
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
) -> Result<QueryResults, EvaluationError> {
evaluate_query(self.writer.reader(), query, options)
let (results, _) = evaluate_query(self.writer.reader(), query, options, false)?;
results
}
/// Retrieves quads with a filter on each quad component.

@ -12,11 +12,11 @@ use std::fs::{create_dir, remove_dir_all, File};
use std::io::Cursor;
#[cfg(not(target_family = "wasm"))]
use std::io::Write;
#[cfg(not(target_family = "wasm"))]
#[cfg(target_os = "linux")]
use std::iter::once;
#[cfg(not(target_family = "wasm"))]
use std::path::{Path, PathBuf};
#[cfg(not(target_family = "wasm"))]
#[cfg(target_os = "linux")]
use std::process::Command;
const DATA: &str = r#"
@prefix schema: <http://schema.org/> .
@ -501,7 +501,7 @@ fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> {
Ok(())
}
#[cfg(not(target_family = "wasm"))]
#[cfg(target_os = "linux")]
fn reset_dir(dir: &str) -> Result<(), Box<dyn Error>> {
assert!(Command::new("git")
.args(["clean", "-fX", dir])

@ -7,7 +7,7 @@ use oxigraph::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerialize
use oxigraph::model::{
GraphName, GraphNameRef, IriParseError, NamedNode, NamedNodeRef, NamedOrBlankNode,
};
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::sparql::{Query, QueryOptions, QueryResults, Update};
use oxigraph::store::{BulkLoader, LoaderError, Store};
use oxiri::Iri;
use rand::random;
@ -175,6 +175,23 @@ enum Command {
/// By default the format is guessed from the results file extension.
#[arg(long, required_unless_present = "results_file")]
results_format: Option<String>,
/// Prints to stderr a human-readable explanation of the query evaluation.
///
/// Use the stats option to print also query evaluation statistics.
#[arg(long, conflicts_with = "explain_file")]
explain: bool,
/// Write to the given file an explanation of the query evaluation.
///
/// If the file extension is .json the JSON format is used, if .txt a human readable format is used.
///
/// Use the stats option to print also query evaluation statistics.
#[arg(long, conflicts_with = "explain")]
explain_file: Option<PathBuf>,
/// Computes some evaluation statistics to print as part of the query explanations.
///
/// Beware, computing the statistics adds some overhead to the evaluation runtime.
#[arg(long)]
stats: bool,
},
/// Executes a SPARQL update against the store.
Update {
@ -424,6 +441,9 @@ pub fn main() -> anyhow::Result<()> {
query_base,
results_file,
results_format,
explain,
explain_file,
stats,
} => {
let query = if let Some(query) = query {
query
@ -443,107 +463,130 @@ pub fn main() -> anyhow::Result<()> {
.location
.ok_or_else(|| anyhow!("The --location argument is required"))?,
)?;
match store.query(query)? {
QueryResults::Solutions(solutions) => {
let format = if let Some(name) = results_format {
if let Some(format) = QueryResultsFormat::from_extension(&name) {
format
} else if let Some(format) = QueryResultsFormat::from_media_type(&name) {
format
let (results, explanation) =
store.explain_query_opt(query, QueryOptions::default(), stats)?;
let print_result = (|| {
match results? {
QueryResults::Solutions(solutions) => {
let format = if let Some(name) = results_format {
if let Some(format) = QueryResultsFormat::from_extension(&name) {
format
} else if let Some(format) = QueryResultsFormat::from_media_type(&name)
{
format
} else {
bail!("The file format '{name}' is unknown")
}
} else if let Some(results_file) = &results_file {
format_from_path(results_file, |ext| {
QueryResultsFormat::from_extension(ext)
.ok_or_else(|| anyhow!("The file extension '{ext}' is unknown"))
})?
} else {
bail!("The file format '{name}' is unknown")
bail!("The --results-format option must be set when writing to stdout")
};
if let Some(results_file) = results_file {
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(
BufWriter::new(File::create(results_file)?),
solutions.variables().to_vec(),
)?;
for solution in solutions {
writer.write(&solution?)?;
}
writer.finish()?;
} else {
let stdout = stdout(); // Not needed in Rust 1.61
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(stdout.lock(), solutions.variables().to_vec())?;
for solution in solutions {
writer.write(&solution?)?;
}
let _ = writer.finish()?;
}
} else if let Some(results_file) = &results_file {
format_from_path(results_file, |ext| {
QueryResultsFormat::from_extension(ext)
.ok_or_else(|| anyhow!("The file extension '{ext}' is unknown"))
})?
} else {
bail!("The --results-format option must be set when writing to stdout")
};
if let Some(results_file) = results_file {
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(
}
QueryResults::Boolean(result) => {
let format = if let Some(name) = results_format {
if let Some(format) = QueryResultsFormat::from_extension(&name) {
format
} else if let Some(format) = QueryResultsFormat::from_media_type(&name)
{
format
} else {
bail!("The file format '{name}' is unknown")
}
} else if let Some(results_file) = &results_file {
format_from_path(results_file, |ext| {
QueryResultsFormat::from_extension(ext)
.ok_or_else(|| anyhow!("The file extension '{ext}' is unknown"))
})?
} else {
bail!("The --results-format option must be set when writing to stdout")
};
if let Some(results_file) = results_file {
QueryResultsSerializer::from_format(format).write_boolean_result(
BufWriter::new(File::create(results_file)?),
solutions.variables().to_vec(),
result,
)?;
for solution in solutions {
writer.write(&solution?)?;
}
writer.finish()?;
} else {
let stdout = stdout(); // Not needed in Rust 1.61
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(stdout.lock(), solutions.variables().to_vec())?;
for solution in solutions {
writer.write(&solution?)?;
}
let _ = writer.finish()?;
}
}
QueryResults::Boolean(result) => {
let format = if let Some(name) = results_format {
if let Some(format) = QueryResultsFormat::from_extension(&name) {
format
} else if let Some(format) = QueryResultsFormat::from_media_type(&name) {
format
} else {
bail!("The file format '{name}' is unknown")
let _ = QueryResultsSerializer::from_format(format)
.write_boolean_result(stdout().lock(), result)?;
}
} else if let Some(results_file) = &results_file {
format_from_path(results_file, |ext| {
QueryResultsFormat::from_extension(ext)
.ok_or_else(|| anyhow!("The file extension '{ext}' is unknown"))
})?
} else {
bail!("The --results-format option must be set when writing to stdout")
};
if let Some(results_file) = results_file {
QueryResultsSerializer::from_format(format).write_boolean_result(
BufWriter::new(File::create(results_file)?),
result,
)?;
} else {
let _ = QueryResultsSerializer::from_format(format)
.write_boolean_result(stdout().lock(), result)?;
}
}
QueryResults::Graph(triples) => {
let format = if let Some(name) = results_format {
if let Some(format) = GraphFormat::from_extension(&name) {
format
} else if let Some(format) = GraphFormat::from_media_type(&name) {
format
QueryResults::Graph(triples) => {
let format = if let Some(name) = results_format {
if let Some(format) = GraphFormat::from_extension(&name) {
format
} else if let Some(format) = GraphFormat::from_media_type(&name) {
format
} else {
bail!("The file format '{name}' is unknown")
}
} else if let Some(results_file) = &results_file {
format_from_path(results_file, |ext| {
GraphFormat::from_extension(ext)
.ok_or_else(|| anyhow!("The file extension '{ext}' is unknown"))
})?
} else {
bail!("The file format '{name}' is unknown")
}
} else if let Some(results_file) = &results_file {
format_from_path(results_file, |ext| {
GraphFormat::from_extension(ext)
.ok_or_else(|| anyhow!("The file extension '{ext}' is unknown"))
})?
} else {
bail!("The --results-format option must be set when writing to stdout")
};
if let Some(results_file) = results_file {
let mut writer = GraphSerializer::from_format(format)
.triple_writer(BufWriter::new(File::create(results_file)?))?;
for triple in triples {
writer.write(triple?.as_ref())?;
}
writer.finish()?;
} else {
let stdout = stdout(); // Not needed in Rust 1.61
let mut writer =
GraphSerializer::from_format(format).triple_writer(stdout.lock())?;
for triple in triples {
writer.write(triple?.as_ref())?;
bail!("The --results-format option must be set when writing to stdout")
};
if let Some(results_file) = results_file {
let mut writer = GraphSerializer::from_format(format)
.triple_writer(BufWriter::new(File::create(results_file)?))?;
for triple in triples {
writer.write(triple?.as_ref())?;
}
writer.finish()?;
} else {
let stdout = stdout(); // Not needed in Rust 1.61
let mut writer = GraphSerializer::from_format(format)
.triple_writer(stdout.lock())?;
for triple in triples {
writer.write(triple?.as_ref())?;
}
writer.finish()?;
}
writer.finish()?;
}
}
Ok(())
})();
if let Some(explain_file) = explain_file {
let mut file = BufWriter::new(File::create(&explain_file)?);
match explain_file
.extension()
.and_then(|e| e.to_str()) {
Some("json") => {
explanation.write_in_json(file)?;
},
Some("txt") => {
write!(file, "{:?}", explanation)?;
},
_ => bail!("The given explanation file {} must have an extension that is .json or .txt", explain_file.display())
}
} else if explain || stats {
eprintln!("{:?}", explanation);
}
Ok(())
print_result
}
Command::Update {
update,

Loading…
Cancel
Save