Avoids unnecessary Arc wrappers and clones in SPARQL evaluation

Avoids recursive calls to next()
pull/10/head
Tpt 5 years ago
parent 9dc2e5da90
commit d2a5d7d83a
  1. 207
      lib/src/sparql/eval.rs
  2. 2
      lib/src/sparql/mod.rs

@ -26,11 +26,10 @@ use std::collections::BTreeMap;
use std::collections::HashSet; use std::collections::HashSet;
use std::convert::TryInto; use std::convert::TryInto;
use std::fmt::Write; use std::fmt::Write;
use std::iter::once;
use std::iter::Iterator; use std::iter::Iterator;
use std::iter::{empty, once};
use std::ops::Deref; use std::ops::Deref;
use std::str; use std::str;
use std::sync::Arc;
use std::sync::Mutex; use std::sync::Mutex;
use std::u64; use std::u64;
use uuid::Uuid; use uuid::Uuid;
@ -39,11 +38,10 @@ const REGEX_SIZE_LIMIT: usize = 1_000_000;
type EncodedTuplesIterator<'a> = Box<dyn Iterator<Item = Result<EncodedTuple>> + 'a>; type EncodedTuplesIterator<'a> = Box<dyn Iterator<Item = Result<EncodedTuple>> + 'a>;
#[derive(Clone)]
pub struct SimpleEvaluator<S: StoreConnection> { pub struct SimpleEvaluator<S: StoreConnection> {
dataset: DatasetView<S>, dataset: DatasetView<S>,
bnodes_map: Arc<Mutex<BTreeMap<u64, Uuid>>>, bnodes_map: Mutex<BTreeMap<u64, Uuid>>,
base_iri: Option<Arc<Iri<String>>>, base_iri: Option<Iri<String>>,
now: DateTime<FixedOffset>, now: DateTime<FixedOffset>,
} }
@ -51,8 +49,8 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
pub fn new(dataset: S, base_iri: Option<Iri<String>>) -> Self { pub fn new(dataset: S, base_iri: Option<Iri<String>>) -> Self {
Self { Self {
dataset: DatasetView::new(dataset), dataset: DatasetView::new(dataset),
bnodes_map: Arc::new(Mutex::new(BTreeMap::default())), bnodes_map: Mutex::new(BTreeMap::default()),
base_iri: base_iri.map(Arc::new), base_iri,
now: Utc::now().with_timezone(&FixedOffset::east(0)), now: Utc::now().with_timezone(&FixedOffset::east(0)),
} }
} }
@ -91,7 +89,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
'a: 'b, 'a: 'b,
{ {
Ok(QueryResult::Graph(Box::new(ConstructIterator { Ok(QueryResult::Graph(Box::new(ConstructIterator {
dataset: self.dataset.clone(), eval: self,
iter: self.eval_plan(plan, vec![]), iter: self.eval_plan(plan, vec![]),
template: construct, template: construct,
buffered_results: Vec::default(), buffered_results: Vec::default(),
@ -104,9 +102,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
'a: 'b, 'a: 'b,
{ {
Ok(QueryResult::Graph(Box::new(DescribeIterator { Ok(QueryResult::Graph(Box::new(DescribeIterator {
dataset: self.dataset.clone(), eval: self,
iter: self.eval_plan(plan, vec![]), iter: self.eval_plan(plan, vec![]),
quads: Vec::default(), quads: Box::new(empty()),
}))) })))
} }
@ -228,7 +226,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
let mut filtered_from = from.clone(); let mut filtered_from = from.clone();
unbind_variables(&mut filtered_from, &problem_vars); unbind_variables(&mut filtered_from, &problem_vars);
let iter = LeftJoinIterator { let iter = LeftJoinIterator {
eval: self.clone(), eval: self,
right_plan: &*right, right_plan: &*right,
left_iter: self.eval_plan(&*left, filtered_from), left_iter: self.eval_plan(&*left, filtered_from),
current_right: Vec::default(), current_right: Vec::default(),
@ -244,7 +242,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
} }
} }
PlanNode::Filter { child, expression } => { PlanNode::Filter { child, expression } => {
let eval = self.clone(); let eval = self;
Box::new(self.eval_plan(&*child, from).filter(move |tuple| { Box::new(self.eval_plan(&*child, from).filter(move |tuple| {
match tuple { match tuple {
Ok(tuple) => eval Ok(tuple) => eval
@ -256,17 +254,19 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
})) }))
} }
PlanNode::Union { entry, children } => Box::new(UnionIterator { PlanNode::Union { entry, children } => Box::new(UnionIterator {
eval: self.clone(), eval: self,
children_plan: &children, children_plan: &children,
input_iter: self.eval_plan(&*entry, from), input_iter: self.eval_plan(&*entry, from),
current: Vec::default(), current_input: Vec::default(),
current_iterator: Box::new(empty()),
current_child: children.len(),
}), }),
PlanNode::Extend { PlanNode::Extend {
child, child,
position, position,
expression, expression,
} => { } => {
let eval = self.clone(); let eval = self;
Box::new(self.eval_plan(&*child, from).map(move |tuple| { Box::new(self.eval_plan(&*child, from).map(move |tuple| {
let mut tuple = tuple?; let mut tuple = tuple?;
if let Some(value) = eval.eval_expression(&expression, &tuple) { if let Some(value) = eval.eval_expression(&expression, &tuple) {
@ -1275,11 +1275,11 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
where where
'a: 'b, 'a: 'b,
{ {
let dataset = self.dataset.clone(); let eval = self;
BindingsIterator::new( BindingsIterator::new(
variables, variables,
Box::new(iter.map(move |values| { Box::new(iter.map(move |values| {
let encoder = dataset.encoder(); let encoder = eval.dataset.encoder();
values? values?
.into_iter() .into_iter()
.map(|value| { .map(|value| {
@ -1531,17 +1531,16 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
} }
} }
#[derive(Clone)]
struct DatasetView<S: StoreConnection> { struct DatasetView<S: StoreConnection> {
store: S, store: S,
extra: Arc<MemoryStringStore>, extra: MemoryStringStore,
} }
impl<S: StoreConnection> DatasetView<S> { impl<S: StoreConnection> DatasetView<S> {
fn new(store: S) -> Self { fn new(store: S) -> Self {
Self { Self {
store, store,
extra: Arc::new(MemoryStringStore::default()), extra: MemoryStringStore::default(),
} }
} }
@ -1751,19 +1750,20 @@ impl<'a> Iterator for JoinIterator<'a> {
type Item = Result<EncodedTuple>; type Item = Result<EncodedTuple>;
fn next(&mut self) -> Option<Result<EncodedTuple>> { fn next(&mut self) -> Option<Result<EncodedTuple>> {
if let Some(result) = self.buffered_results.pop() { loop {
return Some(result); if let Some(result) = self.buffered_results.pop() {
} return Some(result);
let right_tuple = match self.right_iter.next()? { }
Ok(right_tuple) => right_tuple, let right_tuple = match self.right_iter.next()? {
Err(error) => return Some(Err(error)), Ok(right_tuple) => right_tuple,
}; Err(error) => return Some(Err(error)),
for left_tuple in &self.left { };
if let Some(result_tuple) = combine_tuples(left_tuple, &right_tuple) { for left_tuple in &self.left {
self.buffered_results.push(Ok(result_tuple)) if let Some(result_tuple) = combine_tuples(left_tuple, &right_tuple) {
self.buffered_results.push(Ok(result_tuple))
}
} }
} }
self.next()
} }
} }
@ -1793,7 +1793,7 @@ impl<'a> Iterator for AntiJoinIterator<'a> {
} }
struct LeftJoinIterator<'a, S: StoreConnection + 'a> { struct LeftJoinIterator<'a, S: StoreConnection + 'a> {
eval: SimpleEvaluator<S>, eval: &'a SimpleEvaluator<S>,
right_plan: &'a PlanNode, right_plan: &'a PlanNode,
left_iter: EncodedTuplesIterator<'a>, left_iter: EncodedTuplesIterator<'a>,
current_right: Vec<Result<EncodedTuple>>, //TODO: keep using an iterator? current_right: Vec<Result<EncodedTuple>>, //TODO: keep using an iterator?
@ -1861,29 +1861,37 @@ impl<'a, S: StoreConnection> Iterator for BadLeftJoinIterator<'a, S> {
} }
struct UnionIterator<'a, S: StoreConnection + 'a> { struct UnionIterator<'a, S: StoreConnection + 'a> {
eval: SimpleEvaluator<S>, eval: &'a SimpleEvaluator<S>,
children_plan: &'a Vec<PlanNode>, children_plan: &'a [PlanNode],
input_iter: EncodedTuplesIterator<'a>, input_iter: EncodedTuplesIterator<'a>,
current: Vec<Result<EncodedTuple>>, //TODO: avoid current_input: EncodedTuple,
current_iterator: EncodedTuplesIterator<'a>,
current_child: usize,
} }
impl<'a, S: StoreConnection> Iterator for UnionIterator<'a, S> { impl<'a, S: StoreConnection> Iterator for UnionIterator<'a, S> {
type Item = Result<EncodedTuple>; type Item = Result<EncodedTuple>;
fn next(&mut self) -> Option<Result<EncodedTuple>> { fn next(&mut self) -> Option<Result<EncodedTuple>> {
if let Some(tuple) = self.current.pop() { loop {
return Some(tuple); if let Some(tuple) = self.current_iterator.next() {
} return Some(tuple);
match self.input_iter.next()? { }
Ok(input_tuple) => { if self.current_child == self.children_plan.len() {
for plan in self.children_plan { match self.input_iter.next()? {
self.current Ok(input_tuple) => {
.extend(self.eval.eval_plan(plan, input_tuple.clone())); self.current_input = input_tuple;
self.current_child = 0;
}
Err(error) => return Some(Err(error)),
} }
} }
Err(error) => return Some(Err(error)), self.current_iterator = self.eval.eval_plan(
&self.children_plan[self.current_child],
self.current_input.clone(),
);
self.current_child += 1;
} }
self.next()
} }
} }
@ -1896,21 +1904,21 @@ impl<'a> Iterator for HashDeduplicateIterator<'a> {
type Item = Result<EncodedTuple>; type Item = Result<EncodedTuple>;
fn next(&mut self) -> Option<Result<EncodedTuple>> { fn next(&mut self) -> Option<Result<EncodedTuple>> {
match self.iter.next()? { loop {
Ok(tuple) => { match self.iter.next()? {
if self.already_seen.insert(tuple.clone()) { Ok(tuple) => {
Some(Ok(tuple)) if self.already_seen.insert(tuple.clone()) {
} else { return Some(Ok(tuple));
self.next() }
} }
Err(error) => return Some(Err(error)),
} }
Err(error) => Some(Err(error)),
} }
} }
} }
struct ConstructIterator<'a, S: StoreConnection> { struct ConstructIterator<'a, S: StoreConnection> {
dataset: DatasetView<S>, eval: &'a SimpleEvaluator<S>,
iter: EncodedTuplesIterator<'a>, iter: EncodedTuplesIterator<'a>,
template: &'a [TripleTemplate], template: &'a [TripleTemplate],
buffered_results: Vec<Result<Triple>>, buffered_results: Vec<Result<Triple>>,
@ -1921,31 +1929,32 @@ impl<'a, S: StoreConnection> Iterator for ConstructIterator<'a, S> {
type Item = Result<Triple>; type Item = Result<Triple>;
fn next(&mut self) -> Option<Result<Triple>> { fn next(&mut self) -> Option<Result<Triple>> {
if let Some(result) = self.buffered_results.pop() { loop {
return Some(result); if let Some(result) = self.buffered_results.pop() {
} return Some(result);
{ }
let tuple = match self.iter.next()? { {
Ok(tuple) => tuple, let tuple = match self.iter.next()? {
Err(error) => return Some(Err(error)), Ok(tuple) => tuple,
}; Err(error) => return Some(Err(error)),
let encoder = self.dataset.encoder(); };
for template in self.template { let encoder = self.eval.dataset.encoder();
if let (Some(subject), Some(predicate), Some(object)) = ( for template in self.template {
get_triple_template_value(&template.subject, &tuple, &mut self.bnodes), if let (Some(subject), Some(predicate), Some(object)) = (
get_triple_template_value(&template.predicate, &tuple, &mut self.bnodes), get_triple_template_value(&template.subject, &tuple, &mut self.bnodes),
get_triple_template_value(&template.object, &tuple, &mut self.bnodes), get_triple_template_value(&template.predicate, &tuple, &mut self.bnodes),
) { get_triple_template_value(&template.object, &tuple, &mut self.bnodes),
self.buffered_results ) {
.push(decode_triple(&encoder, subject, predicate, object)); self.buffered_results
} else { .push(decode_triple(&encoder, subject, predicate, object));
self.buffered_results.clear(); //No match, we do not output any triple for this row } else {
break; self.buffered_results.clear(); //No match, we do not output any triple for this row
break;
}
} }
self.bnodes.clear(); //We do not reuse old bnodes
} }
self.bnodes.clear(); //We do not reuse old bnodes
} }
self.next()
} }
} }
@ -1980,38 +1989,40 @@ fn decode_triple<S: StringStore>(
} }
struct DescribeIterator<'a, S: StoreConnection + 'a> { struct DescribeIterator<'a, S: StoreConnection + 'a> {
dataset: DatasetView<S>, eval: &'a SimpleEvaluator<S>,
iter: EncodedTuplesIterator<'a>, iter: EncodedTuplesIterator<'a>,
quads: Vec<Result<EncodedQuad>>, quads: Box<dyn Iterator<Item = Result<EncodedQuad>> + 'a>,
} }
impl<'a, S: StoreConnection> Iterator for DescribeIterator<'a, S> { impl<'a, S: StoreConnection> Iterator for DescribeIterator<'a, S> {
type Item = Result<Triple>; type Item = Result<Triple>;
fn next(&mut self) -> Option<Result<Triple>> { fn next(&mut self) -> Option<Result<Triple>> {
if let Some(quad) = self.quads.pop() { loop {
return Some(match quad { if let Some(quad) = self.quads.next() {
Ok(quad) => self return Some(match quad {
.dataset Ok(quad) => self
.encoder() .eval
.decode_quad(&quad) .dataset
.map(|q| q.into_triple()), .encoder()
Err(error) => Err(error), .decode_quad(&quad)
}); .map(|q| q.into_triple()),
} Err(error) => Err(error),
let tuple = match self.iter.next()? { });
Ok(tuple) => tuple, }
Err(error) => return Some(Err(error)), let tuple = match self.iter.next()? {
}; Ok(tuple) => tuple,
for subject in tuple { Err(error) => return Some(Err(error)),
if let Some(subject) = subject { };
self.quads = self for subject in tuple {
.dataset if let Some(subject) = subject {
.quads_for_pattern(Some(subject), None, None, None) self.quads =
.collect(); self.eval
.dataset
.quads_for_pattern(Some(subject), None, None, None);
}
} }
} }
self.next()
} }
} }

@ -27,7 +27,7 @@ pub use crate::sparql::model::Variable;
/// A prepared [SPARQL query](https://www.w3.org/TR/sparql11-query/) /// A prepared [SPARQL query](https://www.w3.org/TR/sparql11-query/)
pub trait PreparedQuery { pub trait PreparedQuery {
/// Evaluates the query and returns its results /// Evaluates the query and returns its results
fn exec(&self) -> Result<QueryResult<'_>>; fn exec(&self) -> Result<QueryResult>;
} }
/// An implementation of `PreparedQuery` for internal use /// An implementation of `PreparedQuery` for internal use

Loading…
Cancel
Save