[SPARQL] Adds an evaluation plan intermediate representation

pull/10/head
Tpt 6 years ago
parent 2ba00cd8b5
commit 654ee215d5
  1. 616
      lib/src/sparql/eval.rs
  2. 5
      lib/src/sparql/mod.rs
  3. 446
      lib/src/sparql/plan.rs

@ -1,5 +1,6 @@
use sparql::algebra::*; use sparql::algebra::*;
use std::collections::BTreeSet; use sparql::plan::*;
use std::collections::HashSet;
use std::iter::once; use std::iter::once;
use std::iter::Iterator; use std::iter::Iterator;
use std::sync::Arc; use std::sync::Arc;
@ -7,154 +8,21 @@ use store::numeric_encoder::EncodedTerm;
use store::store::EncodedQuadsStore; use store::store::EncodedQuadsStore;
use Result; use Result;
type EncodedBinding = Vec<Option<EncodedTerm>>; type EncodedTuplesIterator = Box<dyn Iterator<Item = Result<EncodedTuple>>>;
struct EncodedBindingsIterator { pub struct SimpleEvaluator<S: EncodedQuadsStore> {
variables: Vec<Variable>, store: Arc<S>,
iter: Box<dyn Iterator<Item = Result<EncodedBinding>>>,
}
impl EncodedBindingsIterator {
fn take(self, n: usize) -> Self {
EncodedBindingsIterator {
variables: self.variables,
iter: Box::new(self.iter.take(n)),
}
}
fn skip(self, n: usize) -> Self {
EncodedBindingsIterator {
variables: self.variables,
iter: Box::new(self.iter.skip(n)),
}
}
fn project(self, on_variables: Vec<Variable>) -> Self {
let EncodedBindingsIterator { variables, iter } = self;
let projection: Vec<(usize, usize)> = on_variables
.iter()
.enumerate()
.flat_map(|(new_pos, v)| slice_key(&variables, v).map(|old_pos| (old_pos, new_pos)))
.collect();
let new_len = on_variables.len();
EncodedBindingsIterator {
variables: on_variables,
iter: Box::new(iter.map(move |binding| {
let binding = binding?;
let mut new_binding = Vec::with_capacity(new_len);
new_binding.resize(new_len, None);
for (old_pos, new_pos) in &projection {
new_binding[*new_pos] = binding[*old_pos];
}
Ok(new_binding)
})),
}
}
fn unique(self) -> Self {
let EncodedBindingsIterator { variables, iter } = self;
let mut oks = BTreeSet::default();
let mut errors = Vec::default();
for element in iter {
match element {
Ok(ok) => {
oks.insert(ok);
}
Err(error) => errors.push(error),
}
}
EncodedBindingsIterator {
variables,
iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))),
}
}
fn chain(self, other: Self) -> Self {
let EncodedBindingsIterator {
variables: variables1,
iter: iter1,
} = self;
let EncodedBindingsIterator {
variables: variables2,
iter: iter2,
} = other;
let mut variables = variables1;
let mut map_2_to_1 = Vec::with_capacity(variables2.len());
for var in variables2 {
map_2_to_1.push(match slice_key(&variables, &var) {
Some(key) => key,
None => {
variables.push(var);
variables.len() - 1
}
})
}
let variables_len = variables.len();
EncodedBindingsIterator {
variables,
iter: Box::new(iter1.chain(iter2.map(move |binding| {
let binding = binding?;
let mut new_binding = binding.clone();
new_binding.resize(variables_len, None);
for (old_key, new_key) in map_2_to_1.iter().enumerate() {
new_binding[*new_key] = binding[old_key];
}
Ok(new_binding)
}))),
}
}
fn duplicate(self) -> (Self, Self) {
let EncodedBindingsIterator { variables, iter } = self;
//TODO: optimize
let mut oks = Vec::default();
let mut errors = Vec::default();
for element in iter {
match element {
Ok(ok) => {
oks.push(ok);
}
Err(error) => errors.push(error),
}
}
(
EncodedBindingsIterator {
variables: variables.clone(),
iter: Box::new(oks.clone().into_iter().map(Ok)),
},
EncodedBindingsIterator {
variables,
iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))),
},
)
}
}
impl Default for EncodedBindingsIterator {
fn default() -> Self {
EncodedBindingsIterator {
variables: Vec::default(),
iter: Box::new(once(Ok(Vec::default()))),
}
}
} }
fn slice_key<T: Eq>(slice: &[T], element: &T) -> Option<usize> { impl<S: EncodedQuadsStore> Clone for SimpleEvaluator<S> {
for (i, item) in slice.iter().enumerate() { fn clone(&self) -> Self {
if item == element { Self {
return Some(i); store: self.store.clone(),
} }
} }
None
} }
#[derive(Clone)] impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
pub struct SparqlEvaluator<S: EncodedQuadsStore> {
store: Arc<S>,
}
impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
pub fn new(store: Arc<S>) -> Self { pub fn new(store: Arc<S>) -> Self {
Self { store } Self { store }
} }
@ -162,329 +30,233 @@ impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
pub fn evaluate(&self, query: &Query) -> Result<QueryResult> { pub fn evaluate(&self, query: &Query) -> Result<QueryResult> {
match query { match query {
Query::SelectQuery { algebra, dataset } => { Query::SelectQuery { algebra, dataset } => {
Ok(QueryResult::Bindings(self.decode_bindings( let (plan, variables) = PlanBuilder::build(&*self.store, algebra)?;
self.eval_graph_pattern(algebra, EncodedBindingsIterator::default())?, let iter = self.eval_plan(plan, vec![None; variables.len()]);
))) Ok(QueryResult::Bindings(self.decode_bindings(iter, variables)))
} }
_ => unimplemented!(), _ => unimplemented!(),
} }
} }
fn eval_graph_pattern( fn eval_plan(&self, node: PlanNode, from: EncodedTuple) -> EncodedTuplesIterator {
&self, match node {
pattern: &GraphPattern, PlanNode::Init => Box::new(once(Ok(from))),
from: EncodedBindingsIterator, PlanNode::StaticBindings { tuples } => Box::new(tuples.into_iter().map(Ok)),
) -> Result<EncodedBindingsIterator> { PlanNode::TriplePatternJoin {
match pattern { child,
GraphPattern::BGP(p) => { subject,
let mut iter = from; predicate,
for pattern in p { object,
iter = match pattern { } => {
TripleOrPathPattern::Triple(pattern) => { let eval = self.clone();
self.eval_triple_pattern(pattern, iter) Box::new(
} self.eval_plan(*child, from)
TripleOrPathPattern::Path(pattern) => self.eval_path_pattern(pattern, iter), .flat_map(move |tuple| match tuple {
}?; Ok(tuple) => {
} let iter: EncodedTuplesIterator = match eval
Ok(iter) .store
} .quads_for_pattern(
GraphPattern::Join(a, b) => { get_pattern_value(&subject, &tuple),
self.eval_graph_pattern(b, self.eval_graph_pattern(a, from)?) get_pattern_value(&predicate, &tuple),
} get_pattern_value(&object, &tuple),
GraphPattern::LeftJoin(a, b, e) => unimplemented!(), None, //TODO
GraphPattern::Filter(e, p) => { ) {
let EncodedBindingsIterator { variables, iter } = Ok(mut iter) => {
self.eval_graph_pattern(p, from)?; if subject.is_var() && subject == predicate {
let expression = e.clone(); iter = Box::new(iter.filter(|quad| match quad {
let evaluator = Self { Err(_) => true,
store: self.store.clone(), Ok(quad) => quad.subject == quad.predicate,
}; }))
Ok(EncodedBindingsIterator { }
variables: variables.clone(), if subject.is_var() && subject == object {
iter: Box::new(iter.filter(move |val| match val { iter = Box::new(iter.filter(|quad| match quad {
Ok(binding) => { Err(_) => true,
match evaluator.eval_expression(&expression, binding, &variables) { Ok(quad) => quad.subject == quad.object,
Ok(Some(term)) => true, }))
_ => false, }
if predicate.is_var() && predicate == object {
iter = Box::new(iter.filter(|quad| match quad {
Err(_) => true,
Ok(quad) => quad.predicate == quad.object,
}))
}
Box::new(iter.map(move |quad| {
let quad = quad?;
let mut new_tuple = tuple.clone();
put_pattern_value(
&subject,
quad.subject,
&mut new_tuple,
);
put_pattern_value(
&predicate,
quad.predicate,
&mut new_tuple,
);
put_pattern_value(&object, quad.object, &mut new_tuple);
Ok(new_tuple)
}))
}
Err(error) => Box::new(once(Err(error))),
};
iter
} }
} Err(error) => Box::new(once(Err(error))),
}),
)
}
PlanNode::Filter { child, expression } => {
let eval = self.clone();
Box::new(self.eval_plan(*child, from).filter(move |tuple| {
match tuple {
Ok(tuple) => eval
.eval_expression(&expression, tuple)
.and_then(|term| eval.to_bool(term))
.unwrap_or(false),
Err(_) => true, Err(_) => true,
})), }
}) }))
} }
GraphPattern::Union(a, b) => { PlanNode::Union { entry, children } => {
let (from1, from2) = from.duplicate(); //TODO: avoid clones
Ok(self let eval = self.clone();
.eval_graph_pattern(a, from1)? Box::new(self.eval_plan(*entry, from).flat_map(move |tuple| {
.chain(self.eval_graph_pattern(b, from2)?)) let eval = eval.clone();
let iter: EncodedTuplesIterator = match tuple {
Ok(tuple) => Box::new(
children
.clone()
.into_iter()
.flat_map(move |child| eval.eval_plan(child, tuple.clone())),
),
Err(error) => Box::new(once(Err(error))),
};
iter
}))
} }
GraphPattern::Graph(g, p) => unimplemented!(), PlanNode::HashDeduplicate { child } => {
GraphPattern::Extend(p, v, e) => unimplemented!(), let iter = self.eval_plan(*child, from);
GraphPattern::Minus(a, b) => unimplemented!(), let mut values = HashSet::with_capacity(iter.size_hint().0);
GraphPattern::Service(n, p, s) => unimplemented!(), let mut errors = Vec::default();
GraphPattern::AggregateJoin(g, a) => unimplemented!(), for result in iter {
GraphPattern::Data(bs) => Ok(self.encode_bindings(bs)), match result {
GraphPattern::OrderBy(l, o) => self.eval_graph_pattern(l, from), //TODO Ok(result) => {
GraphPattern::Project(l, new_variables) => Ok(self values.insert(result);
.eval_graph_pattern(l, from)? }
.project(new_variables.to_vec())), Err(error) => errors.push(Err(error)),
GraphPattern::Distinct(l) => Ok(self.eval_graph_pattern(l, from)?.unique()), }
GraphPattern::Reduced(l) => self.eval_graph_pattern(l, from),
GraphPattern::Slice(l, start, length) => {
let mut iter = self.eval_graph_pattern(l, from)?;
if *start > 0 {
iter = iter.skip(*start);
}
if let Some(length) = length {
iter = iter.take(*length);
} }
Ok(iter) Box::new(errors.into_iter().chain(values.into_iter().map(Ok)))
} }
} PlanNode::Skip { child, count } => Box::new(self.eval_plan(*child, from).skip(count)),
} PlanNode::Limit { child, count } => Box::new(self.eval_plan(*child, from).take(count)),
PlanNode::Project { child, mapping } => {
fn eval_triple_pattern( Box::new(self.eval_plan(*child, from).map(move |tuple| {
&self, let tuple = tuple?;
pattern: &TriplePattern, let mut new_tuple = Vec::with_capacity(mapping.len());
from: EncodedBindingsIterator, for key in &mapping {
) -> Result<EncodedBindingsIterator> { new_tuple.push(tuple[*key]);
let EncodedBindingsIterator {
mut variables,
iter: from_iter,
} = from;
let subject =
self.binding_value_lookup_from_term_or_variable(&pattern.subject, &mut variables)?;
let predicate = self
.binding_value_lookup_from_named_node_or_variable(&pattern.predicate, &mut variables)?;
let object =
self.binding_value_lookup_from_term_or_variable(&pattern.object, &mut variables)?;
let filter_sp = subject.is_var() && subject == predicate;
let filter_so = subject.is_var() && subject == object;
let filter_po = predicate.is_var() && predicate == object;
let store = self.store.clone();
let variables_len = variables.len();
Ok(EncodedBindingsIterator {
variables,
iter: Box::new(from_iter.flat_map(move |binding| {
let result: Box<dyn Iterator<Item = Result<EncodedBinding>>> = match binding {
Ok(mut binding) => {
match store.quads_for_pattern(
subject.get(&binding),
predicate.get(&binding),
object.get(&binding),
None, //TODO
) {
Ok(mut iter) => {
if filter_sp {
iter = Box::new(iter.filter(|quad| match quad {
Err(_) => true,
Ok(quad) => quad.subject == quad.predicate,
}))
}
if filter_so {
iter = Box::new(iter.filter(|quad| match quad {
Err(_) => true,
Ok(quad) => quad.subject == quad.object,
}))
}
if filter_po {
iter = Box::new(iter.filter(|quad| match quad {
Err(_) => true,
Ok(quad) => quad.predicate == quad.object,
}))
}
Box::new(iter.map(move |quad| {
let quad = quad?;
let mut binding = binding.clone();
binding.resize(variables_len, None);
subject.put(quad.subject, &mut binding);
predicate.put(quad.predicate, &mut binding);
object.put(quad.object, &mut binding);
Ok(binding)
}))
}
Err(error) => Box::new(once(Err(error))),
}
} }
Err(error) => Box::new(once(Err(error))), Ok(new_tuple)
}; }))
result }
})), }
})
}
fn eval_path_pattern(
&self,
pattern: &PathPattern,
from: EncodedBindingsIterator,
) -> Result<EncodedBindingsIterator> {
unimplemented!()
} }
fn eval_expression( fn eval_expression(
&self, &self,
expr: &Expression, expression: &PlanExpression,
binding: &[Option<EncodedTerm>], tuple: &[Option<EncodedTerm>],
variables: &[Variable], ) -> Option<EncodedTerm> {
) -> Result<Option<EncodedTerm>> { match expression {
match expr { PlanExpression::Constant(t) => Some(*t),
Expression::ConstantExpression(TermOrVariable::Term(t)) => { PlanExpression::Variable(v) => if *v < tuple.len() {
Ok(Some(self.store.encoder().encode_term(t)?)) tuple[*v]
} } else {
Expression::ConstantExpression(TermOrVariable::Variable(v)) => { None
Ok(slice_key(variables, v).and_then(|key| binding[key])) },
} PlanExpression::Or(a, b) => match self.to_bool(self.eval_expression(a, tuple)?) {
Expression::OrExpression(a, b) => Ok(match self
.to_bool(self.eval_expression(a, binding, variables)?)?
{
Some(true) => Some(true.into()), Some(true) => Some(true.into()),
Some(false) => self.eval_expression(b, binding, variables)?, Some(false) => self.eval_expression(b, tuple),
None => match self.to_bool(self.eval_expression(b, binding, variables)?)? { None => match self.to_bool(self.eval_expression(b, tuple)?) {
Some(true) => Some(true.into()), Some(true) => Some(true.into()),
_ => None, _ => None,
}, },
}), },
Expression::AndExpression(a, b) => Ok(match self PlanExpression::And(a, b) => match self.to_bool(self.eval_expression(a, tuple)?) {
.to_bool(self.eval_expression(a, binding, variables)?)? Some(true) => self.eval_expression(b, tuple),
{
Some(true) => self.eval_expression(b, binding, variables)?,
Some(false) => Some(false.into()), Some(false) => Some(false.into()),
None => match self.to_bool(self.eval_expression(b, binding, variables)?)? { None => match self.to_bool(self.eval_expression(b, tuple)?) {
Some(false) => Some(false.into()), Some(false) => Some(false.into()),
_ => None, _ => None,
}, },
}), },
Expression::UnaryNotExpression(e) => Ok(self PlanExpression::UnaryNot(e) => self
.to_bool(self.eval_expression(e, binding, variables)?)? .to_bool(self.eval_expression(e, tuple)?)
.map(|v| (!v).into())), .map(|v| (!v).into()),
e => Err(format!("Evaluation of expression {} is not implemented yet", e).into()), e => unimplemented!(),
} }
} }
fn to_bool(&self, term: Option<EncodedTerm>) -> Result<Option<bool>> { fn to_bool(&self, term: EncodedTerm) -> Option<bool> {
Ok(match term { match term {
Some(EncodedTerm::BooleanLiteral(value)) => Some(value), EncodedTerm::BooleanLiteral(value) => Some(value),
Some(EncodedTerm::NamedNode { .. }) => None, EncodedTerm::NamedNode { .. } => None,
Some(EncodedTerm::BlankNode(_)) => None, EncodedTerm::BlankNode(_) => None,
Some(term) => self.store.encoder().decode_term(term)?.to_bool(), term => self.store.encoder().decode_term(term).ok()?.to_bool(),
None => None,
})
}
fn binding_value_lookup_from_term_or_variable(
&self,
term_or_variable: &TermOrVariable,
variables: &mut Vec<Variable>,
) -> Result<BindingValueLookup> {
Ok(match term_or_variable {
TermOrVariable::Term(term) => {
BindingValueLookup::Constant(self.store.encoder().encode_term(term)?)
}
TermOrVariable::Variable(variable) => {
BindingValueLookup::Variable(match slice_key(variables, variable) {
Some(key) => key,
None => {
variables.push(variable.clone());
variables.len() - 1
}
})
}
})
}
fn binding_value_lookup_from_named_node_or_variable(
&self,
named_node_or_variable: &NamedNodeOrVariable,
variables: &mut Vec<Variable>,
) -> Result<BindingValueLookup> {
Ok(match named_node_or_variable {
NamedNodeOrVariable::NamedNode(named_node) => {
BindingValueLookup::Constant(self.store.encoder().encode_named_node(named_node)?)
}
NamedNodeOrVariable::Variable(variable) => {
BindingValueLookup::Variable(match slice_key(variables, variable) {
Some(key) => key,
None => {
variables.push(variable.clone());
variables.len() - 1
}
})
}
})
}
fn encode_bindings(&self, bindings: &StaticBindings) -> EncodedBindingsIterator {
let encoder = self.store.encoder();
let encoded_values: Vec<Result<EncodedBinding>> = bindings
.values_iter()
.map(move |values| {
let mut result = Vec::with_capacity(values.len());
for value in values {
result.push(match value {
Some(term) => Some(encoder.encode_term(term)?),
None => None,
});
}
Ok(result)
}).collect();
EncodedBindingsIterator {
variables: bindings.variables().to_vec(),
iter: Box::new(encoded_values.into_iter()),
} }
} }
fn decode_bindings(&self, iter: EncodedBindingsIterator) -> BindingsIterator { fn decode_bindings(
&self,
iter: EncodedTuplesIterator,
variables: Vec<Variable>,
) -> BindingsIterator {
let store = self.store.clone(); let store = self.store.clone();
let EncodedBindingsIterator { variables, iter } = iter;
BindingsIterator::new( BindingsIterator::new(
variables, variables,
Box::new(iter.map(move |values| { Box::new(iter.map(move |values| {
let values = values?;
let encoder = store.encoder(); let encoder = store.encoder();
let mut result = Vec::with_capacity(values.len()); values?
for value in values { .into_iter()
result.push(match value { .map(|value| {
Some(term) => Some(encoder.decode_term(term)?), Ok(match value {
None => None, Some(term) => Some(encoder.decode_term(term)?),
}); None => None,
} })
Ok(result) }).collect()
})), })),
) )
} }
} }
#[derive(PartialEq, Eq, Clone, Copy)] fn get_pattern_value(
enum BindingValueLookup { selector: &PatternValue,
Constant(EncodedTerm), tuple: &[Option<EncodedTerm>],
Variable(usize), ) -> Option<EncodedTerm> {
match selector {
PatternValue::Constant(term) => Some(*term),
PatternValue::Variable(v) => if *v < tuple.len() {
tuple[*v]
} else {
None
},
}
} }
impl BindingValueLookup { fn put_pattern_value(selector: &PatternValue, value: EncodedTerm, tuple: &mut EncodedTuple) {
fn get(&self, binding: &[Option<EncodedTerm>]) -> Option<EncodedTerm> { match selector {
match self { PatternValue::Constant(_) => (),
BindingValueLookup::Constant(term) => Some(*term), PatternValue::Variable(v) => {
BindingValueLookup::Variable(v) => if *v < binding.len() { let v = *v;
binding[*v] if tuple.len() > v {
tuple[v] = Some(value)
} else { } else {
None if tuple.len() < v {
}, tuple.resize(v, None);
} }
} tuple.push(Some(value))
}
fn put(&self, value: EncodedTerm, binding: &mut EncodedBinding) {
match self {
BindingValueLookup::Constant(_) => (),
BindingValueLookup::Variable(v) => binding[*v] = Some(value),
}
}
fn is_var(&self) -> bool {
match self {
BindingValueLookup::Constant(_) => false,
BindingValueLookup::Variable(_) => true,
} }
} }
} }

@ -3,7 +3,7 @@
use model::Dataset; use model::Dataset;
use sparql::algebra::QueryResult; use sparql::algebra::QueryResult;
use sparql::eval::SparqlEvaluator; use sparql::eval::SimpleEvaluator;
use sparql::parser::read_sparql_query; use sparql::parser::read_sparql_query;
use std::io::Read; use std::io::Read;
use store::store::EncodedQuadsStore; use store::store::EncodedQuadsStore;
@ -13,6 +13,7 @@ use Result;
pub mod algebra; pub mod algebra;
mod eval; mod eval;
pub mod parser; pub mod parser;
mod plan;
pub mod xml_results; pub mod xml_results;
pub trait SparqlDataset: Dataset { pub trait SparqlDataset: Dataset {
@ -22,6 +23,6 @@ pub trait SparqlDataset: Dataset {
impl<S: EncodedQuadsStore> SparqlDataset for StoreDataset<S> { impl<S: EncodedQuadsStore> SparqlDataset for StoreDataset<S> {
fn query(&self, query: impl Read) -> Result<QueryResult> { fn query(&self, query: impl Read) -> Result<QueryResult> {
let query = read_sparql_query(query, None)?; let query = read_sparql_query(query, None)?;
SparqlEvaluator::new(self.encoded()).evaluate(&query) SimpleEvaluator::new(self.encoded()).evaluate(&query)
} }
} }

@ -0,0 +1,446 @@
use sparql::algebra::*;
use store::numeric_encoder::EncodedTerm;
use store::store::EncodedQuadsStore;
use Result;
/// A row of bindings: slot `i` holds the encoded term bound to the variable with key `i`,
/// or `None` when that variable has no value in this row.
pub type EncodedTuple = Vec<Option<EncodedTerm>>;
/// A node of the evaluation plan that `PlanBuilder` derives from a `GraphPattern`.
///
/// Nodes are nested through their `child` / `entry` / `children` fields; `Init` and
/// `StaticBindings` carry no child and therefore sit at the bottom of a plan.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum PlanNode {
/// Starting point of a plan: `PlanBuilder::build` passes it as the initial input node.
Init,
/// A fixed list of already encoded rows (built from `GraphPattern::Data`).
StaticBindings {
tuples: Vec<EncodedTuple>,
},
/// One triple pattern applied on top of the rows produced by `child`.
TriplePatternJoin {
child: Box<PlanNode>,
subject: PatternValue,
predicate: PatternValue,
object: PatternValue,
},
/// Rows of `child` restricted by `expression` (built from `GraphPattern::Filter`).
Filter {
child: Box<PlanNode>,
expression: PlanExpression,
},
/// A flattened UNION: `entry` is the plan feeding the union, `children` are its branches,
/// each of which the builder roots at `PlanNode::Init`.
Union {
entry: Box<PlanNode>,
children: Vec<PlanNode>,
},
/// Duplicate elimination over the rows of `child` (built from `GraphPattern::Distinct`).
HashDeduplicate {
child: Box<PlanNode>,
},
/// Drops the first `count` rows of `child` (the `start` of a `GraphPattern::Slice`).
Skip {
child: Box<PlanNode>,
count: usize,
},
/// Keeps at most `count` rows of `child` (the `length` of a `GraphPattern::Slice`).
Limit {
child: Box<PlanNode>,
count: usize,
},
/// Projection of the rows of `child` onto a list of variable keys.
Project {
child: Box<PlanNode>,
mapping: Vec<usize>, // for each key in children the key of the returned vector (children is sliced at the vector length)
},
}
/// The subject, predicate or object slot of a `PlanNode::TriplePatternJoin`.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub enum PatternValue {
/// A fixed term, already encoded by the store's encoder.
Constant(EncodedTerm),
/// A variable, identified by its key (its index in the builder's variable list).
Variable(usize),
}
impl PatternValue {
    /// Returns `true` when this slot is a variable key and `false` when it is a constant term.
    pub fn is_var(&self) -> bool {
        if let PatternValue::Variable(_) = *self {
            true
        } else {
            false
        }
    }
}
/// An expression tree as stored in the evaluation plan.
///
/// `PlanBuilder::build_for_expression` produces it from an algebra `Expression`: constant terms
/// are encoded up front and variables are replaced by their key.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum PlanExpression {
/// An already encoded constant term.
Constant(EncodedTerm),
/// A variable, referenced by its key.
Variable(usize),
Or(Box<PlanExpression>, Box<PlanExpression>),
And(Box<PlanExpression>, Box<PlanExpression>),
Equal(Box<PlanExpression>, Box<PlanExpression>),
NotEqual(Box<PlanExpression>, Box<PlanExpression>),
Greater(Box<PlanExpression>, Box<PlanExpression>),
GreaterOrEq(Box<PlanExpression>, Box<PlanExpression>),
Lower(Box<PlanExpression>, Box<PlanExpression>),
LowerOrEq(Box<PlanExpression>, Box<PlanExpression>),
//In(Box<PlanExpression>, Vec<PlanExpression>),
//NotIn(Box<PlanExpression>, Vec<PlanExpression>),
Add(Box<PlanExpression>, Box<PlanExpression>),
Sub(Box<PlanExpression>, Box<PlanExpression>),
Mul(Box<PlanExpression>, Box<PlanExpression>),
Div(Box<PlanExpression>, Box<PlanExpression>),
UnaryPlus(Box<PlanExpression>),
UnaryMinus(Box<PlanExpression>),
UnaryNot(Box<PlanExpression>),
Str(Box<PlanExpression>),
Lang(Box<PlanExpression>),
LangMatches(Box<PlanExpression>, Box<PlanExpression>),
Datatype(Box<PlanExpression>),
/// `BOUND` test on a variable, referenced by its key.
Bound(usize),
IRI(Box<PlanExpression>),
/// `BNODE` call; the argument is optional.
BNode(Option<Box<PlanExpression>>),
// The variants below are commented out: the builder has no mapping for them yet.
/*Rand(),
Abs(Box<PlanExpression>),
Ceil(Box<PlanExpression>),
Floor(Box<PlanExpression>),
Round(Box<PlanExpression>),
Concat(Vec<PlanExpression>),
SubStr(Box<PlanExpression>, Box<PlanExpression>, Option<Box<PlanExpression>>),
StrLen(Box<PlanExpression>),
Replace(
Box<PlanExpression>,
Box<PlanExpression>,
Box<PlanExpression>,
Option<Box<PlanExpression>>,
),
UCase(Box<PlanExpression>),
LCase(Box<PlanExpression>),
EncodeForURI(Box<PlanExpression>),
Contains(Box<PlanExpression>, Box<PlanExpression>),
StrStarts(Box<PlanExpression>, Box<PlanExpression>),
StrEnds(Box<PlanExpression>, Box<PlanExpression>),
StrBefore(Box<PlanExpression>, Box<PlanExpression>),
StrAfter(Box<PlanExpression>, Box<PlanExpression>),
Year(Box<PlanExpression>),
Month(Box<PlanExpression>),
Day(Box<PlanExpression>),
Hours(Box<PlanExpression>),
Minutes(Box<PlanExpression>),
Seconds(Box<PlanExpression>),
Timezone(Box<PlanExpression>),
Now(),
UUID(),
StrUUID(),
MD5(Box<PlanExpression>),
SHA1(Box<PlanExpression>),
SHA256(Box<PlanExpression>),
SHA384(Box<PlanExpression>),
SHA512(Box<PlanExpression>),
Coalesce(Vec<PlanExpression>),
If(Box<PlanExpression>, Box<PlanExpression>, Box<PlanExpression>),
StrLang(Box<PlanExpression>, Box<PlanExpression>),
StrDT(Box<PlanExpression>, Box<PlanExpression>),*/
SameTerm(Box<PlanExpression>, Box<PlanExpression>),
IsIRI(Box<PlanExpression>),
IsBlank(Box<PlanExpression>),
IsLiteral(Box<PlanExpression>),
IsNumeric(Box<PlanExpression>),
/// `REGEX` call: text, pattern and optional flags.
Regex(
Box<PlanExpression>,
Box<PlanExpression>,
Option<Box<PlanExpression>>,
),
}
/// Translates algebra graph patterns into `PlanNode` trees.
///
/// It borrows the store so that constant terms can be encoded through `store.encoder()`
/// while the plan is being built.
pub struct PlanBuilder<'a, S: EncodedQuadsStore> {
store: &'a S,
}
impl<'a, S: EncodedQuadsStore> PlanBuilder<'a, S> {
/// Builds the evaluation plan for `pattern`.
///
/// Returns the root plan node together with the list of variables met while building;
/// the position of a variable in that list is the key used for it inside the plan.
///
/// # Errors
/// Propagates any error raised while building the plan (e.g. term encoding failures).
pub fn build(store: &S, pattern: &GraphPattern) -> Result<(PlanNode, Vec<Variable>)> {
    let builder = PlanBuilder { store };
    let mut variables = Vec::new();
    let root = builder.build_for_graph_pattern(pattern, PlanNode::Init, &mut variables)?;
    Ok((root, variables))
}
/// Recursively converts `pattern` into a plan stacked on top of `input`.
///
/// * `pattern` - the algebra node to translate.
/// * `input` - the plan whose rows feed the translated node.
/// * `variables` - the variable list shared by the whole plan; variables met for the first
///   time are appended to it and their index becomes their key.
///
/// # Errors
/// Propagates term encoding errors from the store's encoder.
///
/// # Panics
/// Panics with `unimplemented!()` on algebra nodes that have no plan counterpart yet
/// (property paths, `LeftJoin`, `Graph`, `Extend`, `Minus`, `Service`, `AggregateJoin`).
fn build_for_graph_pattern(
    &self,
    pattern: &GraphPattern,
    input: PlanNode,
    variables: &mut Vec<Variable>,
) -> Result<PlanNode> {
    Ok(match pattern {
        GraphPattern::BGP(p) => {
            // Each triple pattern wraps the plan built so far.
            let mut plan = input;
            for pattern in p {
                plan = match pattern {
                    TripleOrPathPattern::Triple(pattern) => PlanNode::TriplePatternJoin {
                        child: Box::new(plan),
                        subject: self
                            .pattern_value_from_term_or_variable(&pattern.subject, variables)?,
                        predicate: self.pattern_value_from_named_node_or_variable(
                            &pattern.predicate,
                            variables,
                        )?,
                        object: self
                            .pattern_value_from_term_or_variable(&pattern.object, variables)?,
                    },
                    TripleOrPathPattern::Path(pattern) => unimplemented!(),
                }
            }
            plan
        }
        GraphPattern::Join(a, b) => self.build_for_graph_pattern(
            b,
            self.build_for_graph_pattern(a, input, variables)?,
            variables,
        )?,
        GraphPattern::LeftJoin(a, b, e) => unimplemented!(),
        GraphPattern::Filter(e, p) => PlanNode::Filter {
            child: Box::new(self.build_for_graph_pattern(p, input, variables)?),
            expression: self.build_for_expression(e, variables)?,
        },
        GraphPattern::Union(a, b) => {
            //We flatten the UNIONs
            let mut stack: Vec<&GraphPattern> = vec![a, b];
            let mut children = vec![];
            // Branches are collected in pop order, i.e. the right operand comes first.
            while let Some(branch) = stack.pop() {
                match branch {
                    GraphPattern::Union(a, b) => {
                        stack.push(a);
                        stack.push(b);
                    }
                    // Build the popped branch itself. Building from the outer `a` here
                    // would turn every branch into a copy of the first operand.
                    p => children.push(self.build_for_graph_pattern(
                        p,
                        PlanNode::Init,
                        variables,
                    )?),
                }
            }
            PlanNode::Union {
                entry: Box::new(input),
                children,
            }
        }
        GraphPattern::Graph(g, p) => unimplemented!(),
        GraphPattern::Extend(p, v, e) => unimplemented!(),
        GraphPattern::Minus(a, b) => unimplemented!(),
        GraphPattern::Service(n, p, s) => unimplemented!(),
        GraphPattern::AggregateJoin(g, a) => unimplemented!(),
        GraphPattern::Data(bs) => PlanNode::StaticBindings {
            tuples: self.encode_bindings(bs, variables)?,
        },
        GraphPattern::OrderBy(l, o) => self.build_for_graph_pattern(l, input, variables)?, //TODO
        GraphPattern::Project(l, new_variables) => PlanNode::Project {
            // The child is built against its own copy of the projected variables, so those
            // variables occupy the first keys of the child's rows.
            child: Box::new(self.build_for_graph_pattern(
                l,
                input,
                &mut new_variables.clone(),
            )?),
            // The mapping is resolved against the caller's variable list.
            mapping: new_variables
                .iter()
                .map(|variable| variable_key(variables, variable))
                .collect(),
        },
        GraphPattern::Distinct(l) => PlanNode::HashDeduplicate {
            child: Box::new(self.build_for_graph_pattern(l, input, variables)?),
        },
        GraphPattern::Reduced(l) => self.build_for_graph_pattern(l, input, variables)?,
        GraphPattern::Slice(l, start, length) => {
            let mut plan = self.build_for_graph_pattern(l, input, variables)?;
            // Skip is applied before Limit so the limit counts rows after the offset.
            if *start > 0 {
                plan = PlanNode::Skip {
                    child: Box::new(plan),
                    count: *start,
                };
            }
            if let Some(length) = length {
                plan = PlanNode::Limit {
                    child: Box::new(plan),
                    count: *length,
                };
            }
            plan
        }
    })
}
/// Converts an algebra `Expression` into its `PlanExpression` counterpart.
///
/// Constant terms are encoded through the store's encoder; variables (including the
/// argument of `BOUND`) are replaced by their key in `variables`, registering them if needed.
///
/// # Errors
/// Propagates term encoding errors.
///
/// # Panics
/// Panics with `unimplemented!()` on any expression kind not listed below.
fn build_for_expression(
    &self,
    expression: &Expression,
    variables: &mut Vec<Variable>,
) -> Result<PlanExpression> {
    let plan = match expression {
        Expression::ConstantExpression(TermOrVariable::Term(t)) => {
            PlanExpression::Constant(self.store.encoder().encode_term(t)?)
        }
        Expression::ConstantExpression(TermOrVariable::Variable(v)) => {
            PlanExpression::Variable(variable_key(variables, v))
        }
        Expression::OrExpression(a, b) => PlanExpression::Or(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::AndExpression(a, b) => PlanExpression::And(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::EqualExpression(a, b) => PlanExpression::Equal(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::NotEqualExpression(a, b) => PlanExpression::NotEqual(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::GreaterExpression(a, b) => PlanExpression::Greater(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::GreaterOrEqExpression(a, b) => PlanExpression::GreaterOrEq(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::LowerExpression(a, b) => PlanExpression::Lower(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::LowerOrEqExpression(a, b) => PlanExpression::LowerOrEq(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::AddExpression(a, b) => PlanExpression::Add(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::SubExpression(a, b) => PlanExpression::Sub(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::MulExpression(a, b) => PlanExpression::Mul(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::DivExpression(a, b) => PlanExpression::Div(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::UnaryPlusExpression(e) => {
            PlanExpression::UnaryPlus(self.build_boxed_expression(e, variables)?)
        }
        Expression::UnaryMinusExpression(e) => {
            PlanExpression::UnaryMinus(self.build_boxed_expression(e, variables)?)
        }
        Expression::UnaryNotExpression(e) => {
            PlanExpression::UnaryNot(self.build_boxed_expression(e, variables)?)
        }
        Expression::StrFunctionCall(e) => {
            PlanExpression::Str(self.build_boxed_expression(e, variables)?)
        }
        Expression::LangFunctionCall(e) => {
            PlanExpression::Lang(self.build_boxed_expression(e, variables)?)
        }
        Expression::LangMatchesFunctionCall(a, b) => PlanExpression::LangMatches(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::DatatypeFunctionCall(e) => {
            PlanExpression::Datatype(self.build_boxed_expression(e, variables)?)
        }
        Expression::BoundFunctionCall(v) => PlanExpression::Bound(variable_key(variables, v)),
        Expression::IRIFunctionCall(e) => {
            PlanExpression::IRI(self.build_boxed_expression(e, variables)?)
        }
        Expression::BNodeFunctionCall(e) => PlanExpression::BNode(match e {
            Some(e) => Some(self.build_boxed_expression(e, variables)?),
            None => None,
        }),
        Expression::SameTermFunctionCall(a, b) => PlanExpression::SameTerm(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
        ),
        Expression::IsIRIFunctionCall(e) => {
            PlanExpression::IsIRI(self.build_boxed_expression(e, variables)?)
        }
        Expression::IsBlankFunctionCall(e) => {
            PlanExpression::IsBlank(self.build_boxed_expression(e, variables)?)
        }
        Expression::IsLiteralFunctionCall(e) => {
            PlanExpression::IsLiteral(self.build_boxed_expression(e, variables)?)
        }
        Expression::IsNumericFunctionCall(e) => {
            PlanExpression::IsNumeric(self.build_boxed_expression(e, variables)?)
        }
        Expression::RegexFunctionCall(a, b, c) => PlanExpression::Regex(
            self.build_boxed_expression(a, variables)?,
            self.build_boxed_expression(b, variables)?,
            match c {
                Some(c) => Some(self.build_boxed_expression(c, variables)?),
                None => None,
            },
        ),
        _ => unimplemented!(),
    };
    Ok(plan)
}

/// Builds the plan for a sub-expression and boxes it, ready to be stored in a parent node.
fn build_boxed_expression(
    &self,
    expression: &Expression,
    variables: &mut Vec<Variable>,
) -> Result<Box<PlanExpression>> {
    Ok(Box::new(self.build_for_expression(expression, variables)?))
}
/// Turns the subject or object of a triple pattern into a `PatternValue`.
///
/// A term is encoded with the store's encoder; a variable is replaced by its key in
/// `variables` (and appended to the list when it is new).
///
/// # Errors
/// Returns the encoder's error when the term cannot be encoded.
fn pattern_value_from_term_or_variable(
    &self,
    term_or_variable: &TermOrVariable,
    variables: &mut Vec<Variable>,
) -> Result<PatternValue> {
    match term_or_variable {
        TermOrVariable::Variable(variable) => {
            let key = variable_key(variables, variable);
            Ok(PatternValue::Variable(key))
        }
        TermOrVariable::Term(term) => {
            let encoded = self.store.encoder().encode_term(term)?;
            Ok(PatternValue::Constant(encoded))
        }
    }
}
/// Turns the predicate of a triple pattern into a `PatternValue`.
///
/// A named node is encoded with the store's encoder; a variable is replaced by its key in
/// `variables` (and appended to the list when it is new).
///
/// # Errors
/// Returns the encoder's error when the named node cannot be encoded.
fn pattern_value_from_named_node_or_variable(
    &self,
    named_node_or_variable: &NamedNodeOrVariable,
    variables: &mut Vec<Variable>,
) -> Result<PatternValue> {
    match named_node_or_variable {
        NamedNodeOrVariable::Variable(variable) => {
            let key = variable_key(variables, variable);
            Ok(PatternValue::Variable(key))
        }
        NamedNodeOrVariable::NamedNode(named_node) => {
            let encoded = self.store.encoder().encode_named_node(named_node)?;
            Ok(PatternValue::Constant(encoded))
        }
    }
}
/// Encodes the rows of a static bindings table (`GraphPattern::Data`) into plan tuples.
///
/// * `bindings` - the table: a list of variables plus rows of optional terms.
/// * `variables` - the plan-wide variable list; every variable of `bindings` is registered
///   in it before any row is built.
///
/// Each returned tuple has one slot per known variable; unbound values stay `None`.
///
/// # Errors
/// Returns the encoder's error when a term cannot be encoded.
fn encode_bindings(
    &self,
    bindings: &StaticBindings,
    variables: &mut Vec<Variable>,
) -> Result<Vec<EncodedTuple>> {
    let encoder = self.store.encoder();
    // Resolve all keys first. Sizing a row before `variable_key` appends a new variable
    // would leave the row one slot too short and make the indexed write below panic.
    let keys: Vec<usize> = bindings
        .variables()
        .iter()
        .map(|variable| variable_key(variables, variable))
        .collect();
    let width = variables.len();
    bindings
        .values_iter()
        .map(move |values| {
            let mut result = vec![None; width];
            for (position, value) in values.iter().enumerate() {
                if let Some(term) = value {
                    result[keys[position]] = Some(encoder.encode_term(term)?);
                }
            }
            Ok(result)
        }).collect()
}
}
fn variable_key(variables: &mut Vec<Variable>, variable: &Variable) -> usize {
match slice_key(variables, variable) {
Some(key) => key,
None => {
variables.push(variable.clone());
variables.len() - 1
}
}
}
/// Returns the index of the first item of `slice` equal to `element`, or `None` if no item matches.
fn slice_key<T: Eq>(slice: &[T], element: &T) -> Option<usize> {
    slice.iter().position(|candidate| candidate == element)
}
Loading…
Cancel
Save