SPARQL query plan: attach original RDF terms

Helps debugging
pull/470/head
Tpt 1 year ago committed by Thomas Tanon
parent 6af6c9c0eb
commit 9dc1106b9a
  1. 156
      lib/src/sparql/eval.rs
  2. 130
      lib/src/sparql/plan.rs
  3. 329
      lib/src/sparql/plan_builder.rs

@ -1,6 +1,5 @@
use crate::model::vocab::{rdf, xsd};
use crate::model::{BlankNode, LiteralRef, NamedNodeRef};
use crate::model::{NamedNode, Term, Triple};
use crate::model::{BlankNode, LiteralRef, NamedNode, NamedNodeRef, Term, Triple};
use crate::sparql::algebra::{Query, QueryDataset};
use crate::sparql::dataset::DatasetView;
use crate::sparql::error::EvaluationError;
@ -110,8 +109,8 @@ impl SimpleEvaluator {
node: &PlanNode,
) -> Rc<dyn Fn(EncodedTuple) -> EncodedTuplesIterator> {
match node {
PlanNode::StaticBindings { tuples } => {
let tuples = tuples.clone();
PlanNode::StaticBindings { encoded_tuples, .. } => {
let tuples = encoded_tuples.clone();
Rc::new(move |from| {
Box::new(
tuples
@ -162,10 +161,10 @@ impl SimpleEvaluator {
object,
graph_name,
} => {
let subject = subject.clone();
let predicate = predicate.clone();
let object = object.clone();
let graph_name = graph_name.clone();
let subject = TupleSelector::from(subject);
let predicate = TupleSelector::from(predicate);
let object = TupleSelector::from(object);
let graph_name = TupleSelector::from(graph_name);
let dataset = self.dataset.clone();
Rc::new(move |from| {
let iter = dataset.encoded_quads_for_pattern(
@ -197,10 +196,10 @@ impl SimpleEvaluator {
object,
graph_name,
} => {
let subject = subject.clone();
let subject = TupleSelector::from(subject);
let path = path.clone();
let object = object.clone();
let graph_name = graph_name.clone();
let object = TupleSelector::from(object);
let graph_name = TupleSelector::from(graph_name);
let dataset = self.dataset.clone();
Rc::new(move |from| {
let input_subject = get_pattern_value(&subject, &from);
@ -530,11 +529,11 @@ impl SimpleEvaluator {
}
PlanNode::Extend {
child,
position,
variable,
expression,
} => {
let child = self.plan_evaluator(child);
let position = *position;
let position = variable.encoded;
let expression = self.expression_evaluator(expression);
Rc::new(move |from| {
let expression = expression.clone();
@ -634,8 +633,8 @@ impl SimpleEvaluator {
let mapping = mapping.clone();
let mut input_tuple = EncodedTuple::with_capacity(mapping.len());
for (input_key, output_key) in mapping.iter() {
if let Some(value) = from.get(*output_key) {
input_tuple.set(*input_key, value.clone());
if let Some(value) = from.get(output_key.encoded) {
input_tuple.set(input_key.encoded, value.clone());
}
}
Box::new(child(input_tuple).filter_map(move |tuple| {
@ -643,14 +642,15 @@ impl SimpleEvaluator {
Ok(tuple) => {
let mut output_tuple = from.clone();
for (input_key, output_key) in mapping.iter() {
if let Some(value) = tuple.get(*input_key) {
if let Some(existing_value) = output_tuple.get(*output_key)
if let Some(value) = tuple.get(input_key.encoded) {
if let Some(existing_value) =
output_tuple.get(output_key.encoded)
{
if existing_value != value {
return None; // Conflict
}
} else {
output_tuple.set(*output_key, value.clone());
output_tuple.set(output_key.encoded, value.clone());
}
}
}
@ -688,7 +688,7 @@ impl SimpleEvaluator {
})
.collect();
let accumulator_variables: Vec<_> =
aggregates.iter().map(|(_, var)| *var).collect();
aggregates.iter().map(|(_, var)| var.encoded).collect();
Rc::new(move |from| {
let tuple_size = from.capacity();
let key_variables = key_variables.clone();
@ -707,7 +707,7 @@ impl SimpleEvaluator {
//TODO avoid copy for key?
let key = key_variables
.iter()
.map(|v| tuple.get(*v).cloned())
.map(|v| tuple.get(v.encoded).cloned())
.collect();
let key_accumulators =
@ -739,7 +739,7 @@ impl SimpleEvaluator {
let mut result = EncodedTuple::with_capacity(tuple_size);
for (variable, value) in key_variables.iter().zip(key) {
if let Some(value) = value {
result.set(*variable, value);
result.set(variable.encoded, value);
}
}
for (accumulator, variable) in
@ -765,7 +765,7 @@ impl SimpleEvaluator {
variables: Rc<Vec<Variable>>,
from: &EncodedTuple,
) -> Result<EncodedTuplesIterator, EvaluationError> {
let service_name = get_pattern_value(service_name, from)
let service_name = get_pattern_value(&service_name.into(), from)
.ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?;
if let QueryResults::Solutions(iter) = self.service_handler.handle(
self.dataset.decode_named_node(&service_name)?,
@ -851,12 +851,16 @@ impl SimpleEvaluator {
expression: &PlanExpression,
) -> Rc<dyn Fn(&EncodedTuple) -> Option<EncodedTerm>> {
match expression {
PlanExpression::Constant(t) => {
let t = t.clone();
PlanExpression::NamedNode(t) => {
let t = t.encoded.clone();
Rc::new(move |_| Some(t.clone()))
}
PlanExpression::Literal(t) => {
let t = t.encoded.clone();
Rc::new(move |_| Some(t.clone()))
}
PlanExpression::Variable(v) => {
let v = *v;
let v = v.encoded;
Rc::new(move |tuple| tuple.get(v).cloned())
}
PlanExpression::Exists(plan) => {
@ -1158,7 +1162,7 @@ impl SimpleEvaluator {
Rc::new(move |tuple| datatype(&dataset, &e(tuple)?))
}
PlanExpression::Bound(v) => {
let v = *v;
let v = v.encoded;
Rc::new(move |tuple| Some(tuple.contains(v).into()))
}
PlanExpression::Iri(e) => {
@ -2924,14 +2928,32 @@ impl NumericBinaryOperands {
}
}
fn get_pattern_value<'a>(
selector: &'a PatternValue,
tuple: &'a EncodedTuple,
) -> Option<EncodedTerm> {
/// Evaluation-side selector for one position of a quad or path pattern.
///
/// Built from a `PatternValue` through the `From<&PatternValue>` conversion,
/// which keeps only the encoded part of each constant or variable.
#[derive(Clone)]
enum TupleSelector {
/// A fixed encoded term; `get_pattern_value` returns a clone of it.
Constant(EncodedTerm),
/// Position of a variable; `get_pattern_value` reads it with `tuple.get`.
Variable(usize),
/// A nested triple pattern whose three components are selectors themselves.
TriplePattern(Rc<TripleTupleSelector>),
}
// Converts a plan-level `PatternValue` into the selector used during evaluation.
impl From<&PatternValue> for TupleSelector {
// Keeps only the `encoded` field of constants and variables; nested triple
// patterns are converted recursively, component by component.
fn from(value: &PatternValue) -> Self {
match value {
PatternValue::Constant(c) => Self::Constant(c.encoded.clone()),
PatternValue::Variable(v) => Self::Variable(v.encoded),
// Each component is itself a `PatternValue`, so `.into()` recurses into this impl.
PatternValue::TriplePattern(p) => Self::TriplePattern(Rc::new(TripleTupleSelector {
subject: (&p.subject).into(),
predicate: (&p.predicate).into(),
object: (&p.object).into(),
})),
}
}
}
fn get_pattern_value(selector: &TupleSelector, tuple: &EncodedTuple) -> Option<EncodedTerm> {
match selector {
PatternValue::Constant(term) => Some(term.clone()),
PatternValue::Variable(v) => tuple.get(*v).cloned(),
PatternValue::Triple(triple) => Some(
TupleSelector::Constant(c) => Some(c.clone()),
TupleSelector::Variable(v) => tuple.get(*v).cloned(),
TupleSelector::TriplePattern(triple) => Some(
EncodedTriple {
subject: get_pattern_value(&triple.subject, tuple)?,
predicate: get_pattern_value(&triple.predicate, tuple)?,
@ -2942,20 +2964,26 @@ fn get_pattern_value<'a>(
}
}
/// The three component selectors of a nested triple pattern.
///
/// Held behind an `Rc` by `TupleSelector::TriplePattern`.
struct TripleTupleSelector {
/// Selector for the subject position of the nested triple.
subject: TupleSelector,
/// Selector for the predicate position of the nested triple.
predicate: TupleSelector,
/// Selector for the object position of the nested triple.
object: TupleSelector,
}
fn put_pattern_value(
selector: &PatternValue,
selector: &TupleSelector,
value: EncodedTerm,
tuple: &mut EncodedTuple,
) -> Option<()> {
match selector {
PatternValue::Constant(c) => {
TupleSelector::Constant(c) => {
if *c == value {
Some(())
} else {
None
}
}
PatternValue::Variable(v) => {
TupleSelector::Variable(v) => {
if let Some(old) = tuple.get(*v) {
if value == *old {
Some(())
@ -2967,7 +2995,7 @@ fn put_pattern_value(
Some(())
}
}
PatternValue::Triple(triple) => {
TupleSelector::TriplePattern(triple) => {
if let EncodedTerm::Triple(value) = value {
put_pattern_value(&triple.subject, value.subject.clone(), tuple)?;
put_pattern_value(&triple.predicate, value.predicate.clone(), tuple)?;
@ -3022,7 +3050,12 @@ impl PathEvaluator {
Ok(match path {
PlanPropertyPath::Path(p) => self
.dataset
.encoded_quads_for_pattern(Some(start), Some(p), Some(end), Some(graph_name))
.encoded_quads_for_pattern(
Some(start),
Some(&p.encoded),
Some(end),
Some(graph_name),
)
.next()
.transpose()?
.is_some(),
@ -3074,7 +3107,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(Some(start), None, Some(end), Some(graph_name))
.find_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok(()))
@ -3096,7 +3129,7 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(Some(start), Some(p), Some(end), None)
.encoded_quads_for_pattern(Some(start), Some(&p.encoded), Some(end), None)
.map(|t| Ok(t?.graph_name)),
),
PlanPropertyPath::Reverse(p) => self.eval_closed_in_unknown_graph(p, end, start),
@ -3178,7 +3211,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(Some(start), None, Some(end), None)
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok(t.graph_name))
@ -3200,7 +3233,12 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(Some(start), Some(p), None, Some(graph_name))
.encoded_quads_for_pattern(
Some(start),
Some(&p.encoded),
None,
Some(graph_name),
)
.map(|t| Ok(t?.object)),
),
PlanPropertyPath::Reverse(p) => self.eval_to_in_graph(p, start, graph_name),
@ -3253,7 +3291,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(Some(start), None, None, Some(graph_name))
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok(t.object))
@ -3274,7 +3312,7 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(Some(start), Some(p), None, None)
.encoded_quads_for_pattern(Some(start), Some(&p.encoded), None, None)
.map(|t| {
let t = t?;
Ok((t.object, t.graph_name))
@ -3340,7 +3378,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(Some(start), None, None, None)
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok((t.object, t.graph_name)))
@ -3362,7 +3400,7 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(None, Some(p), Some(end), Some(graph_name))
.encoded_quads_for_pattern(None, Some(&p.encoded), Some(end), Some(graph_name))
.map(|t| Ok(t?.subject)),
),
PlanPropertyPath::Reverse(p) => self.eval_from_in_graph(p, end, graph_name),
@ -3414,7 +3452,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(None, None, Some(end), Some(graph_name))
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok(t.subject))
@ -3434,7 +3472,7 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(None, Some(p), Some(end), None)
.encoded_quads_for_pattern(None, Some(&p.encoded), Some(end), None)
.map(|t| {
let t = t?;
Ok((t.subject, t.graph_name))
@ -3500,7 +3538,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(Some(end), None, None, None)
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok((t.subject, t.graph_name)))
@ -3521,7 +3559,7 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(None, Some(p), None, Some(graph_name))
.encoded_quads_for_pattern(None, Some(&p.encoded), None, Some(graph_name))
.map(|t| t.map(|t| (t.subject, t.object))),
),
PlanPropertyPath::Reverse(p) => Box::new(
@ -3578,7 +3616,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(None, None, None, Some(graph_name))
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok((t.subject, t.object)))
@ -3599,7 +3637,7 @@ impl PathEvaluator {
match path {
PlanPropertyPath::Path(p) => Box::new(
self.dataset
.encoded_quads_for_pattern(None, Some(p), None, None)
.encoded_quads_for_pattern(None, Some(&p.encoded), None, None)
.map(|t| t.map(|t| (t.subject, t.object, t.graph_name))),
),
PlanPropertyPath::Reverse(p) => Box::new(
@ -3653,7 +3691,7 @@ impl PathEvaluator {
.encoded_quads_for_pattern(None, None, None, None)
.filter_map(move |t| match t {
Ok(t) => {
if ps.contains(&t.predicate) {
if ps.iter().any(|p| p.encoded == t.predicate) {
None
} else {
Some(Ok((t.subject, t.object, t.graph_name)))
@ -4081,13 +4119,13 @@ fn get_triple_template_value<'a>(
bnodes: &'a mut Vec<EncodedTerm>,
) -> Option<EncodedTerm> {
match selector {
TripleTemplateValue::Constant(term) => Some(term.clone()),
TripleTemplateValue::Variable(v) => tuple.get(*v).cloned(),
TripleTemplateValue::BlankNode(id) => {
if *id >= bnodes.len() {
bnodes.resize_with(*id + 1, new_bnode)
TripleTemplateValue::Constant(term) => Some(term.encoded.clone()),
TripleTemplateValue::Variable(v) => tuple.get(v.encoded).cloned(),
TripleTemplateValue::BlankNode(bnode) => {
if bnode.encoded >= bnodes.len() {
bnodes.resize_with(bnode.encoded + 1, new_bnode)
}
Some(bnodes[*id].clone())
Some(bnodes[bnode.encoded].clone())
}
TripleTemplateValue::Triple(triple) => Some(
EncodedTriple {

@ -1,8 +1,9 @@
use crate::model::NamedNode;
use crate::model::{BlankNode, Literal, NamedNode, Term, Triple};
use crate::sparql::Variable;
use crate::storage::numeric_encoder::EncodedTerm;
use oxrdf::Variable;
use regex::Regex;
use spargebra::algebra::GraphPattern;
use spargebra::term::GroundTerm;
use std::cmp::max;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, BTreeSet};
@ -11,7 +12,9 @@ use std::rc::Rc;
#[derive(Debug, Clone)]
pub enum PlanNode {
StaticBindings {
tuples: Vec<EncodedTuple>,
encoded_tuples: Vec<EncodedTuple>,
variables: Vec<PlanVariable>,
plain_bindings: Vec<Vec<Option<GroundTerm>>>,
},
Service {
service_name: PatternValue,
@ -68,7 +71,7 @@ pub enum PlanNode {
},
Extend {
child: Box<Self>,
position: usize,
variable: PlanVariable,
expression: Box<PlanExpression>,
},
Sort {
@ -92,13 +95,13 @@ pub enum PlanNode {
},
Project {
child: Box<Self>,
mapping: Rc<Vec<(usize, usize)>>, // pairs of (variable key in child, variable key in output)
mapping: Rc<Vec<(PlanVariable, PlanVariable)>>, // pairs of (variable key in child, variable key in output)
},
Aggregate {
// By definition the group by key are the range 0..key_mapping.len()
child: Box<Self>,
key_variables: Rc<Vec<usize>>,
aggregates: Rc<Vec<(PlanAggregation, usize)>>,
key_variables: Rc<Vec<PlanVariable>>,
aggregates: Rc<Vec<(PlanAggregation, PlanVariable)>>,
},
}
@ -114,8 +117,8 @@ impl PlanNode {
pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) {
match self {
Self::StaticBindings { tuples } => {
for tuple in tuples {
Self::StaticBindings { encoded_tuples, .. } => {
for tuple in encoded_tuples {
for (key, value) in tuple.iter().enumerate() {
if value.is_some() {
callback(key);
@ -130,16 +133,16 @@ impl PlanNode {
graph_name,
} => {
if let PatternValue::Variable(var) = subject {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = predicate {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = object {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = graph_name {
callback(*var);
callback(var.encoded);
}
}
Self::PathPattern {
@ -149,13 +152,13 @@ impl PlanNode {
..
} => {
if let PatternValue::Variable(var) = subject {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = object {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = graph_name {
callback(*var);
callback(var.encoded);
}
}
Self::Filter { child, expression } => {
@ -185,10 +188,10 @@ impl PlanNode {
}
Self::Extend {
child,
position,
variable,
expression,
} => {
callback(*position);
callback(variable.encoded);
expression.lookup_used_variables(callback);
child.lookup_used_variables(callback);
}
@ -203,15 +206,15 @@ impl PlanNode {
..
} => {
if let PatternValue::Variable(v) = service_name {
callback(*v);
callback(v.encoded);
}
child.lookup_used_variables(callback);
}
Self::Project { mapping, child } => {
let child_bound = child.used_variables();
for (child_i, output_i) in mapping.iter() {
if child_bound.contains(child_i) {
callback(*output_i);
if child_bound.contains(&child_i.encoded) {
callback(output_i.encoded);
}
}
}
@ -221,10 +224,10 @@ impl PlanNode {
..
} => {
for var in key_variables.iter() {
callback(*var);
callback(var.encoded);
}
for (_, var) in aggregates.iter() {
callback(*var);
callback(var.encoded);
}
}
}
@ -243,10 +246,10 @@ impl PlanNode {
pub fn lookup_always_bound_variables(&self, callback: &mut impl FnMut(usize)) {
match self {
Self::StaticBindings { tuples } => {
Self::StaticBindings { encoded_tuples, .. } => {
let mut variables = BTreeMap::default(); // value true iff always bound
let max_tuple_length = tuples.iter().map(|t| t.capacity()).fold(0, max);
for tuple in tuples {
let max_tuple_length = encoded_tuples.iter().map(|t| t.capacity()).fold(0, max);
for tuple in encoded_tuples {
for key in 0..max_tuple_length {
match variables.entry(key) {
Entry::Vacant(e) => {
@ -273,16 +276,16 @@ impl PlanNode {
graph_name,
} => {
if let PatternValue::Variable(var) = subject {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = predicate {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = object {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = graph_name {
callback(*var);
callback(var.encoded);
}
}
Self::PathPattern {
@ -292,13 +295,13 @@ impl PlanNode {
..
} => {
if let PatternValue::Variable(var) = subject {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = object {
callback(*var);
callback(var.encoded);
}
if let PatternValue::Variable(var) = graph_name {
callback(*var);
callback(var.encoded);
}
}
Self::Filter { child, .. } => {
@ -327,12 +330,15 @@ impl PlanNode {
}
Self::Extend {
child,
position,
variable,
expression,
} => {
if matches!(expression.as_ref(), PlanExpression::Constant(_)) {
if matches!(
expression.as_ref(),
PlanExpression::NamedNode(_) | PlanExpression::Literal(_)
) {
// TODO: more cases?
callback(*position);
callback(variable.encoded);
}
child.lookup_always_bound_variables(callback);
}
@ -351,8 +357,8 @@ impl PlanNode {
Self::Project { mapping, child } => {
let child_bound = child.always_bound_variables();
for (child_i, output_i) in mapping.iter() {
if child_bound.contains(child_i) {
callback(*output_i);
if child_bound.contains(&child_i.encoded) {
callback(output_i.encoded);
}
}
}
@ -373,11 +379,25 @@ impl PlanNode {
}
}
/// A constant of the query plan that carries both its encoded form and the
/// original value it was built from.
///
/// `T` is the type of the original value (e.g. `NamedNode`, `Literal`, `Term`
/// or `PatternValueConstant` at the use sites in this module).
#[derive(Debug, Clone)]
pub struct PlanTerm<T> {
/// Encoded form of the term; this is the part the evaluator reads.
pub encoded: EncodedTerm,
/// Original, non-encoded value the encoded term was built from.
pub plain: T,
}
#[derive(Debug, Clone)]
pub enum PatternValue {
Constant(EncodedTerm),
Variable(usize),
Triple(Box<TriplePatternValue>),
Constant(PlanTerm<PatternValueConstant>),
Variable(PlanVariable),
TriplePattern(Box<TriplePatternValue>),
}
/// Original (non-encoded) value of a constant used in a `PatternValue`.
///
/// It is stored as the `plain` part of a `PlanTerm<PatternValueConstant>`.
#[derive(Debug, Clone)]
pub enum PatternValueConstant {
/// A constant IRI.
NamedNode(NamedNode),
/// A constant literal.
Literal(Literal),
/// A constant triple, boxed.
Triple(Box<Triple>),
/// The default graph; used together with `EncodedTerm::DefaultGraph`.
DefaultGraph,
}
#[derive(Debug, Clone)]
@ -387,10 +407,17 @@ pub struct TriplePatternValue {
pub object: PatternValue,
}
/// A variable of the query plan: its numeric key plus the original value it
/// stands for.
///
/// `P` defaults to `Variable`; `TripleTemplateValue::BlankNode` uses
/// `PlanVariable<BlankNode>` instead.
#[derive(Debug, Clone)]
pub struct PlanVariable<P = Variable> {
/// Numeric key of the variable; the evaluator passes it to `EncodedTuple::get`/`set`.
pub encoded: usize,
/// Original value (a `Variable` by default) this key was created for.
pub plain: P,
}
#[derive(Debug, Clone)]
pub enum PlanExpression {
Constant(EncodedTerm),
Variable(usize),
NamedNode(PlanTerm<NamedNode>),
Literal(PlanTerm<Literal>),
Variable(PlanVariable),
Exists(Rc<PlanNode>),
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
@ -410,7 +437,7 @@ pub enum PlanExpression {
Lang(Box<Self>),
LangMatches(Box<Self>, Box<Self>),
Datatype(Box<Self>),
Bound(usize),
Bound(PlanVariable),
Iri(Box<Self>),
BNode(Option<Box<Self>>),
Rand,
@ -483,9 +510,10 @@ impl PlanExpression {
pub fn lookup_used_variables(&self, callback: &mut impl FnMut(usize)) {
match self {
Self::Variable(v) | Self::Bound(v) => {
callback(*v);
callback(v.encoded);
}
Self::Constant(_)
Self::NamedNode(_)
| Self::Literal(_)
| Self::Rand
| Self::Now
| Self::Uuid
@ -615,14 +643,14 @@ pub enum PlanAggregationFunction {
#[derive(Debug, Clone)]
pub enum PlanPropertyPath {
Path(EncodedTerm),
Path(PlanTerm<NamedNode>),
Reverse(Rc<Self>),
Sequence(Rc<Self>, Rc<Self>),
Alternative(Rc<Self>, Rc<Self>),
ZeroOrMore(Rc<Self>),
OneOrMore(Rc<Self>),
ZeroOrOne(Rc<Self>),
NegatedPropertySet(Rc<Vec<EncodedTerm>>),
NegatedPropertySet(Rc<Vec<PlanTerm<NamedNode>>>),
}
#[derive(Debug, Clone)]
@ -640,9 +668,9 @@ pub struct TripleTemplate {
#[derive(Debug, Clone)]
pub enum TripleTemplateValue {
Constant(EncodedTerm),
BlankNode(usize),
Variable(usize),
Constant(PlanTerm<Term>),
BlankNode(PlanVariable<BlankNode>),
Variable(PlanVariable),
Triple(Box<TripleTemplate>),
}

@ -37,7 +37,10 @@ impl<'a> PlanBuilder<'a> {
.build_for_graph_pattern(
pattern,
&mut variables,
&PatternValue::Constant(EncodedTerm::DefaultGraph),
&PatternValue::Constant(PlanTerm {
encoded: EncodedTerm::DefaultGraph,
plain: PatternValueConstant::DefaultGraph,
}),
)?;
let plan = if !without_optimizations && !is_cardinality_meaningful {
// let's reduce downstream task.
@ -125,13 +128,13 @@ impl<'a> PlanBuilder<'a> {
PlanNode::HashLeftJoin {
left: Box::new(left),
right: Box::new(right),
expression: Box::new(
expression
.as_ref()
.map_or(Ok(PlanExpression::Constant(true.into())), |e| {
self.build_for_expression(e, variables, graph_name)
})?,
),
expression: Box::new(expression.as_ref().map_or(
Ok(PlanExpression::Literal(PlanTerm {
encoded: true.into(),
plain: true.into(),
})),
|e| self.build_for_expression(e, variables, graph_name),
)?),
}
}
}
@ -171,7 +174,7 @@ impl<'a> PlanBuilder<'a> {
expression,
} => PlanNode::Extend {
child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
position: variable_key(variables, variable),
variable: build_plan_variable(variables, variable),
expression: Box::new(self.build_for_expression(expression, variables, graph_name)?),
},
GraphPattern::Minus { left, right } => PlanNode::AntiJoin {
@ -200,14 +203,18 @@ impl<'a> PlanBuilder<'a> {
aggregates,
} => PlanNode::Aggregate {
child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?),
key_variables: Rc::new(by.iter().map(|k| variable_key(variables, k)).collect()),
key_variables: Rc::new(
by.iter()
.map(|k| build_plan_variable(variables, k))
.collect(),
),
aggregates: Rc::new(
aggregates
.iter()
.map(|(v, a)| {
Ok((
self.build_for_aggregate(a, variables, graph_name)?,
variable_key(variables, v),
build_plan_variable(variables, v),
))
})
.collect::<Result<Vec<_>, EvaluationError>>()?,
@ -216,9 +223,36 @@ impl<'a> PlanBuilder<'a> {
GraphPattern::Values {
variables: table_variables,
bindings,
} => PlanNode::StaticBindings {
tuples: self.encode_bindings(table_variables, bindings, variables),
},
} => {
let bindings_variables = table_variables
.iter()
.map(|v| build_plan_variable(variables, v))
.collect::<Vec<_>>();
let encoded_tuples = bindings
.iter()
.map(|row| {
let mut result = EncodedTuple::with_capacity(variables.len());
for (key, value) in row.iter().enumerate() {
if let Some(term) = value {
result.set(
bindings_variables[key].encoded,
match term {
GroundTerm::NamedNode(node) => self.build_term(node),
GroundTerm::Literal(literal) => self.build_term(literal),
GroundTerm::Triple(triple) => self.build_triple(triple),
},
);
}
}
result
})
.collect();
PlanNode::StaticBindings {
encoded_tuples,
variables: bindings_variables,
plain_bindings: bindings.clone(),
}
}
GraphPattern::OrderBy { inner, expression } => {
let condition: Result<Vec<_>, EvaluationError> = expression
.iter()
@ -242,7 +276,7 @@ impl<'a> PlanBuilder<'a> {
} => {
let mut inner_variables = projection.clone();
let inner_graph_name =
Self::convert_pattern_value_id(graph_name, variables, &mut inner_variables);
Self::convert_pattern_value_id(graph_name, &mut inner_variables);
PlanNode::Project {
child: Box::new(self.build_for_graph_pattern(
inner,
@ -254,7 +288,13 @@ impl<'a> PlanBuilder<'a> {
.iter()
.enumerate()
.map(|(new_variable, variable)| {
(new_variable, variable_key(variables, variable))
(
PlanVariable {
encoded: new_variable,
plain: variable.clone(),
},
build_plan_variable(variables, variable),
)
})
.collect(),
),
@ -306,13 +346,18 @@ impl<'a> PlanBuilder<'a> {
})
.reduce(|a, b| self.new_join(a, b))
.unwrap_or_else(|| PlanNode::StaticBindings {
tuples: vec![EncodedTuple::with_capacity(variables.len())],
encoded_tuples: vec![EncodedTuple::with_capacity(variables.len())],
variables: Vec::new(),
plain_bindings: vec![Vec::new()],
})
}
fn build_for_path(&self, path: &PropertyPathExpression) -> PlanPropertyPath {
match path {
PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(self.build_term(p)),
PropertyPathExpression::NamedNode(p) => PlanPropertyPath::Path(PlanTerm {
encoded: self.build_term(p),
plain: p.clone(),
}),
PropertyPathExpression::Reverse(p) => {
PlanPropertyPath::Reverse(Rc::new(self.build_for_path(p)))
}
@ -333,9 +378,16 @@ impl<'a> PlanBuilder<'a> {
PropertyPathExpression::ZeroOrOne(p) => {
PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p)))
}
PropertyPathExpression::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet(
Rc::new(p.iter().map(|p| self.build_term(p)).collect()),
),
PropertyPathExpression::NegatedPropertySet(p) => {
PlanPropertyPath::NegatedPropertySet(Rc::new(
p.iter()
.map(|p| PlanTerm {
encoded: self.build_term(p),
plain: p.clone(),
})
.collect(),
))
}
}
}
@ -346,9 +398,15 @@ impl<'a> PlanBuilder<'a> {
graph_name: &PatternValue,
) -> Result<PlanExpression, EvaluationError> {
Ok(match expression {
Expression::NamedNode(node) => PlanExpression::Constant(self.build_term(node)),
Expression::Literal(l) => PlanExpression::Constant(self.build_term(l)),
Expression::Variable(v) => PlanExpression::Variable(variable_key(variables, v)),
Expression::NamedNode(node) => PlanExpression::NamedNode(PlanTerm {
encoded: self.build_term(node),
plain: node.clone(),
}),
Expression::Literal(l) => PlanExpression::Literal(PlanTerm {
encoded: self.build_term(l),
plain: l.clone(),
}),
Expression::Variable(v) => PlanExpression::Variable(build_plan_variable(variables, v)),
Expression::Or(a, b) => PlanExpression::Or(
Box::new(self.build_for_expression(a, variables, graph_name)?),
Box::new(self.build_for_expression(b, variables, graph_name)?),
@ -393,7 +451,12 @@ impl<'a> PlanBuilder<'a> {
.reduce(|a: Result<_, EvaluationError>, b| {
Ok(PlanExpression::Or(Box::new(a?), Box::new(b?)))
})
.unwrap_or_else(|| Ok(PlanExpression::Constant(false.into())))?
.unwrap_or_else(|| {
Ok(PlanExpression::Literal(PlanTerm {
encoded: false.into(),
plain: false.into(),
}))
})?
}
Expression::Add(a, b) => PlanExpression::Add(
Box::new(self.build_for_expression(a, variables, graph_name)?),
@ -824,7 +887,7 @@ impl<'a> PlanBuilder<'a> {
}
}
},
Expression::Bound(v) => PlanExpression::Bound(variable_key(variables, v)),
Expression::Bound(v) => PlanExpression::Bound(build_plan_variable(variables, v)),
Expression::If(a, b, c) => PlanExpression::If(
Box::new(self.build_for_expression(a, variables, graph_name)?),
Box::new(self.build_for_expression(b, variables, graph_name)?),
@ -883,17 +946,23 @@ impl<'a> PlanBuilder<'a> {
) -> PatternValue {
match term_or_variable {
TermPattern::Variable(variable) => {
PatternValue::Variable(variable_key(variables, variable))
PatternValue::Variable(build_plan_variable(variables, variable))
}
TermPattern::NamedNode(node) => PatternValue::Constant(self.build_term(node)),
TermPattern::NamedNode(node) => PatternValue::Constant(PlanTerm {
encoded: self.build_term(node),
plain: PatternValueConstant::NamedNode(node.clone()),
}),
TermPattern::BlankNode(bnode) => {
PatternValue::Variable(variable_key(
PatternValue::Variable(build_plan_variable(
variables,
&Variable::new_unchecked(bnode.as_str()),
))
//TODO: very bad hack to convert bnode to variable
}
TermPattern::Literal(literal) => PatternValue::Constant(self.build_term(literal)),
TermPattern::Literal(literal) => PatternValue::Constant(PlanTerm {
encoded: self.build_term(literal),
plain: PatternValueConstant::Literal(literal.clone()),
}),
TermPattern::Triple(triple) => {
match (
self.pattern_value_from_term_or_variable(&triple.subject, variables),
@ -901,19 +970,48 @@ impl<'a> PlanBuilder<'a> {
self.pattern_value_from_term_or_variable(&triple.object, variables),
) {
(
PatternValue::Constant(subject),
PatternValue::Constant(predicate),
PatternValue::Constant(object),
) => PatternValue::Constant(
EncodedTriple {
subject,
predicate,
object,
PatternValue::Constant(PlanTerm {
encoded: encoded_subject,
plain: plain_subject,
}),
PatternValue::Constant(PlanTerm {
encoded: encoded_predicate,
plain: plain_predicate,
}),
PatternValue::Constant(PlanTerm {
encoded: encoded_object,
plain: plain_object,
}),
) => PatternValue::Constant(PlanTerm {
encoded: EncodedTriple {
subject: encoded_subject,
predicate: encoded_predicate,
object: encoded_object,
}
.into(),
),
plain: PatternValueConstant::Triple(Box::new(Triple {
subject: match plain_subject {
PatternValueConstant::NamedNode(s) => s.into(),
PatternValueConstant::Triple(s) => s.into(),
PatternValueConstant::Literal(_)
| PatternValueConstant::DefaultGraph => unreachable!(),
},
predicate: match plain_predicate {
PatternValueConstant::NamedNode(s) => s,
PatternValueConstant::Literal(_)
| PatternValueConstant::Triple(_)
| PatternValueConstant::DefaultGraph => unreachable!(),
},
object: match plain_object {
PatternValueConstant::NamedNode(s) => s.into(),
PatternValueConstant::Literal(s) => s.into(),
PatternValueConstant::Triple(s) => s.into(),
PatternValueConstant::DefaultGraph => unreachable!(),
},
})),
}),
(subject, predicate, object) => {
PatternValue::Triple(Box::new(TriplePatternValue {
PatternValue::TriplePattern(Box::new(TriplePatternValue {
subject,
predicate,
object,
@ -930,45 +1028,16 @@ impl<'a> PlanBuilder<'a> {
variables: &mut Vec<Variable>,
) -> PatternValue {
match named_node_or_variable {
NamedNodePattern::NamedNode(named_node) => {
PatternValue::Constant(self.build_term(named_node))
}
NamedNodePattern::NamedNode(named_node) => PatternValue::Constant(PlanTerm {
encoded: self.build_term(named_node),
plain: PatternValueConstant::NamedNode(named_node.clone()),
}),
NamedNodePattern::Variable(variable) => {
PatternValue::Variable(variable_key(variables, variable))
PatternValue::Variable(build_plan_variable(variables, variable))
}
}
}
fn encode_bindings(
&self,
table_variables: &[Variable],
rows: &[Vec<Option<GroundTerm>>],
variables: &mut Vec<Variable>,
) -> Vec<EncodedTuple> {
let bindings_variables_keys = table_variables
.iter()
.map(|v| variable_key(variables, v))
.collect::<Vec<_>>();
rows.iter()
.map(move |row| {
let mut result = EncodedTuple::with_capacity(variables.len());
for (key, value) in row.iter().enumerate() {
if let Some(term) = value {
result.set(
bindings_variables_keys[key],
match term {
GroundTerm::NamedNode(node) => self.build_term(node),
GroundTerm::Literal(literal) => self.build_term(literal),
GroundTerm::Triple(triple) => self.build_triple(triple),
},
);
}
}
result
})
.collect()
}
fn build_for_aggregate(
&self,
aggregate: &AggregateExpression,
@ -1059,15 +1128,20 @@ impl<'a> PlanBuilder<'a> {
) -> TripleTemplateValue {
match term_or_variable {
TermPattern::Variable(variable) => {
TripleTemplateValue::Variable(variable_key(variables, variable))
}
TermPattern::NamedNode(node) => TripleTemplateValue::Constant(self.build_term(node)),
TermPattern::BlankNode(bnode) => {
TripleTemplateValue::BlankNode(bnode_key(bnodes, bnode))
}
TermPattern::Literal(literal) => {
TripleTemplateValue::Constant(self.build_term(literal))
TripleTemplateValue::Variable(build_plan_variable(variables, variable))
}
TermPattern::NamedNode(node) => TripleTemplateValue::Constant(PlanTerm {
encoded: self.build_term(node),
plain: node.clone().into(),
}),
TermPattern::BlankNode(bnode) => TripleTemplateValue::BlankNode(PlanVariable {
encoded: bnode_key(bnodes, bnode),
plain: bnode.clone(),
}),
TermPattern::Literal(literal) => TripleTemplateValue::Constant(PlanTerm {
encoded: self.build_term(literal),
plain: literal.clone().into(),
}),
TermPattern::Triple(triple) => match (
self.template_value_from_term_or_variable(&triple.subject, variables, bnodes),
self.template_value_from_named_node_or_variable(&triple.predicate, variables),
@ -1077,14 +1151,30 @@ impl<'a> PlanBuilder<'a> {
TripleTemplateValue::Constant(subject),
TripleTemplateValue::Constant(predicate),
TripleTemplateValue::Constant(object),
) => TripleTemplateValue::Constant(
EncodedTriple {
subject,
predicate,
object,
) => TripleTemplateValue::Constant(PlanTerm {
encoded: EncodedTriple {
subject: subject.encoded,
predicate: predicate.encoded,
object: object.encoded,
}
.into(),
),
plain: Triple {
subject: match subject.plain {
Term::NamedNode(node) => node.into(),
Term::BlankNode(node) => node.into(),
Term::Literal(_) => unreachable!(),
Term::Triple(node) => node.into(),
},
predicate: match predicate.plain {
Term::NamedNode(node) => node,
Term::BlankNode(_) | Term::Literal(_) | Term::Triple(_) => {
unreachable!()
}
},
object: object.plain,
}
.into(),
}),
(subject, predicate, object) => {
TripleTemplateValue::Triple(Box::new(TripleTemplate {
subject,
@ -1103,35 +1193,34 @@ impl<'a> PlanBuilder<'a> {
) -> TripleTemplateValue {
match named_node_or_variable {
NamedNodePattern::Variable(variable) => {
TripleTemplateValue::Variable(variable_key(variables, variable))
}
NamedNodePattern::NamedNode(term) => {
TripleTemplateValue::Constant(self.build_term(term))
TripleTemplateValue::Variable(build_plan_variable(variables, variable))
}
NamedNodePattern::NamedNode(term) => TripleTemplateValue::Constant(PlanTerm {
encoded: self.build_term(term),
plain: term.clone().into(),
}),
}
}
fn convert_pattern_value_id(
from_value: &PatternValue,
from: &[Variable],
to: &mut Vec<Variable>,
) -> PatternValue {
fn convert_pattern_value_id(from_value: &PatternValue, to: &mut Vec<Variable>) -> PatternValue {
match from_value {
PatternValue::Constant(v) => PatternValue::Constant(v.clone()),
PatternValue::Constant(c) => PatternValue::Constant(c.clone()),
PatternValue::Variable(from_id) => {
PatternValue::Variable(Self::convert_variable_id(*from_id, from, to))
PatternValue::Variable(Self::convert_plan_variable(from_id, to))
}
PatternValue::TriplePattern(triple) => {
PatternValue::TriplePattern(Box::new(TriplePatternValue {
subject: Self::convert_pattern_value_id(&triple.subject, to),
predicate: Self::convert_pattern_value_id(&triple.predicate, to),
object: Self::convert_pattern_value_id(&triple.object, to),
}))
}
PatternValue::Triple(triple) => PatternValue::Triple(Box::new(TriplePatternValue {
subject: Self::convert_pattern_value_id(&triple.subject, from, to),
predicate: Self::convert_pattern_value_id(&triple.predicate, from, to),
object: Self::convert_pattern_value_id(&triple.object, from, to),
})),
}
}
fn convert_variable_id(from_id: usize, from: &[Variable], to: &mut Vec<Variable>) -> usize {
if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| {
if *var == from[from_id] {
fn convert_plan_variable(from_variable: &PlanVariable, to: &mut Vec<Variable>) -> PlanVariable {
let encoded = if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| {
if *var == from_variable.plain {
Some(to_id)
} else {
None
@ -1141,6 +1230,10 @@ impl<'a> PlanBuilder<'a> {
} else {
to.push(Variable::new_unchecked(format!("{:x}", random::<u128>())));
to.len() - 1
};
PlanVariable {
encoded,
plain: from_variable.plain.clone(),
}
}
@ -1228,8 +1321,8 @@ impl<'a> PlanBuilder<'a> {
let mut child_bound = BTreeSet::new();
Self::add_left_join_problematic_variables(child, &mut child_bound);
for (child_i, output_i) in mapping.iter() {
if child_bound.contains(child_i) {
set.insert(*output_i);
if child_bound.contains(&child_i.encoded) {
set.insert(output_i.encoded);
}
}
}
@ -1238,10 +1331,10 @@ impl<'a> PlanBuilder<'a> {
aggregates,
..
} => {
set.extend(key_variables.iter());
set.extend(key_variables.iter().map(|v| v.encoded));
//TODO: This is too harsh
for (_, var) in aggregates.iter() {
set.insert(*var);
set.insert(var.encoded);
}
}
}
@ -1371,21 +1464,21 @@ impl<'a> PlanBuilder<'a> {
PlanNode::Extend {
child,
expression,
position,
variable: position,
} => {
//TODO: handle the case where the filter generates an expression variable
if filter_variables.iter().all(|v| child.is_variable_bound(*v)) {
PlanNode::Extend {
child: Box::new(self.push_filter(child, filter)),
expression,
position,
variable: position,
}
} else {
PlanNode::Filter {
child: Box::new(PlanNode::Extend {
child,
expression,
position,
variable: position,
}),
expression: filter,
}
@ -1439,13 +1532,17 @@ impl<'a> PlanBuilder<'a> {
}
}
fn variable_key(variables: &mut Vec<Variable>, variable: &Variable) -> usize {
match slice_key(variables, variable) {
fn build_plan_variable(variables: &mut Vec<Variable>, variable: &Variable) -> PlanVariable {
let encoded = match slice_key(variables, variable) {
Some(key) => key,
None => {
variables.push(variable.clone());
variables.len() - 1
}
};
PlanVariable {
plain: variable.clone(),
encoded,
}
}

Loading…
Cancel
Save