Reduces the number of considered "problem variables" in OPTIONAL

Runs the optional-filter tests
pull/35/head
Tpt 5 years ago
parent 9060998f79
commit 16451b7c53
  1. 35
      lib/src/sparql/algebra.rs
  2. 105
      lib/src/sparql/eval.rs
  3. 17
      lib/src/sparql/parser.rs
  4. 126
      lib/src/sparql/plan.rs
  5. 89
      lib/src/sparql/plan_builder.rs
  6. 2
      lib/tests/rdf-tests
  7. 4
      lib/tests/sparql_test_cases.rs

@ -554,7 +554,7 @@ impl fmt::Display for Function {
pub enum GraphPattern {
BGP(Vec<TripleOrPathPattern>),
Join(Box<GraphPattern>, Box<GraphPattern>),
LeftJoin(Box<GraphPattern>, Box<GraphPattern>, Expression),
LeftJoin(Box<GraphPattern>, Box<GraphPattern>, Option<Expression>),
Filter(Expression, Box<GraphPattern>),
Union(Box<GraphPattern>, Box<GraphPattern>),
Graph(NamedNodeOrVariable, Box<GraphPattern>),
@ -582,7 +582,13 @@ impl fmt::Display for GraphPattern {
.join(" . ")
),
GraphPattern::Join(a, b) => write!(f, "Join({}, {})", a, b),
GraphPattern::LeftJoin(a, b, e) => write!(f, "LeftJoin({}, {}, {})", a, b, e),
GraphPattern::LeftJoin(a, b, e) => {
if let Some(e) = e {
write!(f, "LeftJoin({}, {}, {})", a, b, e)
} else {
write!(f, "LeftJoin({}, {})", a, b)
}
}
GraphPattern::Filter(e, p) => write!(f, "Filter({}, {})", e, p),
GraphPattern::Union(a, b) => write!(f, "Union({}, {})", a, b),
GraphPattern::Graph(g, p) => write!(f, "Graph({}, {})", g, p),
@ -750,13 +756,24 @@ impl<'a> fmt::Display for SparqlGraphPattern<'a> {
SparqlGraphPattern(&*a),
SparqlGraphPattern(&*b)
),
GraphPattern::LeftJoin(a, b, e) => write!(
f,
"{} OPTIONAL {{ {} FILTER({}) }}",
SparqlGraphPattern(&*a),
SparqlGraphPattern(&*b),
SparqlExpression(e)
),
GraphPattern::LeftJoin(a, b, e) => {
if let Some(e) = e {
write!(
f,
"{} OPTIONAL {{ {} FILTER({}) }}",
SparqlGraphPattern(&*a),
SparqlGraphPattern(&*b),
SparqlExpression(e)
)
} else {
write!(
f,
"{} OPTIONAL {{ {} }}",
SparqlGraphPattern(&*a),
SparqlGraphPattern(&*b)
)
}
}
GraphPattern::Filter(e, p) => write!(
f,
"{} FILTER({})",

@ -303,22 +303,21 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
right,
possible_problem_vars,
} => {
let problem_vars = bind_variables_in_set(&from, possible_problem_vars);
let mut filtered_from = from.clone();
unbind_variables(&mut filtered_from, &problem_vars);
let iter = LeftJoinIterator {
eval: self,
right_plan: &*right,
left_iter: self.eval_plan(&*left, filtered_from),
current_right: Box::new(empty()),
};
if problem_vars.is_empty() {
Box::new(iter)
if possible_problem_vars.is_empty() {
Box::new(LeftJoinIterator {
eval: self,
right_plan: &*right,
left_iter: self.eval_plan(&*left, from),
current_right: Box::new(empty()),
})
} else {
Box::new(BadLeftJoinIterator {
input: from,
iter,
problem_vars,
eval: self,
right_plan: &*right,
left_iter: self.eval_plan(&*left, from),
current_left: None,
current_right: Box::new(empty()),
problem_vars: possible_problem_vars.as_slice(),
})
}
}
@ -1959,19 +1958,27 @@ fn put_variable_value(
}
}
fn bind_variables_in_set(binding: &EncodedTuple, set: &[usize]) -> Vec<usize> {
set.iter()
.cloned()
.filter(|key| binding.contains(*key))
.collect()
}
fn unbind_variables(binding: &mut EncodedTuple, variables: &[usize]) {
for var in variables {
binding.unset(*var)
}
}
fn combine_tuples(mut a: EncodedTuple, b: &EncodedTuple, vars: &[usize]) -> Option<EncodedTuple> {
for var in vars {
if let Some(b_value) = b.get(*var) {
if let Some(a_value) = a.get(*var) {
if a_value != b_value {
return None;
}
} else {
a.set(*var, b_value);
}
}
}
Some(a)
}
pub fn are_compatible_and_not_disjointed(a: &EncodedTuple, b: &EncodedTuple) -> bool {
let mut found_intersection = false;
for (a_value, b_value) in a.iter().zip(b.iter()) {
@ -2066,37 +2073,53 @@ impl<'a, S: StoreConnection> Iterator for LeftJoinIterator<'a, S> {
}
struct BadLeftJoinIterator<'a, S: StoreConnection> {
input: EncodedTuple,
iter: LeftJoinIterator<'a, S>,
problem_vars: Vec<usize>,
eval: &'a SimpleEvaluator<S>,
right_plan: &'a PlanNode,
left_iter: EncodedTuplesIterator<'a>,
current_left: Option<EncodedTuple>,
current_right: EncodedTuplesIterator<'a>,
problem_vars: &'a [usize],
}
impl<'a, S: StoreConnection> Iterator for BadLeftJoinIterator<'a, S> {
type Item = Result<EncodedTuple>;
fn next(&mut self) -> Option<Result<EncodedTuple>> {
loop {
match self.iter.next()? {
Ok(mut tuple) => {
let mut conflict = false;
for problem_var in &self.problem_vars {
if let Some(input_value) = self.input.get(*problem_var) {
if let Some(result_value) = tuple.get(*problem_var) {
if input_value != result_value {
conflict = true;
continue; //Binding conflict
}
} else {
tuple.set(*problem_var, input_value);
while let Some(right_tuple) = self.current_right.next() {
match right_tuple {
Ok(right_tuple) => {
if let Some(combined) = combine_tuples(
right_tuple,
self.current_left.as_ref().unwrap(),
self.problem_vars,
) {
return Some(Ok(combined));
}
}
Err(error) => return Some(Err(error)),
}
}
match self.left_iter.next()? {
Ok(left_tuple) => {
let mut filtered_left = left_tuple.clone();
unbind_variables(&mut filtered_left, self.problem_vars);
self.current_right = self.eval.eval_plan(self.right_plan, filtered_left);
while let Some(right_tuple) = self.current_right.next() {
match right_tuple {
Ok(right_tuple) => {
if let Some(combined) =
combine_tuples(right_tuple, &left_tuple, self.problem_vars)
{
self.current_left = Some(left_tuple);
return Some(Ok(combined));
}
}
}
if !conflict {
return Some(Ok(tuple));
Err(error) => return Some(Err(error)),
}
}
Err(error) => return Some(Err(error)),
Some(Ok(left_tuple))
}
Err(error) => Some(Err(error)),
}
}
}

@ -132,7 +132,7 @@ impl<F, T: From<F>> From<FocusedTriplePattern<F>> for FocusedTripleOrPathPattern
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
enum PartialGraphPattern {
Optional(GraphPattern),
Optional(GraphPattern, Option<Expression>),
Minus(GraphPattern),
Bind(Expression, Variable),
Filter(Expression),
@ -776,13 +776,8 @@ parser! {
let mut g = GraphPattern::default();
for e in p {
match e {
PartialGraphPattern::Optional(p) => match p {
GraphPattern::Filter(f, a2) => {
g = GraphPattern::LeftJoin(Box::new(g), a2, f)
}
a => {
g = GraphPattern::LeftJoin(Box::new(g), Box::new(a), Literal::from(true).into())
}
PartialGraphPattern::Optional(p, f) => {
g = GraphPattern::LeftJoin(Box::new(g), Box::new(p), f)
}
PartialGraphPattern::Minus(p) => {
g = GraphPattern::Minus(Box::new(g), Box::new(p))
@ -834,7 +829,11 @@ parser! {
//[57]
rule OptionalGraphPattern() -> PartialGraphPattern = i("OPTIONAL") _ p:GroupGraphPattern() {
PartialGraphPattern::Optional(p)
if let GraphPattern::Filter(f, p) = p {
PartialGraphPattern::Optional(*p, Some(f))
} else {
PartialGraphPattern::Optional(p, None)
}
}
//[58]

@ -96,7 +96,7 @@ impl PlanNode {
set
}
fn add_maybe_bound_variables(&self, set: &mut BTreeSet<usize>) {
pub fn add_maybe_bound_variables(&self, set: &mut BTreeSet<usize>) {
match self {
PlanNode::Init => (),
PlanNode::StaticBindings { tuples } => {
@ -147,7 +147,10 @@ impl PlanNode {
}
child.add_maybe_bound_variables(set);
}
PlanNode::Filter { child, .. } => child.add_maybe_bound_variables(set),
PlanNode::Filter { child, expression } => {
expression.add_maybe_bound_variables(set);
child.add_maybe_bound_variables(set);
}
PlanNode::Union { children } => {
for child in children {
child.add_maybe_bound_variables(set);
@ -160,9 +163,12 @@ impl PlanNode {
right.add_maybe_bound_variables(set);
}
PlanNode::Extend {
child, position, ..
child,
position,
expression,
} => {
set.insert(*position);
expression.add_maybe_bound_variables(set);
child.add_maybe_bound_variables(set);
}
PlanNode::Service { child, .. }
@ -307,6 +313,120 @@ pub enum PlanExpression {
StringCast(Box<PlanExpression>),
}
impl PlanExpression {
pub fn add_maybe_bound_variables(&self, set: &mut BTreeSet<usize>) {
match self {
PlanExpression::Variable(v) | PlanExpression::Bound(v) => {
set.insert(*v);
}
PlanExpression::Constant(_)
| PlanExpression::Rand
| PlanExpression::Now
| PlanExpression::UUID
| PlanExpression::StrUUID
| PlanExpression::BNode(None) => (),
PlanExpression::UnaryPlus(e)
| PlanExpression::UnaryMinus(e)
| PlanExpression::UnaryNot(e)
| PlanExpression::BNode(Some(e))
| PlanExpression::Str(e)
| PlanExpression::Lang(e)
| PlanExpression::Datatype(e)
| PlanExpression::IRI(e)
| PlanExpression::Abs(e)
| PlanExpression::Ceil(e)
| PlanExpression::Floor(e)
| PlanExpression::Round(e)
| PlanExpression::UCase(e)
| PlanExpression::LCase(e)
| PlanExpression::StrLen(e)
| PlanExpression::EncodeForURI(e)
| PlanExpression::Year(e)
| PlanExpression::Month(e)
| PlanExpression::Day(e)
| PlanExpression::Hours(e)
| PlanExpression::Minutes(e)
| PlanExpression::Seconds(e)
| PlanExpression::Timezone(e)
| PlanExpression::Tz(e)
| PlanExpression::MD5(e)
| PlanExpression::SHA1(e)
| PlanExpression::SHA256(e)
| PlanExpression::SHA384(e)
| PlanExpression::SHA512(e)
| PlanExpression::IsIRI(e)
| PlanExpression::IsBlank(e)
| PlanExpression::IsLiteral(e)
| PlanExpression::IsNumeric(e)
| PlanExpression::BooleanCast(e)
| PlanExpression::DoubleCast(e)
| PlanExpression::FloatCast(e)
| PlanExpression::DecimalCast(e)
| PlanExpression::IntegerCast(e)
| PlanExpression::DateCast(e)
| PlanExpression::TimeCast(e)
| PlanExpression::DateTimeCast(e)
| PlanExpression::DurationCast(e)
| PlanExpression::StringCast(e) => e.add_maybe_bound_variables(set),
PlanExpression::Or(a, b)
| PlanExpression::And(a, b)
| PlanExpression::Equal(a, b)
| PlanExpression::NotEqual(a, b)
| PlanExpression::Greater(a, b)
| PlanExpression::GreaterOrEq(a, b)
| PlanExpression::Lower(a, b)
| PlanExpression::LowerOrEq(a, b)
| PlanExpression::Add(a, b)
| PlanExpression::Sub(a, b)
| PlanExpression::Mul(a, b)
| PlanExpression::Div(a, b)
| PlanExpression::LangMatches(a, b)
| PlanExpression::Contains(a, b)
| PlanExpression::StrStarts(a, b)
| PlanExpression::StrEnds(a, b)
| PlanExpression::StrBefore(a, b)
| PlanExpression::StrAfter(a, b)
| PlanExpression::StrLang(a, b)
| PlanExpression::StrDT(a, b)
| PlanExpression::SameTerm(a, b)
| PlanExpression::SubStr(a, b, None)
| PlanExpression::Regex(a, b, None) => {
a.add_maybe_bound_variables(set);
b.add_maybe_bound_variables(set);
}
PlanExpression::If(a, b, c)
| PlanExpression::SubStr(a, b, Some(c))
| PlanExpression::Regex(a, b, Some(c))
| PlanExpression::Replace(a, b, c, None) => {
a.add_maybe_bound_variables(set);
b.add_maybe_bound_variables(set);
c.add_maybe_bound_variables(set);
}
PlanExpression::Replace(a, b, c, Some(d)) => {
a.add_maybe_bound_variables(set);
b.add_maybe_bound_variables(set);
c.add_maybe_bound_variables(set);
d.add_maybe_bound_variables(set);
}
PlanExpression::Concat(es) | PlanExpression::Coalesce(es) => {
for e in es {
e.add_maybe_bound_variables(set);
}
}
PlanExpression::In(a, bs) => {
a.add_maybe_bound_variables(set);
for b in bs {
b.add_maybe_bound_variables(set);
}
}
PlanExpression::Exists(e) => {
e.add_maybe_bound_variables(set);
}
}
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct PlanAggregation {
pub function: PlanAggregationFunction,

@ -1,5 +1,4 @@
use crate::model::vocab::xsd;
use crate::model::Literal;
use crate::sparql::algebra::*;
use crate::sparql::model::*;
use crate::sparql::plan::PlanPropertyPath;
@ -7,7 +6,7 @@ use crate::sparql::plan::*;
use crate::store::numeric_encoder::{Encoder, ENCODED_DEFAULT_GRAPH};
use crate::Error;
use crate::Result;
use std::collections::HashSet;
use std::collections::BTreeSet;
pub struct PlanBuilder<E: Encoder> {
encoder: E,
@ -47,25 +46,24 @@ impl<E: Encoder> PlanBuilder<E> {
GraphPattern::LeftJoin(a, b, e) => {
let left = self.build_for_graph_pattern(a, variables, graph_name)?;
let right = self.build_for_graph_pattern(b, variables, graph_name)?;
let mut possible_problem_vars = BTreeSet::new();
self.add_left_join_problematic_variables(&right, &mut possible_problem_vars);
//We add the extra filter if needed
let right = if *e == Expression::from(Literal::from(true)) {
right
} else {
let right = if let Some(e) = e {
PlanNode::Filter {
child: Box::new(right),
expression: self.build_for_expression(e, variables, graph_name)?,
}
} else {
right
};
let possible_problem_vars = right
.maybe_bound_variables()
.difference(&left.maybe_bound_variables())
.cloned()
.collect();
PlanNode::LeftJoin {
left: Box::new(left),
right: Box::new(right),
possible_problem_vars,
possible_problem_vars: possible_problem_vars.into_iter().collect(),
}
}
GraphPattern::Filter(e, p) => PlanNode::Filter {
@ -909,6 +907,69 @@ impl<E: Encoder> PlanBuilder<E> {
to.len() - 1
}
}
fn add_left_join_problematic_variables(&self, node: &PlanNode, set: &mut BTreeSet<usize>) {
match node {
PlanNode::Init
| PlanNode::StaticBindings { .. }
| PlanNode::QuadPatternJoin { .. }
| PlanNode::PathPatternJoin { .. } => (),
PlanNode::Filter { child, expression } => {
expression.add_maybe_bound_variables(set); //TODO: only if it is not already bound
self.add_left_join_problematic_variables(&*child, set);
}
PlanNode::Union { children } => {
for child in children {
self.add_left_join_problematic_variables(&*child, set);
}
}
PlanNode::Join { left, right, .. } => {
self.add_left_join_problematic_variables(&*left, set);
self.add_left_join_problematic_variables(&*right, set);
}
PlanNode::AntiJoin { left, .. } => {
self.add_left_join_problematic_variables(&*left, set);
}
PlanNode::LeftJoin { left, right, .. } => {
self.add_left_join_problematic_variables(&*left, set);
right.add_maybe_bound_variables(set);
}
PlanNode::Extend {
child, expression, ..
} => {
expression.add_maybe_bound_variables(set); //TODO: only if it is not already bound
self.add_left_join_problematic_variables(&*child, set);
self.add_left_join_problematic_variables(&*child, set);
}
PlanNode::Service { child, .. }
| PlanNode::Sort { child, .. }
| PlanNode::HashDeduplicate { child }
| PlanNode::Skip { child, .. }
| PlanNode::Limit { child, .. } => {
self.add_left_join_problematic_variables(&*child, set)
}
PlanNode::Project { mapping, child } => {
let mut child_bound = BTreeSet::new();
self.add_left_join_problematic_variables(&*child, &mut child_bound);
for (child_i, output_i) in mapping.iter() {
if child_bound.contains(child_i) {
set.insert(*output_i);
}
}
}
PlanNode::Aggregate {
key_mapping,
aggregates,
..
} => {
set.extend(key_mapping.iter().map(|(_, o)| o));
//TODO: This is too harsh
for (_, var) in aggregates {
set.insert(*var);
}
}
}
}
}
fn variable_key(variables: &mut Vec<Variable>, variable: &Variable) -> usize {
@ -931,7 +992,7 @@ fn slice_key<T: Eq>(slice: &[T], element: &T) -> Option<usize> {
}
fn sort_bgp(p: &[TripleOrPathPattern]) -> Vec<&TripleOrPathPattern> {
let mut assigned_variables = HashSet::default();
let mut assigned_variables = BTreeSet::default();
let mut new_p: Vec<_> = p.iter().collect();
for i in 0..new_p.len() {
@ -947,7 +1008,7 @@ fn sort_bgp(p: &[TripleOrPathPattern]) -> Vec<&TripleOrPathPattern> {
fn count_pattern_binds(
pattern: &TripleOrPathPattern,
assigned_variables: &HashSet<&Variable>,
assigned_variables: &BTreeSet<&Variable>,
) -> u8 {
let mut count = 12;
if let TermOrVariable::Variable(v) = pattern.subject() {
@ -980,7 +1041,7 @@ fn count_pattern_binds(
fn add_pattern_variables<'a>(
pattern: &'a TripleOrPathPattern,
variables: &mut HashSet<&'a Variable>,
variables: &mut BTreeSet<&'a Variable>,
) {
if let TermOrVariable::Variable(v) = pattern.subject() {
variables.insert(v);

@ -1 +1 @@
Subproject commit 280e9de3aaefa6b292a151bd455204d49a0c09db
Subproject commit dc237e319e6562f2913341f6ba964ecbcbbf4499

@ -78,6 +78,7 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> {
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest.ttl",
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest.ttl",
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional/manifest.ttl",
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest.ttl",
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest.ttl",
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/regex/manifest.ttl",
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/solution-seq/manifest.ttl",
@ -134,7 +135,8 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> {
NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5").unwrap(),
// We use XSD 1.1 equality on dates
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/open-world/manifest#date-2").unwrap(),
// We choose to simplify first the nested group patterns in OPTIONAL
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified").unwrap(),
];
let tests: Result<Vec<_>> = manifest_10_urls

Loading…
Cancel
Save