diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 8c704e99..5a4a12ce 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -494,6 +494,15 @@ impl SimpleEvaluator { let child = self.plan_evaluator(child); Rc::new(move |from| Box::new(hash_deduplicate(child(from)))) } + PlanNode::Reduced { child } => { + let child = self.plan_evaluator(child); + Rc::new(move |from| { + Box::new(ConsecutiveDeduplication { + inner: child(from), + current: None, + }) + }) + } PlanNode::Skip { child, count } => { let child = self.plan_evaluator(child); let count = *count; @@ -3147,6 +3156,39 @@ impl Iterator for UnionIterator { } } +struct ConsecutiveDeduplication { + inner: EncodedTuplesIterator, + current: Option, +} + +impl Iterator for ConsecutiveDeduplication { + type Item = Result; + + fn next(&mut self) -> Option> { + // Basic idea. We buffer the previous result and we only emit it when we kow the next one or it's the end + loop { + if let Some(next) = self.inner.next() { + match next { + Ok(next) => match self.current.take() { + Some(current) if current != next => { + // We found a relevant value + self.current = Some(next); + return Some(Ok(current)); + } + _ => { + // We discard the value and move to the next one + self.current = Some(next); + } + }, + Err(error) => return Some(Err(error)), // We swap but it's fine. It's an error. + } + } else { + return self.current.take().map(Ok); + } + } + } +} + struct ConstructIterator { eval: SimpleEvaluator, iter: EncodedTuplesIterator, diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 64e82106..b48531e1 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -70,6 +70,10 @@ pub enum PlanNode { HashDeduplicate { child: Box, }, + /// Removes duplicated consecutive elements + Reduced { + child: Box, + }, Skip { child: Box, count: usize, @@ -173,6 +177,7 @@ impl PlanNode { } PlanNode::Sort { child, .. } | PlanNode::HashDeduplicate { child } + | PlanNode::Reduced { child } | PlanNode::Skip { child, .. } | PlanNode::Limit { child, .. } => child.lookup_used_variables(callback), PlanNode::Service { @@ -314,6 +319,7 @@ impl PlanNode { } PlanNode::Sort { child, .. } | PlanNode::HashDeduplicate { child } + | PlanNode::Reduced { child } | PlanNode::Skip { child, .. } | PlanNode::Limit { child, .. } => child.lookup_always_bound_variables(callback), PlanNode::Service { child, silent, .. } => { diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index bb7c84e5..d5b4da72 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -246,9 +246,9 @@ impl<'a> PlanBuilder<'a> { GraphPattern::Distinct { inner } => PlanNode::HashDeduplicate { child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?), }, - GraphPattern::Reduced { inner } => { - self.build_for_graph_pattern(inner, variables, graph_name)? - } + GraphPattern::Reduced { inner } => PlanNode::Reduced { + child: Box::new(self.build_for_graph_pattern(inner, variables, graph_name)?), + }, GraphPattern::Slice { inner, start, @@ -1096,6 +1096,7 @@ impl<'a> PlanBuilder<'a> { } PlanNode::Sort { child, .. } | PlanNode::HashDeduplicate { child } + | PlanNode::Reduced { child } | PlanNode::Skip { child, .. } | PlanNode::Limit { child, .. } => { self.add_left_join_problematic_variables(&*child, set) @@ -1186,6 +1187,7 @@ impl<'a> PlanBuilder<'a> { | PlanNode::Service { .. } | PlanNode::Sort { .. } | PlanNode::HashDeduplicate { .. } + | PlanNode::Reduced { .. } | PlanNode::Skip { .. } | PlanNode::Limit { .. } | PlanNode::Project { .. }