SPARQL: refactor AggregateExpression

Avoids code duplication
pull/673/head
Tpt 12 months ago committed by Thomas Tanon
parent 98caee8f92
commit f8034c68e9
  1. 223
      lib/spargebra/src/algebra.rs
  2. 40
      lib/spargebra/src/parser.rs
  3. 122
      lib/sparopt/src/algebra.rs
  4. 7
      lib/sparopt/src/optimizer.rs
  5. 85
      lib/src/sparql/eval.rs

@ -1114,46 +1114,11 @@ impl<'a> fmt::Display for SparqlGraphRootPattern<'a> {
/// A set function used in aggregates (c.f. [`GraphPattern::Group`]). /// A set function used in aggregates (c.f. [`GraphPattern::Group`]).
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum AggregateExpression { pub enum AggregateExpression {
/// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount). /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) with *.
Count { CountSolutions { distinct: bool },
expr: Option<Box<Expression>>, FunctionCall {
distinct: bool, name: AggregateFunction,
}, expr: Expression,
/// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum).
Sum {
expr: Box<Expression>,
distinct: bool,
},
/// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg).
Avg {
expr: Box<Expression>,
distinct: bool,
},
/// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin).
Min {
expr: Box<Expression>,
distinct: bool,
},
/// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax).
Max {
expr: Box<Expression>,
distinct: bool,
},
/// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat).
GroupConcat {
expr: Box<Expression>,
distinct: bool,
separator: Option<String>,
},
/// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample).
Sample {
expr: Box<Expression>,
distinct: bool,
},
/// Custom function.
Custom {
name: NamedNode,
expr: Box<Expression>,
distinct: bool, distinct: bool,
}, },
} }
@ -1162,82 +1127,39 @@ impl AggregateExpression {
/// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
match self { match self {
Self::Count { expr, distinct } => { Self::CountSolutions { distinct } => {
write!(f, "(sum")?; write!(f, "(count")?;
if *distinct {
write!(f, " distinct")?;
}
if let Some(expr) = expr {
write!(f, " ")?;
expr.fmt_sse(f)?;
}
write!(f, ")")
}
Self::Sum { expr, distinct } => {
write!(f, "(sum ")?;
if *distinct {
write!(f, "distinct ")?;
}
expr.fmt_sse(f)?;
write!(f, ")")
}
Self::Avg { expr, distinct } => {
write!(f, "(avg ")?;
if *distinct { if *distinct {
write!(f, " distinct")?; write!(f, " distinct")?;
} }
expr.fmt_sse(f)?;
write!(f, ")") write!(f, ")")
} }
Self::Min { expr, distinct } => { Self::FunctionCall {
write!(f, "(min ")?; name:
if *distinct { AggregateFunction::GroupConcat {
write!(f, "distinct ")?; separator: Some(separator),
} },
expr.fmt_sse(f)?;
write!(f, ")")
}
Self::Max { expr, distinct } => {
write!(f, "(max ")?;
if *distinct {
write!(f, "distinct ")?;
}
expr.fmt_sse(f)?;
write!(f, ")")
}
Self::Sample { expr, distinct } => {
write!(f, "(sample ")?;
if *distinct {
write!(f, "distinct ")?;
}
expr.fmt_sse(f)?;
write!(f, ")")
}
Self::GroupConcat {
expr, expr,
distinct, distinct,
separator,
} => { } => {
write!(f, "(group_concat ")?; write!(f, "(group_concat ")?;
if *distinct { if *distinct {
write!(f, "distinct ")?; write!(f, "distinct ")?;
} }
expr.fmt_sse(f)?; expr.fmt_sse(f)?;
if let Some(separator) = separator { write!(f, " {})", LiteralRef::new_simple_literal(separator))
write!(f, " {}", LiteralRef::new_simple_literal(separator))?;
}
write!(f, ")")
} }
Self::Custom { Self::FunctionCall {
name, name,
expr, expr,
distinct, distinct,
} => { } => {
write!(f, "({name}")?; write!(f, "(")?;
name.fmt_sse(f)?;
write!(f, " ")?;
if *distinct { if *distinct {
write!(f, "distinct ")?; write!(f, "distinct ")?;
} }
write!(f, " ")?;
expr.fmt_sse(f)?; expr.fmt_sse(f)?;
write!(f, ")") write!(f, ")")
} }
@ -1248,61 +1170,22 @@ impl AggregateExpression {
impl fmt::Display for AggregateExpression { impl fmt::Display for AggregateExpression {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Self::Count { expr, distinct } => { Self::CountSolutions { distinct } => {
if *distinct { if *distinct {
if let Some(expr) = expr {
write!(f, "COUNT(DISTINCT {expr})")
} else {
write!(f, "COUNT(DISTINCT *)") write!(f, "COUNT(DISTINCT *)")
}
} else if let Some(expr) = expr {
write!(f, "COUNT({expr})")
} else { } else {
write!(f, "COUNT(*)") write!(f, "COUNT(*)")
} }
} }
Self::Sum { expr, distinct } => { Self::FunctionCall {
if *distinct { name:
write!(f, "SUM(DISTINCT {expr})") AggregateFunction::GroupConcat {
} else { separator: Some(separator),
write!(f, "SUM({expr})") },
}
}
Self::Min { expr, distinct } => {
if *distinct {
write!(f, "MIN(DISTINCT {expr})")
} else {
write!(f, "MIN({expr})")
}
}
Self::Max { expr, distinct } => {
if *distinct {
write!(f, "MAX(DISTINCT {expr})")
} else {
write!(f, "MAX({expr})")
}
}
Self::Avg { expr, distinct } => {
if *distinct {
write!(f, "AVG(DISTINCT {expr})")
} else {
write!(f, "AVG({expr})")
}
}
Self::Sample { expr, distinct } => {
if *distinct {
write!(f, "SAMPLE(DISTINCT {expr})")
} else {
write!(f, "SAMPLE({expr})")
}
}
Self::GroupConcat {
expr, expr,
distinct, distinct,
separator,
} => { } => {
if *distinct { if *distinct {
if let Some(separator) = separator {
write!( write!(
f, f,
"GROUP_CONCAT(DISTINCT {}; SEPARATOR = {})", "GROUP_CONCAT(DISTINCT {}; SEPARATOR = {})",
@ -1310,20 +1193,15 @@ impl fmt::Display for AggregateExpression {
LiteralRef::new_simple_literal(separator) LiteralRef::new_simple_literal(separator)
) )
} else { } else {
write!(f, "GROUP_CONCAT(DISTINCT {expr})")
}
} else if let Some(separator) = separator {
write!( write!(
f, f,
"GROUP_CONCAT({}; SEPARATOR = {})", "GROUP_CONCAT({}; SEPARATOR = {})",
expr, expr,
LiteralRef::new_simple_literal(separator) LiteralRef::new_simple_literal(separator)
) )
} else {
write!(f, "GROUP_CONCAT({expr})")
} }
} }
Self::Custom { Self::FunctionCall {
name, name,
expr, expr,
distinct, distinct,
@ -1338,6 +1216,59 @@ impl fmt::Display for AggregateExpression {
} }
} }
/// An aggregate function name.
///
/// Identifies which aggregate an `AggregateExpression::FunctionCall` applies to its
/// argument expression. `COUNT(*)` is not represented here: the parser maps it to
/// `AggregateExpression::CountSolutions` instead.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum AggregateFunction {
/// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) applied to an expression (i.e. not `COUNT(*)`).
Count,
/// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum).
Sum,
/// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg).
Avg,
/// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin).
Min,
/// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax).
Max,
/// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat).
GroupConcat {
// The string given with `SEPARATOR = "..."`, or `None` when the query did not specify one.
separator: Option<String>,
},
/// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample).
Sample,
/// Custom aggregate function, identified by its IRI.
Custom(NamedNode),
}
impl AggregateFunction {
    /// Writes the function name using the
    /// [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
    ///
    /// Built-in aggregates are written as their lowercase keyword. A custom aggregate is
    /// written through the `Display` output of its IRI. Only the name is emitted: the
    /// `GROUP_CONCAT` separator, if any, is not written by this method.
    pub(crate) fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
        let keyword = match self {
            // The only variant whose text is not a fixed keyword.
            Self::Custom(iri) => return write!(f, "{iri}"),
            Self::Count => "count",
            Self::Sum => "sum",
            Self::Avg => "avg",
            Self::Min => "min",
            Self::Max => "max",
            Self::GroupConcat { .. } => "group_concat",
            Self::Sample => "sample",
        };
        f.write_str(keyword)
    }
}
impl fmt::Display for AggregateFunction {
    /// Writes the function name as it appears in SPARQL query text: the uppercase keyword
    /// for built-in aggregates, or the IRI's own `Display` output for a custom aggregate.
    /// The `GROUP_CONCAT` separator is not part of the name and is not written here.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            // Delegate entirely to the IRI so formatter state is handled by its own impl.
            Self::Custom(iri) => return fmt::Display::fmt(iri, f),
            Self::Count => "COUNT",
            Self::Sum => "SUM",
            Self::Avg => "AVG",
            Self::Min => "MIN",
            Self::Max => "MAX",
            Self::GroupConcat { .. } => "GROUP_CONCAT",
            Self::Sample => "SAMPLE",
        };
        f.write_str(keyword)
    }
}
/// An ordering comparator used by [`GraphPattern::OrderBy`]. /// An ordering comparator used by [`GraphPattern::OrderBy`].
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum OrderExpression { pub enum OrderExpression {

@ -1918,26 +1918,26 @@ parser! {
rule NotExistsFunc() -> Expression = i("NOT") _ i("EXISTS") _ p:GroupGraphPattern() { Expression::Not(Box::new(Expression::Exists(Box::new(p)))) } rule NotExistsFunc() -> Expression = i("NOT") _ i("EXISTS") _ p:GroupGraphPattern() { Expression::Not(Box::new(Expression::Exists(Box::new(p)))) }
rule Aggregate() -> AggregateExpression = rule Aggregate() -> AggregateExpression =
i("COUNT") _ "(" _ i("DISTINCT") _ "*" _ ")" { AggregateExpression::Count { expr: None, distinct: true } } / i("COUNT") _ "(" _ i("DISTINCT") _ "*" _ ")" { AggregateExpression::CountSolutions { distinct: true } } /
i("COUNT") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Count { expr: Some(Box::new(e)), distinct: true } } / i("COUNT") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Count, expr, distinct: true } } /
i("COUNT") _ "(" _ "*" _ ")" { AggregateExpression::Count { expr: None, distinct: false } } / i("COUNT") _ "(" _ "*" _ ")" { AggregateExpression::CountSolutions { distinct: false } } /
i("COUNT") _ "(" _ e:Expression() _ ")" { AggregateExpression::Count { expr: Some(Box::new(e)), distinct: false } } / i("COUNT") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Count, expr, distinct: false } } /
i("SUM") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Sum { expr: Box::new(e), distinct: true } } / i("SUM") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sum, expr, distinct: true } } /
i("SUM") _ "(" _ e:Expression() _ ")" { AggregateExpression::Sum { expr: Box::new(e), distinct: false } } / i("SUM") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sum, expr, distinct: false } } /
i("MIN") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Min { expr: Box::new(e), distinct: true } } / i("MIN") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Min, expr, distinct: true } } /
i("MIN") _ "(" _ e:Expression() _ ")" { AggregateExpression::Min { expr: Box::new(e), distinct: false } } / i("MIN") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Min, expr, distinct: false } } /
i("MAX") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Max { expr: Box::new(e), distinct: true } } / i("MAX") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Max, expr, distinct: true } } /
i("MAX") _ "(" _ e:Expression() _ ")" { AggregateExpression::Max { expr: Box::new(e), distinct: false } } / i("MAX") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Max, expr, distinct: false } } /
i("AVG") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Avg { expr: Box::new(e), distinct: true } } / i("AVG") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Avg, expr, distinct: true } } /
i("AVG") _ "(" _ e:Expression() _ ")" { AggregateExpression::Avg { expr: Box::new(e), distinct: false } } / i("AVG") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Avg, expr, distinct: false } } /
i("SAMPLE") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Sample { expr: Box::new(e), distinct: true } } / i("SAMPLE") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sample, expr, distinct: true } } /
i("SAMPLE") _ "(" _ e:Expression() _ ")" { AggregateExpression::Sample { expr: Box::new(e), distinct: false } } / i("SAMPLE") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Sample, expr, distinct: false } } /
i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ e:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregateExpression::GroupConcat { expr: Box::new(e), distinct: true, separator: Some(s) } } / i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ expr:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: Some(s) }, expr, distinct: true } } /
i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::GroupConcat { expr: Box::new(e), distinct: true, separator: None } } / i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: None }, expr, distinct: true } } /
i("GROUP_CONCAT") _ "(" _ e:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregateExpression::GroupConcat { expr: Box::new(e), distinct: true, separator: Some(s) } } / i("GROUP_CONCAT") _ "(" _ expr:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: Some(s) }, expr, distinct: true } } /
i("GROUP_CONCAT") _ "(" _ e:Expression() _ ")" { AggregateExpression::GroupConcat { expr: Box::new(e), distinct: false, separator: None } } / i("GROUP_CONCAT") _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::GroupConcat { separator: None }, expr, distinct: false } } /
name:iri() _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Custom { name, expr: Box::new(e), distinct: true } } / name:iri() _ "(" _ i("DISTINCT") _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Custom(name), expr, distinct: true } } /
name:iri() _ "(" _ e:Expression() _ ")" { AggregateExpression::Custom { name, expr: Box::new(e), distinct: false } } name:iri() _ "(" _ expr:Expression() _ ")" { AggregateExpression::FunctionCall { name: AggregateFunction::Custom(name), expr, distinct: false } }
rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? { rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? {
match a { match a {

@ -3,7 +3,7 @@
use oxrdf::vocab::xsd; use oxrdf::vocab::xsd;
use rand::random; use rand::random;
use spargebra::algebra::{ use spargebra::algebra::{
AggregateExpression as AlAggregateExpression, Expression as AlExpression, AggregateExpression as AlAggregateExpression, AggregateFunction, Expression as AlExpression,
GraphPattern as AlGraphPattern, OrderExpression as AlOrderExpression, GraphPattern as AlGraphPattern, OrderExpression as AlOrderExpression,
}; };
pub use spargebra::algebra::{Function, PropertyPathExpression}; pub use spargebra::algebra::{Function, PropertyPathExpression};
@ -1538,46 +1538,12 @@ impl Default for MinusAlgorithm {
/// A set function used in aggregates (c.f. [`GraphPattern::Group`]). /// A set function used in aggregates (c.f. [`GraphPattern::Group`]).
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum AggregateExpression { pub enum AggregateExpression {
/// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount). CountSolutions {
Count {
expr: Option<Box<Expression>>,
distinct: bool, distinct: bool,
}, },
/// [Sum](https://www.w3.org/TR/sparql11-query/#defn_aggSum). FunctionCall {
Sum { name: AggregateFunction,
expr: Box<Expression>, expr: Expression,
distinct: bool,
},
/// [Avg](https://www.w3.org/TR/sparql11-query/#defn_aggAvg).
Avg {
expr: Box<Expression>,
distinct: bool,
},
/// [Min](https://www.w3.org/TR/sparql11-query/#defn_aggMin).
Min {
expr: Box<Expression>,
distinct: bool,
},
/// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax).
Max {
expr: Box<Expression>,
distinct: bool,
},
/// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat).
GroupConcat {
expr: Box<Expression>,
distinct: bool,
separator: Option<String>,
},
/// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample).
Sample {
expr: Box<Expression>,
distinct: bool,
},
/// Custom function.
Custom {
name: NamedNode,
expr: Box<Expression>,
distinct: bool, distinct: bool,
}, },
} }
@ -1588,48 +1554,16 @@ impl AggregateExpression {
graph_name: Option<&NamedNodePattern>, graph_name: Option<&NamedNodePattern>,
) -> Self { ) -> Self {
match expression { match expression {
AlAggregateExpression::Count { expr, distinct } => Self::Count { AlAggregateExpression::CountSolutions { distinct } => Self::CountSolutions {
expr: expr
.as_ref()
.map(|e| Box::new(Expression::from_sparql_algebra(e, graph_name))),
distinct: *distinct,
},
AlAggregateExpression::Sum { expr, distinct } => Self::Sum {
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)),
distinct: *distinct,
},
AlAggregateExpression::Avg { expr, distinct } => Self::Avg {
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)),
distinct: *distinct,
},
AlAggregateExpression::Min { expr, distinct } => Self::Min {
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)),
distinct: *distinct,
},
AlAggregateExpression::Max { expr, distinct } => Self::Max {
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)),
distinct: *distinct, distinct: *distinct,
}, },
AlAggregateExpression::GroupConcat { AlAggregateExpression::FunctionCall {
expr,
distinct,
separator,
} => Self::GroupConcat {
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)),
distinct: *distinct,
separator: separator.clone(),
},
AlAggregateExpression::Sample { expr, distinct } => Self::Sample {
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)),
distinct: *distinct,
},
AlAggregateExpression::Custom {
name, name,
expr, expr,
distinct, distinct,
} => Self::Custom { } => Self::FunctionCall {
name: name.clone(), name: name.clone(),
expr: Box::new(Expression::from_sparql_algebra(expr, graph_name)), expr: Expression::from_sparql_algebra(expr, graph_name),
distinct: *distinct, distinct: *distinct,
}, },
} }
@ -1639,46 +1573,16 @@ impl AggregateExpression {
impl From<&AggregateExpression> for AlAggregateExpression { impl From<&AggregateExpression> for AlAggregateExpression {
fn from(expression: &AggregateExpression) -> Self { fn from(expression: &AggregateExpression) -> Self {
match expression { match expression {
AggregateExpression::Count { expr, distinct } => Self::Count { AggregateExpression::CountSolutions { distinct } => Self::CountSolutions {
expr: expr.as_ref().map(|e| Box::new(e.as_ref().into())),
distinct: *distinct,
},
AggregateExpression::Sum { expr, distinct } => Self::Sum {
expr: Box::new(expr.as_ref().into()),
distinct: *distinct,
},
AggregateExpression::Avg { expr, distinct } => Self::Avg {
expr: Box::new(expr.as_ref().into()),
distinct: *distinct,
},
AggregateExpression::Min { expr, distinct } => Self::Min {
expr: Box::new(expr.as_ref().into()),
distinct: *distinct,
},
AggregateExpression::Max { expr, distinct } => Self::Max {
expr: Box::new(expr.as_ref().into()),
distinct: *distinct,
},
AggregateExpression::GroupConcat {
expr,
distinct,
separator,
} => Self::GroupConcat {
expr: Box::new(expr.as_ref().into()),
distinct: *distinct,
separator: separator.clone(),
},
AggregateExpression::Sample { expr, distinct } => Self::Sample {
expr: Box::new(expr.as_ref().into()),
distinct: *distinct, distinct: *distinct,
}, },
AggregateExpression::Custom { AggregateExpression::FunctionCall {
name, name,
expr, expr,
distinct, distinct,
} => Self::Custom { } => Self::FunctionCall {
name: name.clone(), name: name.clone(),
expr: Box::new(expr.as_ref().into()), expr: expr.into(),
distinct: *distinct, distinct: *distinct,
}, },
} }

@ -157,11 +157,14 @@ impl Optimizer {
inner, inner,
variables, variables,
aggregates, aggregates,
} => GraphPattern::group( } => {
// TODO: min, max and sample don't care about DISTINCT
GraphPattern::group(
Self::normalize_pattern(*inner, input_types), Self::normalize_pattern(*inner, input_types),
variables, variables,
aggregates, aggregates,
), )
}
GraphPattern::Service { GraphPattern::Service {
name, name,
inner, inner,

@ -18,7 +18,7 @@ use rand::random;
use regex::{Regex, RegexBuilder}; use regex::{Regex, RegexBuilder};
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512}; use sha2::{Sha256, Sha384, Sha512};
use spargebra::algebra::{Function, PropertyPathExpression}; use spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression};
use spargebra::term::{ use spargebra::term::{
GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern,
TriplePattern, TriplePattern,
@ -974,16 +974,8 @@ impl SimpleEvaluator {
let aggregate_input_expressions = aggregates let aggregate_input_expressions = aggregates
.iter() .iter()
.map(|(_, expression)| match expression { .map(|(_, expression)| match expression {
AggregateExpression::Count { expr, .. } => expr.as_ref().map(|e| { AggregateExpression::CountSolutions { .. } => None,
self.expression_evaluator(e, encoded_variables, stat_children) AggregateExpression::FunctionCall { expr, .. } => {
}),
AggregateExpression::Sum { expr, .. }
| AggregateExpression::Avg { expr, .. }
| AggregateExpression::Min { expr, .. }
| AggregateExpression::Max { expr, .. }
| AggregateExpression::GroupConcat { expr, .. }
| AggregateExpression::Sample { expr, .. }
| AggregateExpression::Custom { expr, .. } => {
Some(self.expression_evaluator(expr, encoded_variables, stat_children)) Some(self.expression_evaluator(expr, encoded_variables, stat_children))
} }
}) })
@ -1101,52 +1093,26 @@ impl SimpleEvaluator {
dataset: &Rc<DatasetView>, dataset: &Rc<DatasetView>,
expression: &AggregateExpression, expression: &AggregateExpression,
) -> Box<dyn Fn() -> Box<dyn Accumulator>> { ) -> Box<dyn Fn() -> Box<dyn Accumulator>> {
match expression { let mut accumulator: Box<dyn Fn() -> Box<dyn Accumulator>> = match expression {
AggregateExpression::Count { distinct, .. } => { AggregateExpression::CountSolutions { .. } => {
if *distinct {
Box::new(|| Box::new(DistinctAccumulator::new(CountAccumulator::default())))
} else {
Box::new(|| Box::<CountAccumulator>::default()) Box::new(|| Box::<CountAccumulator>::default())
} }
} AggregateExpression::FunctionCall { name, .. } => match name {
AggregateExpression::Sum { distinct, .. } => { AggregateFunction::Count => Box::new(|| Box::<CountAccumulator>::default()),
if *distinct { AggregateFunction::Sum => Box::new(|| Box::<SumAccumulator>::default()),
Box::new(|| Box::new(DistinctAccumulator::new(SumAccumulator::default()))) AggregateFunction::Min => {
} else {
Box::new(|| Box::<SumAccumulator>::default())
}
}
AggregateExpression::Min { .. } => {
let dataset = Rc::clone(dataset); let dataset = Rc::clone(dataset);
Box::new(move || Box::new(MinAccumulator::new(Rc::clone(&dataset)))) Box::new(move || Box::new(MinAccumulator::new(Rc::clone(&dataset))))
} // DISTINCT does not make sense with min }
AggregateExpression::Max { .. } => { AggregateFunction::Max => {
let dataset = Rc::clone(dataset); let dataset = Rc::clone(dataset);
Box::new(move || Box::new(MaxAccumulator::new(Rc::clone(&dataset)))) Box::new(move || Box::new(MaxAccumulator::new(Rc::clone(&dataset))))
} // DISTINCT does not make sense with max
AggregateExpression::Avg { distinct, .. } => {
if *distinct {
Box::new(|| Box::new(DistinctAccumulator::new(AvgAccumulator::default())))
} else {
Box::new(|| Box::<AvgAccumulator>::default())
} }
} AggregateFunction::Avg => Box::new(|| Box::<AvgAccumulator>::default()),
AggregateExpression::Sample { .. } => Box::new(|| Box::<SampleAccumulator>::default()), // DISTINCT does not make sense with sample AggregateFunction::Sample => Box::new(|| Box::<SampleAccumulator>::default()),
AggregateExpression::GroupConcat { AggregateFunction::GroupConcat { separator } => {
distinct,
separator,
..
} => {
let dataset = Rc::clone(dataset); let dataset = Rc::clone(dataset);
let separator = Rc::from(separator.as_deref().unwrap_or(" ")); let separator = Rc::from(separator.as_deref().unwrap_or(" "));
if *distinct {
Box::new(move || {
Box::new(DistinctAccumulator::new(GroupConcatAccumulator::new(
Rc::clone(&dataset),
Rc::clone(&separator),
)))
})
} else {
Box::new(move || { Box::new(move || {
Box::new(GroupConcatAccumulator::new( Box::new(GroupConcatAccumulator::new(
Rc::clone(&dataset), Rc::clone(&dataset),
@ -1154,9 +1120,17 @@ impl SimpleEvaluator {
)) ))
}) })
} }
AggregateFunction::Custom(_) => Box::new(|| Box::new(FailingAccumulator)),
},
};
if matches!(
expression,
AggregateExpression::CountSolutions { distinct: true }
| AggregateExpression::FunctionCall { distinct: true, .. }
) {
accumulator = Box::new(move || Box::new(Deduplicate::new(accumulator())));
} }
AggregateExpression::Custom { .. } => Box::new(|| Box::new(FailingAccumulator)), accumulator
}
} }
fn expression_evaluator( fn expression_evaluator(
@ -5262,14 +5236,13 @@ trait Accumulator {
fn state(&self) -> Option<EncodedTerm>; fn state(&self) -> Option<EncodedTerm>;
} }
#[derive(Default, Debug)] struct Deduplicate {
struct DistinctAccumulator<T: Accumulator> {
seen: HashSet<Option<EncodedTerm>>, seen: HashSet<Option<EncodedTerm>>,
inner: T, inner: Box<dyn Accumulator>,
} }
impl<T: Accumulator> DistinctAccumulator<T> { impl Deduplicate {
fn new(inner: T) -> Self { fn new(inner: Box<dyn Accumulator>) -> Self {
Self { Self {
seen: HashSet::default(), seen: HashSet::default(),
inner, inner,
@ -5277,7 +5250,7 @@ impl<T: Accumulator> DistinctAccumulator<T> {
} }
} }
impl<T: Accumulator> Accumulator for DistinctAccumulator<T> { impl Accumulator for Deduplicate {
fn add(&mut self, element: Option<EncodedTerm>) { fn add(&mut self, element: Option<EncodedTerm>) {
if self.seen.insert(element.clone()) { if self.seen.insert(element.clone()) {
self.inner.add(element) self.inner.add(element)

Loading…
Cancel
Save