Adds custom aggregate functions to SPARQL parser and algebra

pull/71/head
Tpt 4 years ago
parent 43d8260acf
commit 91bcc04245
  1. 68
      lib/src/sparql/algebra.rs
  2. 44
      lib/src/sparql/parser.rs
  3. 39
      lib/src/sparql/plan_builder.rs

@ -932,7 +932,7 @@ pub enum GraphPattern {
Group { Group {
inner: Box<GraphPattern>, inner: Box<GraphPattern>,
by: Vec<Variable>, by: Vec<Variable>,
aggregates: Vec<(Variable, SetFunction)>, aggregates: Vec<(Variable, AggregationFunction)>,
}, },
/// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService) /// [Service](https://www.w3.org/TR/sparql11-federated-query/#defn_evalService)
Service { Service {
@ -1253,7 +1253,7 @@ impl<'a> fmt::Display for SparqlGraphPattern<'a> {
"{{ SELECT {} WHERE {{ {} }} GROUP BY {} }}", "{{ SELECT {} WHERE {{ {} }} GROUP BY {} }}",
aggregates aggregates
.iter() .iter()
.map(|(v, a)| format!("({} AS {})", SparqlAggregation(a), v)) .map(|(v, a)| format!("({} AS {})", SparqlAggregationFunction(a), v))
.chain(by.iter().map(|e| e.to_string())) .chain(by.iter().map(|e| e.to_string()))
.collect::<Vec<String>>() .collect::<Vec<String>>()
.join(" "), .join(" "),
@ -1369,7 +1369,7 @@ fn build_sparql_select_arguments(args: &[Variable]) -> String {
/// A set function used in aggregates (c.f. [`GraphPattern::Group`]) /// A set function used in aggregates (c.f. [`GraphPattern::Group`])
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum SetFunction { pub enum AggregationFunction {
/// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount)
Count { Count {
expr: Option<Box<Expression>>, expr: Option<Box<Expression>>,
@ -1406,12 +1406,18 @@ pub enum SetFunction {
expr: Box<Expression>, expr: Box<Expression>,
distinct: bool, distinct: bool,
}, },
/// Custom function
Custom {
name: NamedNode,
expr: Box<Expression>,
distinct: bool,
},
} }
impl fmt::Display for SetFunction { impl fmt::Display for AggregationFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
SetFunction::Count { expr, distinct } => { AggregationFunction::Count { expr, distinct } => {
if *distinct { if *distinct {
if let Some(expr) = expr { if let Some(expr) = expr {
write!(f, "(count distinct {})", expr) write!(f, "(count distinct {})", expr)
@ -1424,42 +1430,42 @@ impl fmt::Display for SetFunction {
write!(f, "(count)") write!(f, "(count)")
} }
} }
SetFunction::Sum { expr, distinct } => { AggregationFunction::Sum { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "(sum distinct {})", expr) write!(f, "(sum distinct {})", expr)
} else { } else {
write!(f, "(sum {})", expr) write!(f, "(sum {})", expr)
} }
} }
SetFunction::Avg { expr, distinct } => { AggregationFunction::Avg { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "(avg distinct {})", expr) write!(f, "(avg distinct {})", expr)
} else { } else {
write!(f, "(avg {})", expr) write!(f, "(avg {})", expr)
} }
} }
SetFunction::Min { expr, distinct } => { AggregationFunction::Min { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "(min distinct {})", expr) write!(f, "(min distinct {})", expr)
} else { } else {
write!(f, "(min {})", expr) write!(f, "(min {})", expr)
} }
} }
SetFunction::Max { expr, distinct } => { AggregationFunction::Max { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "(max distinct {})", expr) write!(f, "(max distinct {})", expr)
} else { } else {
write!(f, "(max {})", expr) write!(f, "(max {})", expr)
} }
} }
SetFunction::Sample { expr, distinct } => { AggregationFunction::Sample { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "(sample distinct {})", expr) write!(f, "(sample distinct {})", expr)
} else { } else {
write!(f, "(sample {})", expr) write!(f, "(sample {})", expr)
} }
} }
SetFunction::GroupConcat { AggregationFunction::GroupConcat {
expr, expr,
distinct, distinct,
separator, separator,
@ -1476,16 +1482,27 @@ impl fmt::Display for SetFunction {
write!(f, "(group_concat {})", expr) write!(f, "(group_concat {})", expr)
} }
} }
AggregationFunction::Custom {
name,
expr,
distinct,
} => {
if *distinct {
write!(f, "({} distinct {})", name, expr)
} else {
write!(f, "({} {})", name, expr)
}
}
} }
} }
} }
// Display adapter: wraps a borrowed aggregate so that its `fmt::Display` impl can write it in SPARQL query syntax (e.g. `COUNT(DISTINCT ...)`).
struct SparqlAggregation<'a>(&'a SetFunction); struct SparqlAggregationFunction<'a>(&'a AggregationFunction);
impl<'a> fmt::Display for SparqlAggregation<'a> { impl<'a> fmt::Display for SparqlAggregationFunction<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.0 { match self.0 {
SetFunction::Count { expr, distinct } => { AggregationFunction::Count { expr, distinct } => {
if *distinct { if *distinct {
if let Some(expr) = expr { if let Some(expr) = expr {
write!(f, "COUNT(DISTINCT {})", SparqlExpression(expr)) write!(f, "COUNT(DISTINCT {})", SparqlExpression(expr))
@ -1498,42 +1515,42 @@ impl<'a> fmt::Display for SparqlAggregation<'a> {
write!(f, "COUNT(*)") write!(f, "COUNT(*)")
} }
} }
SetFunction::Sum { expr, distinct } => { AggregationFunction::Sum { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "SUM(DISTINCT {})", SparqlExpression(expr)) write!(f, "SUM(DISTINCT {})", SparqlExpression(expr))
} else { } else {
write!(f, "SUM({})", SparqlExpression(expr)) write!(f, "SUM({})", SparqlExpression(expr))
} }
} }
SetFunction::Min { expr, distinct } => { AggregationFunction::Min { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "MIN(DISTINCT {})", SparqlExpression(expr)) write!(f, "MIN(DISTINCT {})", SparqlExpression(expr))
} else { } else {
write!(f, "MIN({})", SparqlExpression(expr)) write!(f, "MIN({})", SparqlExpression(expr))
} }
} }
SetFunction::Max { expr, distinct } => { AggregationFunction::Max { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "MAX(DISTINCT {})", SparqlExpression(expr)) write!(f, "MAX(DISTINCT {})", SparqlExpression(expr))
} else { } else {
write!(f, "MAX({})", SparqlExpression(expr)) write!(f, "MAX({})", SparqlExpression(expr))
} }
} }
SetFunction::Avg { expr, distinct } => { AggregationFunction::Avg { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "AVG(DISTINCT {})", SparqlExpression(expr)) write!(f, "AVG(DISTINCT {})", SparqlExpression(expr))
} else { } else {
write!(f, "AVG({})", SparqlExpression(expr)) write!(f, "AVG({})", SparqlExpression(expr))
} }
} }
SetFunction::Sample { expr, distinct } => { AggregationFunction::Sample { expr, distinct } => {
if *distinct { if *distinct {
write!(f, "SAMPLE(DISTINCT {})", SparqlExpression(expr)) write!(f, "SAMPLE(DISTINCT {})", SparqlExpression(expr))
} else { } else {
write!(f, "SAMPLE({})", SparqlExpression(expr)) write!(f, "SAMPLE({})", SparqlExpression(expr))
} }
} }
SetFunction::GroupConcat { AggregationFunction::GroupConcat {
expr, expr,
distinct, distinct,
separator, separator,
@ -1560,6 +1577,17 @@ impl<'a> fmt::Display for SparqlAggregation<'a> {
write!(f, "GROUP_CONCAT({})", SparqlExpression(expr)) write!(f, "GROUP_CONCAT({})", SparqlExpression(expr))
} }
} }
AggregationFunction::Custom {
name,
expr,
distinct,
} => {
if *distinct {
write!(f, "{}(DISTINCT {})", name, SparqlExpression(expr))
} else {
write!(f, "{}({})", name, SparqlExpression(expr))
}
}
} }
} }
} }

@ -487,7 +487,7 @@ pub struct ParserState {
namespaces: HashMap<String, String>, namespaces: HashMap<String, String>,
used_bnodes: HashSet<BlankNode>, used_bnodes: HashSet<BlankNode>,
currently_used_bnodes: HashSet<BlankNode>, currently_used_bnodes: HashSet<BlankNode>,
aggregates: Vec<Vec<(Variable, SetFunction)>>, aggregates: Vec<Vec<(Variable, AggregationFunction)>>,
} }
impl ParserState { impl ParserState {
@ -499,7 +499,7 @@ impl ParserState {
} }
} }
fn new_aggregation(&mut self, agg: SetFunction) -> Result<Variable, &'static str> { fn new_aggregation(&mut self, agg: AggregationFunction) -> Result<Variable, &'static str> {
let aggregates = self.aggregates.last_mut().ok_or("Unexpected aggregate")?; let aggregates = self.aggregates.last_mut().ok_or("Unexpected aggregate")?;
Ok(aggregates Ok(aggregates
.iter() .iter()
@ -1825,25 +1825,27 @@ parser! {
// `NOT EXISTS { ... }`: produces the negation of an `EXISTS` test over the parsed group graph pattern.
rule NotExistsFunc() -> Expression = i("NOT") _ i("EXISTS") _ pattern:GroupGraphPattern() {
    let exists = Expression::Exists(Box::new(pattern));
    Expression::Not(Box::new(exists))
}
//[127]
// Parses an aggregate call into an `AggregationFunction`.
//
// The alternatives form an ordered choice: for every keyword the `DISTINCT` form is listed
// before the plain form, and the built-in aggregates are listed before the two trailing
// alternatives that accept any `iri(...)` call as a custom aggregate.
// The `distinct` flag of the produced value mirrors whether `DISTINCT` was matched.
rule Aggregate() -> AggregationFunction =
    // COUNT accepts either `*` (no expression) or an expression.
    i("COUNT") _ "(" _ i("DISTINCT") _ "*" _ ")" { AggregationFunction::Count { expr: None, distinct: true } } /
    i("COUNT") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Count { expr: Some(Box::new(e)), distinct: true } } /
    i("COUNT") _ "(" _ "*" _ ")" { AggregationFunction::Count { expr: None, distinct: false } } /
    i("COUNT") _ "(" _ e:Expression() _ ")" { AggregationFunction::Count { expr: Some(Box::new(e)), distinct: false } } /
    i("SUM") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Sum { expr: Box::new(e), distinct: true } } /
    i("SUM") _ "(" _ e:Expression() _ ")" { AggregationFunction::Sum { expr: Box::new(e), distinct: false } } /
    i("MIN") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Min { expr: Box::new(e), distinct: true } } /
    i("MIN") _ "(" _ e:Expression() _ ")" { AggregationFunction::Min { expr: Box::new(e), distinct: false } } /
    i("MAX") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Max { expr: Box::new(e), distinct: true } } /
    i("MAX") _ "(" _ e:Expression() _ ")" { AggregationFunction::Max { expr: Box::new(e), distinct: false } } /
    i("AVG") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Avg { expr: Box::new(e), distinct: true } } /
    i("AVG") _ "(" _ e:Expression() _ ")" { AggregationFunction::Avg { expr: Box::new(e), distinct: false } } /
    i("SAMPLE") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Sample { expr: Box::new(e), distinct: true } } /
    i("SAMPLE") _ "(" _ e:Expression() _ ")" { AggregationFunction::Sample { expr: Box::new(e), distinct: false } } /
    // GROUP_CONCAT optionally carries `; SEPARATOR = "..."`; `separator` is `None` when it is absent.
    i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ e:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregationFunction::GroupConcat { expr: Box::new(e), distinct: true, separator: Some(s) } } /
    i("GROUP_CONCAT") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::GroupConcat { expr: Box::new(e), distinct: true, separator: None } } /
    // No DISTINCT keyword is matched by this alternative, so the flag must be false
    // (it was previously set to true, which silently deduplicated the concatenated values).
    i("GROUP_CONCAT") _ "(" _ e:Expression() _ ";" _ i("SEPARATOR") _ "=" _ s:String() _ ")" { AggregationFunction::GroupConcat { expr: Box::new(e), distinct: false, separator: Some(s) } } /
    i("GROUP_CONCAT") _ "(" _ e:Expression() _ ")" { AggregationFunction::GroupConcat { expr: Box::new(e), distinct: false, separator: None } } /
    // Custom aggregates: an IRI followed by a single parenthesised expression.
    name:iri() _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregationFunction::Custom { name, expr: Box::new(e), distinct: true } } /
    name:iri() _ "(" _ e:Expression() _ ")" { AggregationFunction::Custom { name, expr: Box::new(e), distinct: false } }
//[128] //[128]
rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? { rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? {

@ -818,56 +818,59 @@ impl<E: WriteEncoder<Error = EvaluationError>> PlanBuilder<E> {
fn build_for_aggregate( fn build_for_aggregate(
&mut self, &mut self,
aggregate: &SetFunction, aggregate: &AggregationFunction,
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
graph_name: PatternValue<E::StrId>, graph_name: PatternValue<E::StrId>,
) -> Result<PlanAggregation<E::StrId>, EvaluationError> { ) -> Result<PlanAggregation<E::StrId>, EvaluationError> {
Ok(match aggregate { match aggregate {
SetFunction::Count { expr, distinct } => PlanAggregation { AggregationFunction::Count { expr, distinct } => Ok(PlanAggregation {
function: PlanAggregationFunction::Count, function: PlanAggregationFunction::Count,
parameter: match expr { parameter: match expr {
Some(expr) => Some(self.build_for_expression(expr, variables, graph_name)?), Some(expr) => Some(self.build_for_expression(expr, variables, graph_name)?),
None => None, None => None,
}, },
distinct: *distinct, distinct: *distinct,
}, }),
SetFunction::Sum { expr, distinct } => PlanAggregation { AggregationFunction::Sum { expr, distinct } => Ok(PlanAggregation {
function: PlanAggregationFunction::Sum, function: PlanAggregationFunction::Sum,
parameter: Some(self.build_for_expression(expr, variables, graph_name)?), parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
distinct: *distinct, distinct: *distinct,
}, }),
SetFunction::Min { expr, distinct } => PlanAggregation { AggregationFunction::Min { expr, distinct } => Ok(PlanAggregation {
function: PlanAggregationFunction::Min, function: PlanAggregationFunction::Min,
parameter: Some(self.build_for_expression(expr, variables, graph_name)?), parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
distinct: *distinct, distinct: *distinct,
}, }),
SetFunction::Max { expr, distinct } => PlanAggregation { AggregationFunction::Max { expr, distinct } => Ok(PlanAggregation {
function: PlanAggregationFunction::Max, function: PlanAggregationFunction::Max,
parameter: Some(self.build_for_expression(expr, variables, graph_name)?), parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
distinct: *distinct, distinct: *distinct,
}, }),
SetFunction::Avg { expr, distinct } => PlanAggregation { AggregationFunction::Avg { expr, distinct } => Ok(PlanAggregation {
function: PlanAggregationFunction::Avg, function: PlanAggregationFunction::Avg,
parameter: Some(self.build_for_expression(expr, variables, graph_name)?), parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
distinct: *distinct, distinct: *distinct,
}, }),
SetFunction::Sample { expr, distinct } => PlanAggregation { AggregationFunction::Sample { expr, distinct } => Ok(PlanAggregation {
function: PlanAggregationFunction::Sample, function: PlanAggregationFunction::Sample,
parameter: Some(self.build_for_expression(expr, variables, graph_name)?), parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
distinct: *distinct, distinct: *distinct,
}, }),
SetFunction::GroupConcat { AggregationFunction::GroupConcat {
expr, expr,
distinct, distinct,
separator, separator,
} => PlanAggregation { } => Ok(PlanAggregation {
function: PlanAggregationFunction::GroupConcat { function: PlanAggregationFunction::GroupConcat {
separator: Rc::new(separator.clone().unwrap_or_else(|| " ".to_string())), separator: Rc::new(separator.clone().unwrap_or_else(|| " ".to_string())),
}, },
parameter: Some(self.build_for_expression(expr, variables, graph_name)?), parameter: Some(self.build_for_expression(expr, variables, graph_name)?),
distinct: *distinct, distinct: *distinct,
}, }),
}) AggregationFunction::Custom { .. } => Err(EvaluationError::msg(
"Custom aggregation functions are not supported yet",
)),
}
} }
fn build_for_graph_template( fn build_for_graph_template(

Loading…
Cancel
Save