use crate::sparql::eval::StringOrStoreString; use crate::store::numeric_encoder::{ EncodedQuad, EncodedTerm, Encoder, MemoryStrStore, StrContainer, StrLookup, ENCODED_DEFAULT_GRAPH, }; use crate::store::StoreConnection; use crate::Result; use std::cell::{RefCell, RefMut}; use std::collections::BTreeSet; pub type EncodedTuple = Vec>; #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum PlanNode { Init, StaticBindings { tuples: Vec, }, QuadPatternJoin { child: Box, subject: PatternValue, predicate: PatternValue, object: PatternValue, graph_name: PatternValue, }, PathPatternJoin { child: Box, subject: PatternValue, path: PlanPropertyPath, object: PatternValue, graph_name: PatternValue, }, Join { left: Box, right: Box, }, AntiJoin { left: Box, right: Box, }, Filter { child: Box, expression: PlanExpression, }, Union { children: Vec, }, LeftJoin { left: Box, right: Box, possible_problem_vars: Vec, //Variables that should not be part of the entry of the left join }, Extend { child: Box, position: usize, expression: PlanExpression, }, Sort { child: Box, by: Vec, }, HashDeduplicate { child: Box, }, Skip { child: Box, count: usize, }, Limit { child: Box, count: usize, }, Project { child: Box, mapping: Vec<(usize, usize)>, // pairs of (variable key in child, variable key in output) }, Aggregate { // By definition the group by key are the range 0..key_mapping.len() child: Box, key_mapping: Vec, //index of the new key for each old key (that is the vec key) aggregates: Vec<(PlanAggregation, usize)>, }, } impl PlanNode { pub fn variables(&self) -> BTreeSet { let mut set = BTreeSet::default(); self.add_variables(&mut set); set } fn add_variables(&self, set: &mut BTreeSet) { match self { PlanNode::Init => (), PlanNode::StaticBindings { tuples } => { for tuple in tuples { for (key, value) in tuple.iter().enumerate() { if value.is_some() { set.insert(key); } } } } PlanNode::QuadPatternJoin { child, subject, predicate, object, graph_name, } => { if let PatternValue::Variable(var) = subject { set.insert(*var); } if let PatternValue::Variable(var) = predicate { set.insert(*var); } if let PatternValue::Variable(var) = object { set.insert(*var); } if let PatternValue::Variable(var) = graph_name { set.insert(*var); } child.add_variables(set); } PlanNode::PathPatternJoin { child, subject, object, graph_name, .. } => { if let PatternValue::Variable(var) = subject { set.insert(*var); } if let PatternValue::Variable(var) = object { set.insert(*var); } if let PatternValue::Variable(var) = graph_name { set.insert(*var); } child.add_variables(set); } PlanNode::Filter { child, expression } => { child.add_variables(set); expression.add_variables(set); } PlanNode::Union { children } => { for child in children { child.add_variables(set); } } PlanNode::Join { left, right } | PlanNode::AntiJoin { left, right } | PlanNode::LeftJoin { left, right, .. } => { left.add_variables(set); right.add_variables(set); } PlanNode::Extend { child, position, .. } => { set.insert(*position); child.add_variables(set); } PlanNode::Sort { child, .. } => child.add_variables(set), PlanNode::HashDeduplicate { child } => child.add_variables(set), PlanNode::Skip { child, .. } => child.add_variables(set), PlanNode::Limit { child, .. } => child.add_variables(set), PlanNode::Project { child: _, mapping } => { for i in 0..mapping.len() { set.insert(i); } } PlanNode::Aggregate { key_mapping, aggregates, .. } => { set.extend(key_mapping); for (_, var) in aggregates { set.insert(*var); } } } } } #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub enum PatternValue { Constant(EncodedTerm), Variable(usize), } impl PatternValue { pub fn is_var(&self) -> bool { match self { PatternValue::Constant(_) => false, PatternValue::Variable(_) => true, } } } #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum PlanExpression { Constant(EncodedTerm), Variable(usize), Exists(Box), Or(Box, Box), And(Box, Box), Equal(Box, Box), NotEqual(Box, Box), Greater(Box, Box), GreaterOrEq(Box, Box), Lower(Box, Box), LowerOrEq(Box, Box), In(Box, Vec), Add(Box, Box), Sub(Box, Box), Mul(Box, Box), Div(Box, Box), UnaryPlus(Box), UnaryMinus(Box), UnaryNot(Box), Str(Box), Lang(Box), LangMatches(Box, Box), Datatype(Box), Bound(usize), IRI(Box), BNode(Option>), Rand, Abs(Box), Ceil(Box), Floor(Box), Round(Box), Concat(Vec), SubStr( Box, Box, Option>, ), StrLen(Box), Replace( Box, Box, Box, Option>, ), UCase(Box), LCase(Box), EncodeForURI(Box), Contains(Box, Box), StrStarts(Box, Box), StrEnds(Box, Box), StrBefore(Box, Box), StrAfter(Box, Box), Year(Box), Month(Box), Day(Box), Hours(Box), Minutes(Box), Seconds(Box), Timezone(Box), Tz(Box), Now, UUID, StrUUID, MD5(Box), SHA1(Box), SHA256(Box), SHA384(Box), SHA512(Box), Coalesce(Vec), If( Box, Box, Box, ), StrLang(Box, Box), StrDT(Box, Box), SameTerm(Box, Box), IsIRI(Box), IsBlank(Box), IsLiteral(Box), IsNumeric(Box), Regex( Box, Box, Option>, ), BooleanCast(Box), DoubleCast(Box), FloatCast(Box), DecimalCast(Box), IntegerCast(Box), DateCast(Box), TimeCast(Box), DateTimeCast(Box), StringCast(Box), } impl PlanExpression { fn add_variables(&self, set: &mut BTreeSet) { match self { PlanExpression::Constant(_) | PlanExpression::BNode(None) | PlanExpression::UUID | PlanExpression::StrUUID | PlanExpression::Rand | PlanExpression::Now => (), PlanExpression::Variable(v) | PlanExpression::Bound(v) => { set.insert(*v); } PlanExpression::UnaryPlus(e) | PlanExpression::UnaryMinus(e) | PlanExpression::UnaryNot(e) | PlanExpression::Str(e) | PlanExpression::Lang(e) | PlanExpression::Datatype(e) | PlanExpression::IRI(e) | PlanExpression::BNode(Some(e)) | PlanExpression::Year(e) | PlanExpression::Month(e) | PlanExpression::Day(e) | PlanExpression::Hours(e) | PlanExpression::Minutes(e) | PlanExpression::Seconds(e) | PlanExpression::IsIRI(e) | PlanExpression::IsBlank(e) | PlanExpression::IsLiteral(e) | PlanExpression::IsNumeric(e) | PlanExpression::BooleanCast(e) | PlanExpression::DoubleCast(e) | PlanExpression::FloatCast(e) | PlanExpression::IntegerCast(e) | PlanExpression::DecimalCast(e) | PlanExpression::DateCast(e) | PlanExpression::TimeCast(e) | PlanExpression::DateTimeCast(e) | PlanExpression::StringCast(e) | PlanExpression::Abs(e) | PlanExpression::Ceil(e) | PlanExpression::Floor(e) | PlanExpression::Round(e) | PlanExpression::StrLen(e) | PlanExpression::UCase(e) | PlanExpression::LCase(e) | PlanExpression::EncodeForURI(e) | PlanExpression::Timezone(e) | PlanExpression::Tz(e) | PlanExpression::MD5(e) | PlanExpression::SHA1(e) | PlanExpression::SHA256(e) | PlanExpression::SHA384(e) | PlanExpression::SHA512(e) => { e.add_variables(set); } PlanExpression::Or(a, b) | PlanExpression::And(a, b) | PlanExpression::Equal(a, b) | PlanExpression::NotEqual(a, b) | PlanExpression::Greater(a, b) | PlanExpression::GreaterOrEq(a, b) | PlanExpression::Lower(a, b) | PlanExpression::LowerOrEq(a, b) | PlanExpression::Add(a, b) | PlanExpression::Sub(a, b) | PlanExpression::Mul(a, b) | PlanExpression::Div(a, b) | PlanExpression::SameTerm(a, b) | PlanExpression::LangMatches(a, b) | PlanExpression::StrLang(a, b) | PlanExpression::Contains(a, b) | PlanExpression::StrStarts(a, b) | PlanExpression::StrEnds(a, b) | PlanExpression::StrBefore(a, b) | PlanExpression::StrAfter(a, b) | PlanExpression::StrDT(a, b) | PlanExpression::Regex(a, b, None) | PlanExpression::SubStr(a, b, None) => { a.add_variables(set); b.add_variables(set); } PlanExpression::If(a, b, c) | PlanExpression::SubStr(a, b, Some(c)) | PlanExpression::Replace(a, b, c, None) | PlanExpression::Regex(a, b, Some(c)) => { a.add_variables(set); b.add_variables(set); c.add_variables(set); } PlanExpression::Replace(a, b, c, Some(d)) => { a.add_variables(set); b.add_variables(set); c.add_variables(set); d.add_variables(set); } PlanExpression::Coalesce(l) | PlanExpression::Concat(l) => { for e in l { e.add_variables(set); } } PlanExpression::In(e, l) => { e.add_variables(set); for e in l { e.add_variables(set); } } PlanExpression::Exists(n) => n.add_variables(set), } } } #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PlanAggregation { pub function: PlanAggregationFunction, pub parameter: Option, pub distinct: bool, } #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum PlanAggregationFunction { Count, Sum, Min, Max, Avg, Sample, GroupConcat { separator: String }, } #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum PlanPropertyPath { PredicatePath(EncodedTerm), InversePath(Box), SequencePath(Box, Box), AlternativePath(Box, Box), ZeroOrMorePath(Box), OneOrMorePath(Box), ZeroOrOnePath(Box), NegatedPropertySet(Vec), } #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum Comparator { Asc(PlanExpression), Desc(PlanExpression), } #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub struct TripleTemplate { pub subject: TripleTemplateValue, pub predicate: TripleTemplateValue, pub object: TripleTemplateValue, } #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub enum TripleTemplateValue { Constant(EncodedTerm), BlankNode(usize), Variable(usize), } pub struct DatasetView { store: S, extra: RefCell, default_graph_as_union: bool, } impl DatasetView { pub fn new(store: S, default_graph_as_union: bool) -> Self { Self { store, extra: RefCell::new(MemoryStrStore::default()), default_graph_as_union, } } pub fn quads_for_pattern<'a>( &'a self, subject: Option, predicate: Option, object: Option, graph_name: Option, ) -> Box> + 'a> { if graph_name == None { Box::new( self.store .quads_for_pattern(subject, predicate, object, None) .filter(|quad| match quad { Err(_) => true, Ok(quad) => quad.graph_name != ENCODED_DEFAULT_GRAPH, }), ) } else if graph_name == Some(ENCODED_DEFAULT_GRAPH) && self.default_graph_as_union { Box::new( self.store .quads_for_pattern(subject, predicate, object, None) .map(|quad| { let quad = quad?; Ok(EncodedQuad::new( quad.subject, quad.predicate, quad.object, ENCODED_DEFAULT_GRAPH, )) }), ) } else { self.store .quads_for_pattern(subject, predicate, object, graph_name) } } pub fn encoder<'a>(&'a self) -> impl Encoder + StrContainer + 'a { DatasetViewStrContainer { store: &self.store, extra: self.extra.borrow_mut(), } } } impl StrLookup for DatasetView { type StrType = StringOrStoreString; fn get_str(&self, id: u128) -> Result>> { Ok(if let Some(value) = self.extra.borrow().get_str(id)? { Some(StringOrStoreString::String(value)) } else if let Some(value) = self.store.get_str(id)? { Some(StringOrStoreString::Store(value)) } else { None }) } } struct DatasetViewStrContainer<'a, S: StoreConnection> { store: &'a S, extra: RefMut<'a, MemoryStrStore>, } impl<'a, S: StoreConnection> StrContainer for DatasetViewStrContainer<'a, S> { fn insert_str(&mut self, key: u128, value: &str) -> Result<()> { if self.store.get_str(key)?.is_none() { self.extra.insert_str(key, value) } else { Ok(()) } } }