Makes SPARQL string storage insertion infallible

pull/171/head
Tpt 3 years ago
parent fa7ae0353f
commit f9d9530a1b
  1. 96
      lib/src/sparql/dataset.rs
  2. 190
      lib/src/sparql/eval.rs
  3. 2
      lib/src/sparql/mod.rs
  4. 186
      lib/src/sparql/plan_builder.rs

@ -1,9 +1,9 @@
use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset; use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup}; use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::storage::Storage; use crate::storage::Storage;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap; use std::collections::HashMap;
use std::iter::empty; use std::iter::empty;
@ -135,6 +135,82 @@ impl DatasetView {
) )
} }
} }
/// Encodes `term` and registers the strings it refers to.
///
/// The term is converted to a [`TermRef`], then to an [`EncodedTerm`], and the
/// pair is handed to `insert_term_values` so that the strings behind the
/// encoded ids are recorded before the encoded term is returned.
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
    let term_ref: TermRef<'a> = term.into();
    let encoded_term: EncodedTerm = term_ref.into();
    self.insert_term_values(term_ref, &encoded_term);
    encoded_term
}
/// Records, through `insert_str`, every string referenced by an id field of `encoded`.
///
/// `term` supplies the string values and `encoded` supplies the matching ids.
/// Only the (term, encoded) combinations listed below carry ids; every other
/// combination is ignored. Quoted triples are handled by recursing into their
/// subject, predicate and object.
fn insert_term_values(&self, term: TermRef<'_>, encoded: &EncodedTerm) {
    match (term, encoded) {
        // Nodes identified by a single string id.
        (TermRef::NamedNode(named), EncodedTerm::NamedNode { iri_id }) => {
            self.insert_str(iri_id, named.as_str());
        }
        (TermRef::BlankNode(blank), EncodedTerm::BigBlankNode { id_id }) => {
            self.insert_str(id_id, blank.as_str());
        }

        // Quoted triples: register the strings of each component in turn.
        (TermRef::Triple(quoted), EncodedTerm::Triple(encoded_triple)) => {
            self.insert_term_values(quoted.subject.as_ref().into(), &encoded_triple.subject);
            self.insert_term_values(
                quoted.predicate.as_ref().into(),
                &encoded_triple.predicate,
            );
            self.insert_term_values(quoted.object.as_ref(), &encoded_triple.object);
        }

        // Literals whose lexical value is referenced by id.
        (TermRef::Literal(lit), EncodedTerm::BigStringLiteral { value_id }) => {
            self.insert_str(value_id, lit.value());
        }
        (TermRef::Literal(lit), EncodedTerm::BigSmallLangStringLiteral { value_id, .. }) => {
            self.insert_str(value_id, lit.value());
        }

        // Literals whose language tag is referenced by id.
        (TermRef::Literal(lit), EncodedTerm::SmallBigLangStringLiteral { language_id, .. }) => {
            if let Some(tag) = lit.language() {
                self.insert_str(language_id, tag);
            }
        }
        (
            TermRef::Literal(lit),
            EncodedTerm::BigBigLangStringLiteral {
                value_id,
                language_id,
            },
        ) => {
            self.insert_str(value_id, lit.value());
            if let Some(tag) = lit.language() {
                self.insert_str(language_id, tag);
            }
        }

        // Typed literals: the datatype IRI, and possibly the value, are referenced by id.
        (TermRef::Literal(lit), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => {
            self.insert_str(datatype_id, lit.datatype().as_str());
        }
        (
            TermRef::Literal(lit),
            EncodedTerm::BigTypedLiteral {
                value_id,
                datatype_id,
            },
        ) => {
            self.insert_str(value_id, lit.value());
            self.insert_str(datatype_id, lit.datatype().as_str());
        }

        // Any other combination has no id field handled here.
        _ => {}
    }
}
/// Remembers `value` under `key` in the `extra` map unless the storage already has it.
///
/// Nothing is done when `self.storage.contains_str(key)` returns `Ok(true)`.
/// For any other result, including an error from the storage lookup, `value`
/// is copied into `extra`, leaving an already present entry for `key` untouched.
pub fn insert_str(&self, key: &StrHash, value: &str) {
    let already_stored = matches!(self.storage.contains_str(key), Ok(true));
    if !already_stored {
        // `or_insert_with` only allocates the owned copy when the slot is vacant.
        self.extra
            .borrow_mut()
            .entry(*key)
            .or_insert_with(|| value.to_owned());
    }
}
} }
impl StrLookup for DatasetView { impl StrLookup for DatasetView {
@ -153,22 +229,6 @@ impl StrLookup for DatasetView {
} }
} }
impl StrContainer for DatasetView {
    /// Stores `value` under `key` in the `extra` map when it is not known yet.
    ///
    /// Returns `Ok(true)` when a new entry was added to `extra`, and `Ok(false)`
    /// when the storage already contains `key` or `extra` already has an entry
    /// for it. An error from `self.storage.contains_str` is propagated with `?`.
    fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, EvaluationError> {
        if self.storage.contains_str(key)? {
            return Ok(false);
        }
        let inserted = match self.extra.borrow_mut().entry(*key) {
            Entry::Vacant(slot) => {
                slot.insert(value.to_owned());
                true
            }
            Entry::Occupied(_) => false,
        };
        Ok(inserted)
    }
}
struct EncodedDatasetSpec { struct EncodedDatasetSpec {
default: Option<Vec<EncodedTerm>>, default: Option<Vec<EncodedTerm>>,
named: Option<Vec<EncodedTerm>>, named: Option<Vec<EncodedTerm>>,

@ -998,7 +998,7 @@ impl SimpleEvaluator {
| EncodedTerm::BigBigLangStringLiteral { language_id, .. } => { | EncodedTerm::BigBigLangStringLiteral { language_id, .. } => {
Some(self.build_string_literal_from_id(language_id.into())) Some(self.build_string_literal_from_id(language_id.into()))
} }
e if e.is_literal() => self.build_string_literal(""), e if e.is_literal() => Some(self.build_string_literal("")),
_ => None, _ => None,
}, },
PlanExpression::LangMatches(language_tag, language_range) => { PlanExpression::LangMatches(language_tag, language_range) => {
@ -1033,14 +1033,16 @@ impl SimpleEvaluator {
Some(e) Some(e)
} else { } else {
let iri = self.to_simple_string(&e)?; let iri = self.to_simple_string(&e)?;
self.build_named_node( Some(
&if let Some(base_iri) = &self.base_iri { self.build_named_node(
base_iri.resolve(&iri) &if let Some(base_iri) = &self.base_iri {
} else { base_iri.resolve(&iri)
Iri::parse(iri) } else {
} Iri::parse(iri)
.ok()? }
.into_inner(), .ok()?
.into_inner(),
),
) )
} }
} }
@ -1049,12 +1051,7 @@ impl SimpleEvaluator {
let bnode = let bnode =
BlankNode::new(self.to_simple_string(&self.eval_expression(id, tuple)?)?) BlankNode::new(self.to_simple_string(&self.eval_expression(id, tuple)?)?)
.ok()?; .ok()?;
Some( Some(self.dataset.encode_term(bnode.as_ref()))
self.dataset
.as_ref()
.encode_blank_node(bnode.as_ref())
.ok()?,
)
} }
None => Some(EncodedTerm::NumericalBlankNode { None => Some(EncodedTerm::NumericalBlankNode {
id: random::<u128>(), id: random::<u128>(),
@ -1104,7 +1101,7 @@ impl SimpleEvaluator {
} }
result += &value result += &value
} }
self.build_plain_literal(&result, language.and_then(|v| v)) Some(self.build_plain_literal(&result, language.and_then(|v| v)))
} }
PlanExpression::SubStr(source, starting_loc, length) => { PlanExpression::SubStr(source, starting_loc, length) => {
let (source, language) = let (source, language) =
@ -1146,7 +1143,7 @@ impl SimpleEvaluator {
} else { } else {
"" ""
}; };
self.build_plain_literal(result, language) Some(self.build_plain_literal(result, language))
} }
PlanExpression::StrLen(arg) => Some( PlanExpression::StrLen(arg) => Some(
(self (self
@ -1168,17 +1165,22 @@ impl SimpleEvaluator {
self.to_string_and_language(&self.eval_expression(arg, tuple)?)?; self.to_string_and_language(&self.eval_expression(arg, tuple)?)?;
let replacement = let replacement =
self.to_simple_string(&self.eval_expression(replacement, tuple)?)?; self.to_simple_string(&self.eval_expression(replacement, tuple)?)?;
self.build_plain_literal(&regex.replace_all(&text, replacement.as_str()), language) Some(
self.build_plain_literal(
&regex.replace_all(&text, replacement.as_str()),
language,
),
)
} }
PlanExpression::UCase(e) => { PlanExpression::UCase(e) => {
let (value, language) = let (value, language) =
self.to_string_and_language(&self.eval_expression(e, tuple)?)?; self.to_string_and_language(&self.eval_expression(e, tuple)?)?;
self.build_plain_literal(&value.to_uppercase(), language) Some(self.build_plain_literal(&value.to_uppercase(), language))
} }
PlanExpression::LCase(e) => { PlanExpression::LCase(e) => {
let (value, language) = let (value, language) =
self.to_string_and_language(&self.eval_expression(e, tuple)?)?; self.to_string_and_language(&self.eval_expression(e, tuple)?)?;
self.build_plain_literal(&value.to_lowercase(), language) Some(self.build_plain_literal(&value.to_lowercase(), language))
} }
PlanExpression::StrStarts(arg1, arg2) => { PlanExpression::StrStarts(arg1, arg2) => {
let (arg1, arg2, _) = self.to_argument_compatible_strings( let (arg1, arg2, _) = self.to_argument_compatible_strings(
@ -1212,7 +1214,7 @@ impl SimpleEvaluator {
} }
} }
} }
self.build_string_literal(str::from_utf8(&result).ok()?) Some(self.build_string_literal(str::from_utf8(&result).ok()?))
} }
PlanExpression::StrEnds(arg1, arg2) => { PlanExpression::StrEnds(arg1, arg2) => {
let (arg1, arg2, _) = self.to_argument_compatible_strings( let (arg1, arg2, _) = self.to_argument_compatible_strings(
@ -1233,22 +1235,22 @@ impl SimpleEvaluator {
&self.eval_expression(arg1, tuple)?, &self.eval_expression(arg1, tuple)?,
&self.eval_expression(arg2, tuple)?, &self.eval_expression(arg2, tuple)?,
)?; )?;
if let Some(position) = (&arg1).find(arg2.as_str()) { Some(if let Some(position) = (&arg1).find(arg2.as_str()) {
self.build_plain_literal(&arg1[..position], language) self.build_plain_literal(&arg1[..position], language)
} else { } else {
self.build_string_literal("") self.build_string_literal("")
} })
} }
PlanExpression::StrAfter(arg1, arg2) => { PlanExpression::StrAfter(arg1, arg2) => {
let (arg1, arg2, language) = self.to_argument_compatible_strings( let (arg1, arg2, language) = self.to_argument_compatible_strings(
&self.eval_expression(arg1, tuple)?, &self.eval_expression(arg1, tuple)?,
&self.eval_expression(arg2, tuple)?, &self.eval_expression(arg2, tuple)?,
)?; )?;
if let Some(position) = (&arg1).find(arg2.as_str()) { Some(if let Some(position) = (&arg1).find(arg2.as_str()) {
self.build_plain_literal(&arg1[position + arg2.len()..], language) self.build_plain_literal(&arg1[position + arg2.len()..], language)
} else { } else {
self.build_string_literal("") self.build_string_literal("")
} })
} }
PlanExpression::Year(e) => match self.eval_expression(e, tuple)? { PlanExpression::Year(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.year().into()), EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.year().into()),
@ -1313,24 +1315,24 @@ impl SimpleEvaluator {
EncodedTerm::GMonthLiteral(month) => month.timezone_offset(), EncodedTerm::GMonthLiteral(month) => month.timezone_offset(),
_ => return None, _ => return None,
}; };
match timezone_offset { Some(match timezone_offset {
Some(timezone_offset) => { Some(timezone_offset) => {
self.build_string_literal(&timezone_offset.to_string()) self.build_string_literal(&timezone_offset.to_string())
} }
None => self.build_string_literal(""), None => self.build_string_literal(""),
} })
} }
PlanExpression::Now => Some(self.now.into()), PlanExpression::Now => Some(self.now.into()),
PlanExpression::Uuid => { PlanExpression::Uuid => {
let mut buffer = String::with_capacity(44); let mut buffer = String::with_capacity(44);
buffer.push_str("urn:uuid:"); buffer.push_str("urn:uuid:");
generate_uuid(&mut buffer); generate_uuid(&mut buffer);
self.build_named_node(&buffer) Some(self.build_named_node(&buffer))
} }
PlanExpression::StrUuid => { PlanExpression::StrUuid => {
let mut buffer = String::with_capacity(36); let mut buffer = String::with_capacity(36);
generate_uuid(&mut buffer); generate_uuid(&mut buffer);
self.build_string_literal(&buffer) Some(self.build_string_literal(&buffer))
} }
PlanExpression::Md5(arg) => self.hash::<Md5>(arg, tuple), PlanExpression::Md5(arg) => self.hash::<Md5>(arg, tuple),
PlanExpression::Sha1(arg) => self.hash::<Sha1>(arg, tuple), PlanExpression::Sha1(arg) => self.hash::<Sha1>(arg, tuple),
@ -1367,13 +1369,10 @@ impl SimpleEvaluator {
} else { } else {
None None
}?; }?;
let encoder = self.dataset.as_ref(); Some(self.dataset.encode_term(LiteralRef::new_typed_literal(
encoder &value,
.encode_literal(LiteralRef::new_typed_literal( NamedNodeRef::new_unchecked(&datatype),
&value, )))
NamedNodeRef::new_unchecked(&datatype),
))
.ok()
} }
PlanExpression::SameTerm(a, b) => { PlanExpression::SameTerm(a, b) => {
Some((self.eval_expression(a, tuple)? == self.eval_expression(b, tuple)?).into()) Some((self.eval_expression(a, tuple)? == self.eval_expression(b, tuple)?).into())
@ -1608,25 +1607,27 @@ impl SimpleEvaluator {
| EncodedTerm::BigBigLangStringLiteral { value_id, .. } | EncodedTerm::BigBigLangStringLiteral { value_id, .. }
| EncodedTerm::BigTypedLiteral { value_id, .. } => Some((*value_id).into()), | EncodedTerm::BigTypedLiteral { value_id, .. } => Some((*value_id).into()),
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
self.build_string_id(if *value { "true" } else { "false" }) Some(self.build_string_id(if *value { "true" } else { "false" }))
} }
EncodedTerm::FloatLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::FloatLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::DoubleLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::DoubleLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::IntegerLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::IntegerLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::DecimalLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::DecimalLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::DateTimeLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::DateTimeLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::TimeLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::TimeLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::DateLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::DateLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::GYearMonthLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::GYearMonthLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::GYearLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::GYearLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::GMonthDayLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::GMonthDayLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::GDayLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::GDayLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::GMonthLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::GMonthLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::DurationLiteral(value) => self.build_string_id(&value.to_string()), EncodedTerm::DurationLiteral(value) => Some(self.build_string_id(&value.to_string())),
EncodedTerm::YearMonthDurationLiteral(value) => { EncodedTerm::YearMonthDurationLiteral(value) => {
self.build_string_id(&value.to_string()) Some(self.build_string_id(&value.to_string()))
}
EncodedTerm::DayTimeDurationLiteral(value) => {
Some(self.build_string_id(&value.to_string()))
} }
EncodedTerm::DayTimeDurationLiteral(value) => self.build_string_id(&value.to_string()),
} }
} }
@ -1690,14 +1691,16 @@ impl SimpleEvaluator {
} }
} }
fn build_named_node(&self, iri: &str) -> Option<EncodedTerm> { fn build_named_node(&self, iri: &str) -> EncodedTerm {
Some(EncodedTerm::NamedNode { self.dataset.encode_term(NamedNodeRef::new_unchecked(iri))
iri_id: self.dataset.as_ref().encode_str(iri).ok()?,
})
} }
fn build_string_literal(&self, value: &str) -> Option<EncodedTerm> { fn encode_named_node(&self, node: NamedNodeRef<'_>) -> EncodedTerm {
Some(self.build_string_literal_from_id(self.build_string_id(value)?)) self.dataset.encode_term(node)
}
fn build_string_literal(&self, value: &str) -> EncodedTerm {
self.build_string_literal_from_id(self.build_string_id(value))
} }
fn build_string_literal_from_id(&self, id: SmallStringOrId) -> EncodedTerm { fn build_string_literal_from_id(&self, id: SmallStringOrId) -> EncodedTerm {
@ -1707,12 +1710,8 @@ impl SimpleEvaluator {
} }
} }
fn build_lang_string_literal( fn build_lang_string_literal(&self, value: &str, language_id: SmallStringOrId) -> EncodedTerm {
&self, self.build_lang_string_literal_from_id(self.build_string_id(value), language_id)
value: &str,
language_id: SmallStringOrId,
) -> Option<EncodedTerm> {
Some(self.build_lang_string_literal_from_id(self.build_string_id(value)?, language_id))
} }
fn build_lang_string_literal_from_id( fn build_lang_string_literal_from_id(
@ -1739,11 +1738,7 @@ impl SimpleEvaluator {
} }
} }
fn build_plain_literal( fn build_plain_literal(&self, value: &str, language: Option<SmallStringOrId>) -> EncodedTerm {
&self,
value: &str,
language: Option<SmallStringOrId>,
) -> Option<EncodedTerm> {
if let Some(language_id) = language { if let Some(language_id) = language {
self.build_lang_string_literal(value, language_id) self.build_lang_string_literal(value, language_id)
} else { } else {
@ -1751,18 +1746,20 @@ impl SimpleEvaluator {
} }
} }
fn build_string_id(&self, value: &str) -> Option<SmallStringOrId> { fn build_string_id(&self, value: &str) -> SmallStringOrId {
Some(if let Ok(value) = SmallString::try_from(value) { if let Ok(value) = SmallString::try_from(value) {
value.into() value.into()
} else { } else {
self.dataset.as_ref().encode_str(value).ok()?.into() let id = StrHash::new(value);
}) self.dataset.insert_str(&id, value);
SmallStringOrId::Big(id)
}
} }
fn build_language_id(&self, value: &EncodedTerm) -> Option<SmallStringOrId> { fn build_language_id(&self, value: &EncodedTerm) -> Option<SmallStringOrId> {
let mut language = self.to_simple_string(value)?; let mut language = self.to_simple_string(value)?;
language.make_ascii_lowercase(); language.make_ascii_lowercase();
self.build_string_id(LanguageTag::parse(language).ok()?.as_str()) Some(self.build_string_id(LanguageTag::parse(language).ok()?.as_str()))
} }
fn to_argument_compatible_strings( fn to_argument_compatible_strings(
@ -1849,13 +1846,12 @@ impl SimpleEvaluator {
) -> EncodedTuplesIterator { ) -> EncodedTuplesIterator {
let eval = self.clone(); let eval = self.clone();
Box::new(iter.map(move |solution| { Box::new(iter.map(move |solution| {
let encoder = eval.dataset.as_ref();
let mut encoded_terms = EncodedTuple::with_capacity(variables.len()); let mut encoded_terms = EncodedTuple::with_capacity(variables.len());
for (variable, term) in solution?.iter() { for (variable, term) in solution?.iter() {
put_variable_value( put_variable_value(
variable, variable,
&variables, &variables,
encoder.encode_term(term.as_ref())?, eval.dataset.encode_term(term.as_ref()),
&mut encoded_terms, &mut encoded_terms,
) )
} }
@ -2203,7 +2199,7 @@ impl SimpleEvaluator {
fn hash<H: Digest>(&self, arg: &PlanExpression, tuple: &EncodedTuple) -> Option<EncodedTerm> { fn hash<H: Digest>(&self, arg: &PlanExpression, tuple: &EncodedTuple) -> Option<EncodedTerm> {
let input = self.to_simple_string(&self.eval_expression(arg, tuple)?)?; let input = self.to_simple_string(&self.eval_expression(arg, tuple)?)?;
let hash = hex::encode(H::new().chain(input.as_str()).finalize()); let hash = hex::encode(H::new().chain(input.as_str()).finalize());
self.build_string_literal(&hash) Some(self.build_string_literal(&hash))
} }
fn datatype(&self, value: &EncodedTerm) -> Option<EncodedTerm> { fn datatype(&self, value: &EncodedTerm) -> Option<EncodedTerm> {
@ -2216,37 +2212,37 @@ impl SimpleEvaluator {
| EncodedTerm::DefaultGraph | EncodedTerm::DefaultGraph
| EncodedTerm::Triple(_) => None, | EncodedTerm::Triple(_) => None,
EncodedTerm::SmallStringLiteral(_) | EncodedTerm::BigStringLiteral { .. } => { EncodedTerm::SmallStringLiteral(_) | EncodedTerm::BigStringLiteral { .. } => {
self.build_named_node(xsd::STRING.as_str()) Some(self.encode_named_node(xsd::STRING))
} }
EncodedTerm::SmallSmallLangStringLiteral { .. } EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. } | EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. } | EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. } => { | EncodedTerm::BigBigLangStringLiteral { .. } => {
self.build_named_node(rdf::LANG_STRING.as_str()) Some(self.encode_named_node(rdf::LANG_STRING))
} }
EncodedTerm::SmallTypedLiteral { datatype_id, .. } EncodedTerm::SmallTypedLiteral { datatype_id, .. }
| EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { | EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode {
iri_id: *datatype_id, iri_id: *datatype_id,
}), }),
EncodedTerm::BooleanLiteral(..) => self.build_named_node(xsd::BOOLEAN.as_str()), EncodedTerm::BooleanLiteral(..) => Some(self.encode_named_node(xsd::BOOLEAN)),
EncodedTerm::FloatLiteral(..) => self.build_named_node(xsd::FLOAT.as_str()), EncodedTerm::FloatLiteral(..) => Some(self.encode_named_node(xsd::FLOAT)),
EncodedTerm::DoubleLiteral(..) => self.build_named_node(xsd::DOUBLE.as_str()), EncodedTerm::DoubleLiteral(..) => Some(self.encode_named_node(xsd::DOUBLE)),
EncodedTerm::IntegerLiteral(..) => self.build_named_node(xsd::INTEGER.as_str()), EncodedTerm::IntegerLiteral(..) => Some(self.encode_named_node(xsd::INTEGER)),
EncodedTerm::DecimalLiteral(..) => self.build_named_node(xsd::DECIMAL.as_str()), EncodedTerm::DecimalLiteral(..) => Some(self.encode_named_node(xsd::DECIMAL)),
EncodedTerm::DateTimeLiteral(..) => self.build_named_node(xsd::DATE_TIME.as_str()), EncodedTerm::DateTimeLiteral(..) => Some(self.encode_named_node(xsd::DATE_TIME)),
EncodedTerm::TimeLiteral(..) => self.build_named_node(xsd::TIME.as_str()), EncodedTerm::TimeLiteral(..) => Some(self.encode_named_node(xsd::TIME)),
EncodedTerm::DateLiteral(..) => self.build_named_node(xsd::DATE.as_str()), EncodedTerm::DateLiteral(..) => Some(self.encode_named_node(xsd::DATE)),
EncodedTerm::GYearMonthLiteral(..) => self.build_named_node(xsd::G_YEAR_MONTH.as_str()), EncodedTerm::GYearMonthLiteral(..) => Some(self.encode_named_node(xsd::G_YEAR_MONTH)),
EncodedTerm::GYearLiteral(..) => self.build_named_node(xsd::G_YEAR.as_str()), EncodedTerm::GYearLiteral(..) => Some(self.encode_named_node(xsd::G_YEAR)),
EncodedTerm::GMonthDayLiteral(..) => self.build_named_node(xsd::G_MONTH_DAY.as_str()), EncodedTerm::GMonthDayLiteral(..) => Some(self.encode_named_node(xsd::G_MONTH_DAY)),
EncodedTerm::GDayLiteral(..) => self.build_named_node(xsd::G_DAY.as_str()), EncodedTerm::GDayLiteral(..) => Some(self.encode_named_node(xsd::G_DAY)),
EncodedTerm::GMonthLiteral(..) => self.build_named_node(xsd::G_MONTH.as_str()), EncodedTerm::GMonthLiteral(..) => Some(self.encode_named_node(xsd::G_MONTH)),
EncodedTerm::DurationLiteral(..) => self.build_named_node(xsd::DURATION.as_str()), EncodedTerm::DurationLiteral(..) => Some(self.encode_named_node(xsd::DURATION)),
EncodedTerm::YearMonthDurationLiteral(..) => { EncodedTerm::YearMonthDurationLiteral(..) => {
self.build_named_node(xsd::YEAR_MONTH_DURATION.as_str()) Some(self.encode_named_node(xsd::YEAR_MONTH_DURATION))
} }
EncodedTerm::DayTimeDurationLiteral(..) => { EncodedTerm::DayTimeDurationLiteral(..) => {
self.build_named_node(xsd::DAY_TIME_DURATION.as_str()) Some(self.encode_named_node(xsd::DAY_TIME_DURATION))
} }
} }
} }
@ -3165,7 +3161,7 @@ impl Accumulator for GroupConcatAccumulator {
} }
fn state(&self) -> Option<EncodedTerm> { fn state(&self) -> Option<EncodedTerm> {
self.concat.as_ref().and_then(|result| { self.concat.as_ref().map(|result| {
self.eval self.eval
.build_plain_literal(result, self.language.and_then(|v| v)) .build_plain_literal(result, self.language.and_then(|v| v))
}) })

@ -80,7 +80,7 @@ pub(crate) fn evaluate_query(
.. ..
} => { } => {
let (plan, variables) = PlanBuilder::build(&dataset, &pattern)?; let (plan, variables) = PlanBuilder::build(&dataset, &pattern)?;
let construct = PlanBuilder::build_graph_template(&dataset, &template, variables)?; let construct = PlanBuilder::build_graph_template(&dataset, &template, variables);
SimpleEvaluator::new( SimpleEvaluator::new(
Rc::new(dataset), Rc::new(dataset),
base_iri.map(Rc::new), base_iri.map(Rc::new),

@ -3,7 +3,7 @@ use crate::sparql::dataset::DatasetView;
use crate::sparql::error::EvaluationError; use crate::sparql::error::EvaluationError;
use crate::sparql::model::Variable as OxVariable; use crate::sparql::model::Variable as OxVariable;
use crate::sparql::plan::*; use crate::sparql::plan::*;
use crate::storage::numeric_encoder::{EncodedTerm, EncodedTriple, WriteEncoder}; use crate::storage::numeric_encoder::{EncodedTerm, EncodedTriple};
use rand::random; use rand::random;
use spargebra::algebra::*; use spargebra::algebra::*;
use spargebra::term::*; use spargebra::term::*;
@ -32,7 +32,7 @@ impl<'a> PlanBuilder<'a> {
dataset: &'a DatasetView, dataset: &'a DatasetView,
template: &[TriplePattern], template: &[TriplePattern],
mut variables: Vec<Variable>, mut variables: Vec<Variable>,
) -> Result<Vec<TripleTemplate>, EvaluationError> { ) -> Vec<TripleTemplate> {
PlanBuilder { dataset }.build_for_graph_template(template, &mut variables) PlanBuilder { dataset }.build_for_graph_template(template, &mut variables)
} }
@ -43,16 +43,16 @@ impl<'a> PlanBuilder<'a> {
graph_name: &PatternValue, graph_name: &PatternValue,
) -> Result<PlanNode, EvaluationError> { ) -> Result<PlanNode, EvaluationError> {
Ok(match pattern { Ok(match pattern {
GraphPattern::Bgp(p) => self.build_for_bgp(p, variables, graph_name)?, GraphPattern::Bgp(p) => self.build_for_bgp(p, variables, graph_name),
GraphPattern::Path { GraphPattern::Path {
subject, subject,
path, path,
object, object,
} => PlanNode::PathPatternJoin { } => PlanNode::PathPatternJoin {
child: Rc::new(PlanNode::Init), child: Rc::new(PlanNode::Init),
subject: self.pattern_value_from_term_or_variable(subject, variables)?, subject: self.pattern_value_from_term_or_variable(subject, variables),
path: Rc::new(self.build_for_path(path)?), path: Rc::new(self.build_for_path(path)),
object: self.pattern_value_from_term_or_variable(object, variables)?, object: self.pattern_value_from_term_or_variable(object, variables),
graph_name: graph_name.clone(), graph_name: graph_name.clone(),
}, },
GraphPattern::Join { left, right } => { GraphPattern::Join { left, right } => {
@ -66,9 +66,9 @@ impl<'a> PlanBuilder<'a> {
let left = self.build_for_graph_pattern(left, variables, graph_name)?; let left = self.build_for_graph_pattern(left, variables, graph_name)?;
PlanNode::PathPatternJoin { PlanNode::PathPatternJoin {
child: Rc::new(left), child: Rc::new(left),
subject: self.pattern_value_from_term_or_variable(subject, variables)?, subject: self.pattern_value_from_term_or_variable(subject, variables),
path: Rc::new(self.build_for_path(path)?), path: Rc::new(self.build_for_path(path)),
object: self.pattern_value_from_term_or_variable(object, variables)?, object: self.pattern_value_from_term_or_variable(object, variables),
graph_name: graph_name.clone(), graph_name: graph_name.clone(),
} }
} else { } else {
@ -127,7 +127,7 @@ impl<'a> PlanBuilder<'a> {
} }
GraphPattern::Graph { graph_name, inner } => { GraphPattern::Graph { graph_name, inner } => {
let graph_name = let graph_name =
self.pattern_value_from_named_node_or_variable(graph_name, variables)?; self.pattern_value_from_named_node_or_variable(graph_name, variables);
self.build_for_graph_pattern(inner, variables, &graph_name)? self.build_for_graph_pattern(inner, variables, &graph_name)?
} }
GraphPattern::Extend { inner, var, expr } => PlanNode::Extend { GraphPattern::Extend { inner, var, expr } => PlanNode::Extend {
@ -146,8 +146,7 @@ impl<'a> PlanBuilder<'a> {
} => { } => {
// Child building should be at the beginning in order for `variables` to be filled // Child building should be at the beginning in order for `variables` to be filled
let child = self.build_for_graph_pattern(pattern, variables, graph_name)?; let child = self.build_for_graph_pattern(pattern, variables, graph_name)?;
let service_name = let service_name = self.pattern_value_from_named_node_or_variable(name, variables);
self.pattern_value_from_named_node_or_variable(name, variables)?;
PlanNode::Service { PlanNode::Service {
service_name, service_name,
variables: Rc::new( variables: Rc::new(
@ -203,7 +202,7 @@ impl<'a> PlanBuilder<'a> {
variables: table_variables, variables: table_variables,
rows, rows,
} => PlanNode::StaticBindings { } => PlanNode::StaticBindings {
tuples: self.encode_bindings(table_variables, rows, variables)?, tuples: self.encode_bindings(table_variables, rows, variables),
}, },
GraphPattern::OrderBy { inner, condition } => { GraphPattern::OrderBy { inner, condition } => {
let condition: Result<Vec<_>, EvaluationError> = condition let condition: Result<Vec<_>, EvaluationError> = condition
@ -277,57 +276,50 @@ impl<'a> PlanBuilder<'a> {
p: &[TriplePattern], p: &[TriplePattern],
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
graph_name: &PatternValue, graph_name: &PatternValue,
) -> Result<PlanNode, EvaluationError> { ) -> PlanNode {
let mut plan = PlanNode::Init; let mut plan = PlanNode::Init;
for pattern in sort_bgp(p) { for pattern in sort_bgp(p) {
plan = PlanNode::QuadPatternJoin { plan = PlanNode::QuadPatternJoin {
child: Rc::new(plan), child: Rc::new(plan),
subject: self.pattern_value_from_term_or_variable(&pattern.subject, variables)?, subject: self.pattern_value_from_term_or_variable(&pattern.subject, variables),
predicate: self predicate: self
.pattern_value_from_named_node_or_variable(&pattern.predicate, variables)?, .pattern_value_from_named_node_or_variable(&pattern.predicate, variables),
object: self.pattern_value_from_term_or_variable(&pattern.object, variables)?, object: self.pattern_value_from_term_or_variable(&pattern.object, variables),
graph_name: graph_name.clone(), graph_name: graph_name.clone(),
} }
} }
Ok(plan) plan
} }
fn build_for_path( fn build_for_path(&mut self, path: &PropertyPathExpression) -> PlanPropertyPath {
&mut self, match path {
path: &PropertyPathExpression,
) -> Result<PlanPropertyPath, EvaluationError> {
Ok(match path {
PropertyPathExpression::NamedNode(p) => { PropertyPathExpression::NamedNode(p) => {
PlanPropertyPath::Path(self.build_named_node(p)?) PlanPropertyPath::Path(self.build_named_node(p))
} }
PropertyPathExpression::Reverse(p) => { PropertyPathExpression::Reverse(p) => {
PlanPropertyPath::Reverse(Rc::new(self.build_for_path(p)?)) PlanPropertyPath::Reverse(Rc::new(self.build_for_path(p)))
} }
PropertyPathExpression::Alternative(a, b) => PlanPropertyPath::Alternative( PropertyPathExpression::Alternative(a, b) => PlanPropertyPath::Alternative(
Rc::new(self.build_for_path(a)?), Rc::new(self.build_for_path(a)),
Rc::new(self.build_for_path(b)?), Rc::new(self.build_for_path(b)),
), ),
PropertyPathExpression::Sequence(a, b) => PlanPropertyPath::Sequence( PropertyPathExpression::Sequence(a, b) => PlanPropertyPath::Sequence(
Rc::new(self.build_for_path(a)?), Rc::new(self.build_for_path(a)),
Rc::new(self.build_for_path(b)?), Rc::new(self.build_for_path(b)),
), ),
PropertyPathExpression::ZeroOrMore(p) => { PropertyPathExpression::ZeroOrMore(p) => {
PlanPropertyPath::ZeroOrMore(Rc::new(self.build_for_path(p)?)) PlanPropertyPath::ZeroOrMore(Rc::new(self.build_for_path(p)))
} }
PropertyPathExpression::OneOrMore(p) => { PropertyPathExpression::OneOrMore(p) => {
PlanPropertyPath::OneOrMore(Rc::new(self.build_for_path(p)?)) PlanPropertyPath::OneOrMore(Rc::new(self.build_for_path(p)))
} }
PropertyPathExpression::ZeroOrOne(p) => { PropertyPathExpression::ZeroOrOne(p) => {
PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p)?)) PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p)))
}
PropertyPathExpression::NegatedPropertySet(p) => {
PlanPropertyPath::NegatedPropertySet(Rc::new(
p.iter()
.map(|p| self.build_named_node(p))
.collect::<Result<Vec<_>, _>>()?,
))
} }
}) PropertyPathExpression::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet(
Rc::new(p.iter().map(|p| self.build_named_node(p)).collect()),
),
}
} }
fn build_for_expression( fn build_for_expression(
@ -337,8 +329,8 @@ impl<'a> PlanBuilder<'a> {
graph_name: &PatternValue, graph_name: &PatternValue,
) -> Result<PlanExpression, EvaluationError> { ) -> Result<PlanExpression, EvaluationError> {
Ok(match expression { Ok(match expression {
Expression::NamedNode(node) => PlanExpression::Constant(self.build_named_node(node)?), Expression::NamedNode(node) => PlanExpression::Constant(self.build_named_node(node)),
Expression::Literal(l) => PlanExpression::Constant(self.build_literal(l)?), Expression::Literal(l) => PlanExpression::Constant(self.build_literal(l)),
Expression::Variable(v) => PlanExpression::Variable(variable_key(variables, v)), Expression::Variable(v) => PlanExpression::Variable(variable_key(variables, v)),
Expression::Or(a, b) => PlanExpression::Or( Expression::Or(a, b) => PlanExpression::Or(
Box::new(self.build_for_expression(a, variables, graph_name)?), Box::new(self.build_for_expression(a, variables, graph_name)?),
@ -791,12 +783,12 @@ impl<'a> PlanBuilder<'a> {
&mut self, &mut self,
term_or_variable: &TermPattern, term_or_variable: &TermPattern,
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<PatternValue, EvaluationError> { ) -> PatternValue {
Ok(match term_or_variable { match term_or_variable {
TermPattern::Variable(variable) => { TermPattern::Variable(variable) => {
PatternValue::Variable(variable_key(variables, variable)) PatternValue::Variable(variable_key(variables, variable))
} }
TermPattern::NamedNode(node) => PatternValue::Constant(self.build_named_node(node)?), TermPattern::NamedNode(node) => PatternValue::Constant(self.build_named_node(node)),
TermPattern::BlankNode(bnode) => { TermPattern::BlankNode(bnode) => {
PatternValue::Variable(variable_key( PatternValue::Variable(variable_key(
variables, variables,
@ -806,12 +798,12 @@ impl<'a> PlanBuilder<'a> {
)) ))
//TODO: very bad hack to convert bnode to variable //TODO: very bad hack to convert bnode to variable
} }
TermPattern::Literal(literal) => PatternValue::Constant(self.build_literal(literal)?), TermPattern::Literal(literal) => PatternValue::Constant(self.build_literal(literal)),
TermPattern::Triple(triple) => { TermPattern::Triple(triple) => {
match ( match (
self.pattern_value_from_term_or_variable(&triple.subject, variables)?, self.pattern_value_from_term_or_variable(&triple.subject, variables),
self.pattern_value_from_named_node_or_variable(&triple.predicate, variables)?, self.pattern_value_from_named_node_or_variable(&triple.predicate, variables),
self.pattern_value_from_term_or_variable(&triple.object, variables)?, self.pattern_value_from_term_or_variable(&triple.object, variables),
) { ) {
( (
PatternValue::Constant(subject), PatternValue::Constant(subject),
@ -834,22 +826,22 @@ impl<'a> PlanBuilder<'a> {
} }
} }
} }
}) }
} }
fn pattern_value_from_named_node_or_variable( fn pattern_value_from_named_node_or_variable(
&mut self, &mut self,
named_node_or_variable: &NamedNodePattern, named_node_or_variable: &NamedNodePattern,
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<PatternValue, EvaluationError> { ) -> PatternValue {
Ok(match named_node_or_variable { match named_node_or_variable {
NamedNodePattern::NamedNode(named_node) => { NamedNodePattern::NamedNode(named_node) => {
PatternValue::Constant(self.build_named_node(named_node)?) PatternValue::Constant(self.build_named_node(named_node))
} }
NamedNodePattern::Variable(variable) => { NamedNodePattern::Variable(variable) => {
PatternValue::Variable(variable_key(variables, variable)) PatternValue::Variable(variable_key(variables, variable))
} }
}) }
} }
fn encode_bindings( fn encode_bindings(
@ -857,7 +849,7 @@ impl<'a> PlanBuilder<'a> {
table_variables: &[Variable], table_variables: &[Variable],
rows: &[Vec<Option<GroundTerm>>], rows: &[Vec<Option<GroundTerm>>],
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<Vec<EncodedTuple>, EvaluationError> { ) -> Vec<EncodedTuple> {
let bindings_variables_keys = table_variables let bindings_variables_keys = table_variables
.iter() .iter()
.map(|v| variable_key(variables, v)) .map(|v| variable_key(variables, v))
@ -873,11 +865,11 @@ impl<'a> PlanBuilder<'a> {
GroundTerm::NamedNode(node) => self.build_named_node(node), GroundTerm::NamedNode(node) => self.build_named_node(node),
GroundTerm::Literal(literal) => self.build_literal(literal), GroundTerm::Literal(literal) => self.build_literal(literal),
GroundTerm::Triple(triple) => self.build_triple(triple), GroundTerm::Triple(triple) => self.build_triple(triple),
}?, },
); );
} }
} }
Ok(result) result
}) })
.collect() .collect()
} }
@ -943,25 +935,23 @@ impl<'a> PlanBuilder<'a> {
&mut self, &mut self,
template: &[TriplePattern], template: &[TriplePattern],
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<Vec<TripleTemplate>, EvaluationError> { ) -> Vec<TripleTemplate> {
let mut bnodes = Vec::default(); let mut bnodes = Vec::default();
template template
.iter() .iter()
.map(|triple| { .map(|triple| TripleTemplate {
Ok(TripleTemplate { subject: self.template_value_from_term_or_variable(
subject: self.template_value_from_term_or_variable( &triple.subject,
&triple.subject, variables,
variables, &mut bnodes,
&mut bnodes, ),
)?, predicate: self
predicate: self .template_value_from_named_node_or_variable(&triple.predicate, variables),
.template_value_from_named_node_or_variable(&triple.predicate, variables)?, object: self.template_value_from_term_or_variable(
object: self.template_value_from_term_or_variable( &triple.object,
&triple.object, variables,
variables, &mut bnodes,
&mut bnodes, ),
)?,
})
}) })
.collect() .collect()
} }
@ -971,24 +961,24 @@ impl<'a> PlanBuilder<'a> {
term_or_variable: &TermPattern, term_or_variable: &TermPattern,
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
bnodes: &mut Vec<BlankNode>, bnodes: &mut Vec<BlankNode>,
) -> Result<TripleTemplateValue, EvaluationError> { ) -> TripleTemplateValue {
Ok(match term_or_variable { match term_or_variable {
TermPattern::Variable(variable) => { TermPattern::Variable(variable) => {
TripleTemplateValue::Variable(variable_key(variables, variable)) TripleTemplateValue::Variable(variable_key(variables, variable))
} }
TermPattern::NamedNode(node) => { TermPattern::NamedNode(node) => {
TripleTemplateValue::Constant(self.build_named_node(node)?) TripleTemplateValue::Constant(self.build_named_node(node))
} }
TermPattern::BlankNode(bnode) => { TermPattern::BlankNode(bnode) => {
TripleTemplateValue::BlankNode(bnode_key(bnodes, bnode)) TripleTemplateValue::BlankNode(bnode_key(bnodes, bnode))
} }
TermPattern::Literal(literal) => { TermPattern::Literal(literal) => {
TripleTemplateValue::Constant(self.build_literal(literal)?) TripleTemplateValue::Constant(self.build_literal(literal))
} }
TermPattern::Triple(triple) => match ( TermPattern::Triple(triple) => match (
self.template_value_from_term_or_variable(&triple.subject, variables, bnodes)?, self.template_value_from_term_or_variable(&triple.subject, variables, bnodes),
self.template_value_from_named_node_or_variable(&triple.predicate, variables)?, self.template_value_from_named_node_or_variable(&triple.predicate, variables),
self.template_value_from_term_or_variable(&triple.object, variables, bnodes)?, self.template_value_from_term_or_variable(&triple.object, variables, bnodes),
) { ) {
( (
TripleTemplateValue::Constant(subject), TripleTemplateValue::Constant(subject),
@ -1010,22 +1000,22 @@ impl<'a> PlanBuilder<'a> {
})) }))
} }
}, },
}) }
} }
fn template_value_from_named_node_or_variable( fn template_value_from_named_node_or_variable(
&mut self, &mut self,
named_node_or_variable: &NamedNodePattern, named_node_or_variable: &NamedNodePattern,
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<TripleTemplateValue, EvaluationError> { ) -> TripleTemplateValue {
Ok(match named_node_or_variable { match named_node_or_variable {
NamedNodePattern::Variable(variable) => { NamedNodePattern::Variable(variable) => {
TripleTemplateValue::Variable(variable_key(variables, variable)) TripleTemplateValue::Variable(variable_key(variables, variable))
} }
NamedNodePattern::NamedNode(term) => { NamedNodePattern::NamedNode(term) => {
TripleTemplateValue::Constant(self.build_named_node(term)?) TripleTemplateValue::Constant(self.build_named_node(term))
} }
}) }
} }
fn convert_pattern_value_id( fn convert_pattern_value_id(
@ -1132,13 +1122,13 @@ impl<'a> PlanBuilder<'a> {
} }
} }
fn build_named_node(&mut self, node: &NamedNode) -> Result<EncodedTerm, EvaluationError> { fn build_named_node(&mut self, node: &NamedNode) -> EncodedTerm {
self.dataset self.dataset
.encode_named_node(NamedNodeRef::new_unchecked(node.iri.as_str())) .encode_term(NamedNodeRef::new_unchecked(node.iri.as_str()))
} }
fn build_literal(&mut self, literal: &Literal) -> Result<EncodedTerm, EvaluationError> { fn build_literal(&mut self, literal: &Literal) -> EncodedTerm {
self.dataset.encode_literal(match literal { self.dataset.encode_term(match literal {
Literal::Simple { value } => LiteralRef::new_simple_literal(value), Literal::Simple { value } => LiteralRef::new_simple_literal(value),
Literal::LanguageTaggedString { value, language } => { Literal::LanguageTaggedString { value, language } => {
LiteralRef::new_language_tagged_literal_unchecked(value, language.as_str()) LiteralRef::new_language_tagged_literal_unchecked(value, language.as_str())
@ -1150,20 +1140,20 @@ impl<'a> PlanBuilder<'a> {
}) })
} }
fn build_triple(&mut self, triple: &GroundTriple) -> Result<EncodedTerm, EvaluationError> { fn build_triple(&mut self, triple: &GroundTriple) -> EncodedTerm {
Ok(EncodedTriple::new( EncodedTriple::new(
match &triple.subject { match &triple.subject {
GroundSubject::NamedNode(node) => self.build_named_node(node)?, GroundSubject::NamedNode(node) => self.build_named_node(node),
GroundSubject::Triple(triple) => self.build_triple(triple)?, GroundSubject::Triple(triple) => self.build_triple(triple),
}, },
self.build_named_node(&triple.predicate)?, self.build_named_node(&triple.predicate),
match &triple.object { match &triple.object {
GroundTerm::NamedNode(node) => self.build_named_node(node)?, GroundTerm::NamedNode(node) => self.build_named_node(node),
GroundTerm::Literal(literal) => self.build_literal(literal)?, GroundTerm::Literal(literal) => self.build_literal(literal),
GroundTerm::Triple(triple) => self.build_triple(triple)?, GroundTerm::Triple(triple) => self.build_triple(triple),
}, },
) )
.into()) .into()
} }
} }

Loading…
Cancel
Save