From 3beecdff76bb67755df06fc51e13c493f7243d84 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 27 Aug 2019 15:28:53 +0200 Subject: [PATCH] Implements SPARQL 1.1 functions --- lib/Cargo.toml | 6 + lib/src/model/blank_node.rs | 12 +- lib/src/sparql/algebra.rs | 108 +++-- lib/src/sparql/eval.rs | 559 ++++++++++++++++++++++---- lib/src/sparql/mod.rs | 14 +- lib/src/sparql/plan.rs | 108 +++-- lib/src/sparql/plan_builder.rs | 128 +++++- lib/src/sparql/sparql_grammar.rustpeg | 18 +- lib/src/sparql/xml_results.rs | 40 +- lib/tests/sparql_test_cases.rs | 9 +- 10 files changed, 818 insertions(+), 184 deletions(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index b6e55e86..cb10037e 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -25,11 +25,17 @@ ordered-float = "1" num-traits = "0.2" rust_decimal = "1" chrono = "0.4" +rand = "0.7" +md-5 = "0.8" +sha-1 = "0.8" +sha2 = "0.8" +digest = "0.8" failure = "0.1" regex = "1" rio_api = "0.3" rio_turtle = "0.3" rio_xml = "0.3" +hex = "0.3" [build-dependencies] peg = "0.5" diff --git a/lib/src/model/blank_node.rs b/lib/src/model/blank_node.rs index be37a4c4..4126d5ac 100644 --- a/lib/src/model/blank_node.rs +++ b/lib/src/model/blank_node.rs @@ -1,5 +1,6 @@ use rio_api::model as rio; use std::fmt; +use std::str; use uuid::Uuid; /// A RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). 
@@ -15,13 +16,13 @@ use uuid::Uuid; #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct BlankNode { uuid: Uuid, - id: String, + str: [u8; 32], } impl BlankNode { /// Returns the underlying ID of this blank node pub fn as_str(&self) -> &str { - &self.id + str::from_utf8(&self.str).unwrap() } /// Returns the underlying UUID of this blank node @@ -45,10 +46,9 @@ impl Default for BlankNode { impl From<Uuid> for BlankNode { fn from(id: Uuid) -> Self { - Self { - uuid: id, - id: id.to_simple().to_string(), - } + let mut str = [0; 32]; + id.to_simple().encode_lower(&mut str); + Self { uuid: id, str } } } diff --git a/lib/src/sparql/algebra.rs b/lib/src/sparql/algebra.rs index cb28bd35..bec20650 100644 --- a/lib/src/sparql/algebra.rs +++ b/lib/src/sparql/algebra.rs @@ -3,6 +3,7 @@ use crate::model::*; use crate::sparql::model::*; use lazy_static::lazy_static; +use rio_api::iri::Iri; use rio_api::model as rio; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -1203,74 +1204,111 @@ lazy_static! 
{ static ref EMPTY_DATASET: DatasetSpec = DatasetSpec::default(); } -#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum QueryVariants { Select { dataset: DatasetSpec, algebra: GraphPattern, + base_iri: Option>, }, Construct { construct: Vec, dataset: DatasetSpec, algebra: GraphPattern, + base_iri: Option>, }, Describe { dataset: DatasetSpec, algebra: GraphPattern, + base_iri: Option>, }, Ask { dataset: DatasetSpec, algebra: GraphPattern, + base_iri: Option>, }, } impl fmt::Display for QueryVariants { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - QueryVariants::Select { dataset, algebra } => write!( - f, - "{}", - SparqlGraphRootPattern { - algebra: &algebra, - dataset: &dataset + QueryVariants::Select { + dataset, + algebra, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri)?; } - ), + write!( + f, + "{}", + SparqlGraphRootPattern { + algebra: &algebra, + dataset: &dataset + } + ) + } QueryVariants::Construct { construct, dataset, algebra, - } => write!( - f, - "CONSTRUCT {{ {} }} {} WHERE {{ {} }}", - construct - .iter() - .map(|t| t.to_string()) - .collect::>() - .join(" . "), - dataset, - SparqlGraphRootPattern { - algebra: &algebra, - dataset: &EMPTY_DATASET + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri)?; } - ), - QueryVariants::Describe { dataset, algebra } => write!( - f, - "DESCRIBE * {} WHERE {{ {} }}", + write!( + f, + "CONSTRUCT {{ {} }} {} WHERE {{ {} }}", + construct + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(" . 
"), + dataset, + SparqlGraphRootPattern { + algebra: &algebra, + dataset: &EMPTY_DATASET + } + ) + } + QueryVariants::Describe { dataset, - SparqlGraphRootPattern { - algebra: &algebra, - dataset: &EMPTY_DATASET + algebra, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri.as_str())?; } - ), - QueryVariants::Ask { dataset, algebra } => write!( - f, - "ASK {} WHERE {{ {} }}", + write!( + f, + "DESCRIBE * {} WHERE {{ {} }}", + dataset, + SparqlGraphRootPattern { + algebra: &algebra, + dataset: &EMPTY_DATASET + } + ) + } + QueryVariants::Ask { dataset, - SparqlGraphRootPattern { - algebra: &algebra, - dataset: &EMPTY_DATASET + algebra, + base_iri, + } => { + if let Some(base_iri) = base_iri { + writeln!(f, "BASE <{}>", base_iri)?; } - ), + write!( + f, + "ASK {} WHERE {{ {} }}", + dataset, + SparqlGraphRootPattern { + algebra: &algebra, + dataset: &EMPTY_DATASET + } + ) + } } } } diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index cd8e1a4d..72c730c5 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -2,23 +2,33 @@ use crate::model::BlankNode; use crate::model::Triple; use crate::sparql::model::*; use crate::sparql::plan::*; -use crate::store::numeric_encoder::MemoryStringStore; use crate::store::numeric_encoder::*; +use crate::store::numeric_encoder::{MemoryStringStore, ENCODED_EMPTY_STRING_LITERAL}; use crate::store::StoreConnection; use crate::Result; use chrono::prelude::*; +use digest::Digest; +use md5::Md5; use num_traits::identities::Zero; use num_traits::FromPrimitive; use num_traits::One; use num_traits::ToPrimitive; -use regex::RegexBuilder; -use rust_decimal::Decimal; +use rand::random; +use regex::{Regex, RegexBuilder}; +use rio_api::iri::Iri; +use rio_api::model as rio; +use rust_decimal::{Decimal, RoundingStrategy}; +use sha1::Sha1; +use sha2::{Sha256, Sha384, Sha512}; use std::cmp::Ordering; use std::collections::BTreeMap; use std::collections::HashSet; +use std::convert::TryInto; 
+use std::fmt::Write; use std::iter::once; use std::iter::Iterator; use std::ops::Deref; +use std::str; use std::sync::Arc; use std::sync::Mutex; use std::u64; @@ -32,13 +42,17 @@ type EncodedTuplesIterator<'a> = Box> + pub struct SimpleEvaluator { dataset: DatasetView, bnodes_map: Arc>>, + base_iri: Option>>, + now: DateTime, } impl<'a, S: StoreConnection + 'a> SimpleEvaluator { - pub fn new(dataset: S) -> Self { + pub fn new(dataset: S, base_iri: Option>) -> Self { Self { dataset: DatasetView::new(dataset), bnodes_map: Arc::new(Mutex::new(BTreeMap::default())), + base_iri: base_iri.map(Arc::new), + now: Utc::now().with_timezone(&FixedOffset::east(0)), } } @@ -412,25 +426,25 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { NumericBinaryOperands::Float(v1, v2) => (v1 + v2).into(), NumericBinaryOperands::Double(v1, v2) => (v1 + v2).into(), NumericBinaryOperands::Integer(v1, v2) => v1.checked_add(v2)?.into(), - NumericBinaryOperands::Decimal(v1, v2) => (v1 + v2).into(), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_add(v2)?.into(), }), PlanExpression::Sub(a, b) => Some(match self.parse_numeric_operands(a, b, tuple)? { NumericBinaryOperands::Float(v1, v2) => (v1 - v2).into(), NumericBinaryOperands::Double(v1, v2) => (v1 - v2).into(), NumericBinaryOperands::Integer(v1, v2) => v1.checked_sub(v2)?.into(), - NumericBinaryOperands::Decimal(v1, v2) => (v1 - v2).into(), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_sub(v2)?.into(), }), PlanExpression::Mul(a, b) => Some(match self.parse_numeric_operands(a, b, tuple)? { NumericBinaryOperands::Float(v1, v2) => (v1 * v2).into(), NumericBinaryOperands::Double(v1, v2) => (v1 * v2).into(), NumericBinaryOperands::Integer(v1, v2) => v1.checked_mul(v2)?.into(), - NumericBinaryOperands::Decimal(v1, v2) => (v1 * v2).into(), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_mul(v2)?.into(), }), PlanExpression::Div(a, b) => Some(match self.parse_numeric_operands(a, b, tuple)? 
{ NumericBinaryOperands::Float(v1, v2) => (v1 / v2).into(), NumericBinaryOperands::Double(v1, v2) => (v1 / v2).into(), NumericBinaryOperands::Integer(v1, v2) => v1.checked_div(v2)?.into(), - NumericBinaryOperands::Decimal(v1, v2) => (v1 / v2).into(), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2)?.into(), }), PlanExpression::UnaryPlus(e) => match self.eval_expression(e, tuple)? { EncodedTerm::FloatLiteral(value) => Some((*value).into()), @@ -461,29 +475,267 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { e if e.is_literal() => Some(ENCODED_EMPTY_STRING_LITERAL), _ => None, }, + PlanExpression::LangMatches(language_tag, language_range) => { + let language_tag = + self.to_simple_string(self.eval_expression(language_tag, tuple)?)?; + let language_range = + self.to_simple_string(self.eval_expression(language_range, tuple)?)?; + Some( + if &*language_range == "*" { + !language_tag.is_empty() + } else { + !ZipLongest::new(language_range.split('-'), language_tag.split('-')).any( + |parts| match parts { + (Some(range_subtag), Some(language_subtag)) => { + !range_subtag.eq_ignore_ascii_case(language_subtag) + } + (Some(_), None) => true, + (None, _) => false, + }, + ) + } + .into(), + ) + } PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(), PlanExpression::Bound(v) => Some(has_tuple_value(*v, tuple).into()), - PlanExpression::IRI(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::NamedNode { iri_id } => Some(EncodedTerm::NamedNode { iri_id }), - EncodedTerm::StringLiteral { value_id } => { - Some(EncodedTerm::NamedNode { iri_id: value_id }) + PlanExpression::IRI(e) => { + let iri_id = match self.eval_expression(e, tuple)? 
{ + EncodedTerm::NamedNode { iri_id } => Some(iri_id), + EncodedTerm::StringLiteral { value_id } => Some(value_id), + _ => None, + }?; + let iri = self.dataset.get_str(iri_id).ok()??; + Some(if let Some(base_iri) = &self.base_iri { + EncodedTerm::NamedNode { + iri_id: self + .dataset + .insert_str(&base_iri.resolve(&iri).ok()?.into_inner()) + .ok()?, + } + } else { + Iri::parse(iri).ok()?; + EncodedTerm::NamedNode { iri_id } + }) + } + PlanExpression::BNode(id) => match id { + Some(id) => { + if let EncodedTerm::StringLiteral { value_id } = + self.eval_expression(id, tuple)? + { + Some(EncodedTerm::BlankNode( + *self + .bnodes_map + .lock() + .ok()? + .entry(value_id) + .or_insert_with(Uuid::new_v4), + )) + } else { + None + } } + None => Some(EncodedTerm::BlankNode(Uuid::new_v4())), + }, + PlanExpression::Rand => Some(random::().into()), + PlanExpression::Abs(e) => match self.eval_expression(e, tuple)? { + EncodedTerm::IntegerLiteral(value) => Some(value.checked_abs()?.into()), + EncodedTerm::DecimalLiteral(value) => Some(value.abs().into()), + EncodedTerm::FloatLiteral(value) => Some(value.abs().into()), + EncodedTerm::DoubleLiteral(value) => Some(value.abs().into()), _ => None, }, - PlanExpression::BNode(id) => match id { - Some(id) => match self.eval_expression(id, tuple)? { - EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::BlankNode( - *self - .bnodes_map - .lock() - .ok()? - .entry(value_id) - .or_insert_with(Uuid::new_v4), - )), - _ => None, - }, - None => Some(EncodedTerm::BlankNode(Uuid::new_v4())), + PlanExpression::Ceil(e) => match self.eval_expression(e, tuple)? { + EncodedTerm::IntegerLiteral(value) => Some(value.into()), + EncodedTerm::DecimalLiteral(value) => Some(value.ceil().into()), + EncodedTerm::FloatLiteral(value) => Some(value.ceil().into()), + EncodedTerm::DoubleLiteral(value) => Some(value.ceil().into()), + _ => None, }, + PlanExpression::Floor(e) => match self.eval_expression(e, tuple)? 
{ + EncodedTerm::IntegerLiteral(value) => Some(value.into()), + EncodedTerm::DecimalLiteral(value) => Some(value.floor().into()), + EncodedTerm::FloatLiteral(value) => Some(value.floor().into()), + EncodedTerm::DoubleLiteral(value) => Some(value.floor().into()), + _ => None, + }, + PlanExpression::Round(e) => match self.eval_expression(e, tuple)? { + EncodedTerm::IntegerLiteral(value) => Some(value.into()), + EncodedTerm::DecimalLiteral(value) => Some( + value + .round_dp_with_strategy(0, RoundingStrategy::RoundHalfUp) + .into(), + ), + EncodedTerm::FloatLiteral(value) => Some(value.round().into()), + EncodedTerm::DoubleLiteral(value) => Some(value.round().into()), + _ => None, + }, + PlanExpression::Concat(l) => { + let mut result = String::default(); + let mut language = None; + for e in l { + let (value, e_language) = + self.to_string_and_language(self.eval_expression(e, tuple)?)?; + if let Some(lang) = language { + if lang != e_language { + language = Some(None) + } + } else { + language = Some(e_language) + } + result += &value + } + self.build_plain_literal(&result, language.and_then(|v| v)) + } + PlanExpression::SubStr(source, starting_loc, length) => { + let (source, language) = + self.to_string_and_language(self.eval_expression(source, tuple)?)?; + + let starting_location: usize = if let EncodedTerm::IntegerLiteral(v) = + self.eval_expression(starting_loc, tuple)? + { + v.try_into().ok()? + } else { + return None; + }; + let length: Option = if let Some(length) = length { + if let EncodedTerm::IntegerLiteral(v) = self.eval_expression(length, tuple)? { + Some(v.try_into().ok()?) + } else { + return None; + } + } else { + None + }; + + // We want to slice on char indices, not byte indices + let mut start_iter = source + .char_indices() + .skip(starting_location.checked_sub(1)?) 
+ .peekable(); + let result = if let Some((start_position, _)) = start_iter.peek().cloned() { + if let Some(length) = length { + let mut end_iter = start_iter.skip(length).peekable(); + if let Some((end_position, _)) = end_iter.peek() { + &source[start_position..*end_position] + } else { + &source[start_position..] + } + } else { + &source[start_position..] + } + } else { + "" + }; + self.build_plain_literal(result, language) + } + PlanExpression::StrLen(arg) => Some( + (self + .to_string(self.eval_expression(arg, tuple)?)? + .chars() + .count() as i128) + .into(), + ), + PlanExpression::Replace(arg, pattern, replacement, flags) => { + let regex = self.compile_pattern( + self.eval_expression(pattern, tuple)?, + if let Some(flags) = flags { + Some(self.eval_expression(flags, tuple)?) + } else { + None + }, + )?; + let (text, language) = + self.to_string_and_language(self.eval_expression(arg, tuple)?)?; + let replacement = + self.to_simple_string(self.eval_expression(replacement, tuple)?)?; + self.build_plain_literal(&regex.replace_all(&text, &replacement as &str), language) + } + PlanExpression::UCase(e) => { + let (value, language) = + self.to_string_and_language(self.eval_expression(e, tuple)?)?; + self.build_plain_literal(&value.to_uppercase(), language) + } + PlanExpression::LCase(e) => { + let (value, language) = + self.to_string_and_language(self.eval_expression(e, tuple)?)?; + self.build_plain_literal(&value.to_lowercase(), language) + } + PlanExpression::StrStarts(arg1, arg2) => { + let (arg1, arg2, _) = self.to_argument_compatible_strings( + self.eval_expression(arg1, tuple)?, + self.eval_expression(arg2, tuple)?, + )?; + Some((&arg1).starts_with(&arg2 as &str).into()) + } + PlanExpression::EncodeForURI(ltrl) => { + let ltlr = self.to_string(self.eval_expression(ltrl, tuple)?)?; + let mut result = Vec::with_capacity(ltlr.len()); + for c in ltlr.bytes() { + match c { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' 
| b'~' => { + result.push(c) + } + _ => { + result.push(b'%'); + let hight = c / 16; + let low = c % 16; + result.push(if hight < 10 { + b'0' + hight + } else { + b'A' + (hight - 10) + }); + result.push(if low < 10 { + b'0' + low + } else { + b'A' + (low - 10) + }); + } + } + } + Some(EncodedTerm::StringLiteral { + value_id: self + .dataset + .insert_str(str::from_utf8(&result).ok()?) + .ok()?, + }) + } + PlanExpression::StrEnds(arg1, arg2) => { + let (arg1, arg2, _) = self.to_argument_compatible_strings( + self.eval_expression(arg1, tuple)?, + self.eval_expression(arg2, tuple)?, + )?; + Some((&arg1).ends_with(&arg2 as &str).into()) + } + PlanExpression::Contains(arg1, arg2) => { + let (arg1, arg2, _) = self.to_argument_compatible_strings( + self.eval_expression(arg1, tuple)?, + self.eval_expression(arg2, tuple)?, + )?; + Some((&arg1).contains(&arg2 as &str).into()) + } + PlanExpression::StrBefore(arg1, arg2) => { + let (arg1, arg2, language) = self.to_argument_compatible_strings( + self.eval_expression(arg1, tuple)?, + self.eval_expression(arg2, tuple)?, + )?; + if let Some(position) = (&arg1).find(&arg2 as &str) { + self.build_plain_literal(&arg1[..position], language) + } else { + Some(ENCODED_EMPTY_STRING_LITERAL) + } + } + PlanExpression::StrAfter(arg1, arg2) => { + let (arg1, arg2, language) = self.to_argument_compatible_strings( + self.eval_expression(arg1, tuple)?, + self.eval_expression(arg2, tuple)?, + )?; + if let Some(position) = (&arg1).find(&arg2 as &str) { + self.build_plain_literal(&arg1[position + arg2.len()..], language) + } else { + Some(ENCODED_EMPTY_STRING_LITERAL) + } + } PlanExpression::Year(e) => match self.eval_expression(e, tuple)? { EncodedTerm::DateLiteral(date) => Some(date.year().into()), EncodedTerm::NaiveDateLiteral(date) => Some(date.year().into()), @@ -518,23 +770,105 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { _ => None, }, PlanExpression::Seconds(e) => match self.eval_expression(e, tuple)? 
{ - EncodedTerm::NaiveTimeLiteral(time) => Some(time.second().into()), - EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.second().into()), - EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.second().into()), + EncodedTerm::NaiveTimeLiteral(time) => Some( + (Decimal::new(time.nanosecond().into(), 9) + Decimal::from(time.second())) + .into(), + ), + EncodedTerm::DateTimeLiteral(date_time) => Some( + (Decimal::new(date_time.nanosecond().into(), 9) + + Decimal::from(date_time.second())) + .into(), + ), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some( + (Decimal::new(date_time.nanosecond().into(), 9) + + Decimal::from(date_time.second())) + .into(), + ), _ => None, }, - PlanExpression::UUID() => Some(EncodedTerm::NamedNode { + PlanExpression::Timezone(e) => { + let timezone = match self.eval_expression(e, tuple)? { + EncodedTerm::DateLiteral(date) => date.timezone(), + EncodedTerm::DateTimeLiteral(date_time) => date_time.timezone(), + _ => return None, + }; + let mut result = String::with_capacity(9); + let mut shift = timezone.local_minus_utc(); + if shift < 0 { + write!(&mut result, "-").ok()?; + shift = -shift + }; + write!(&mut result, "PT").ok()?; + + let hours = shift / 3600; + if hours > 0 { + write!(&mut result, "{}H", hours).ok()?; + } + + let minutes = (shift / 60) % 60; + if minutes > 0 { + write!(&mut result, "{}M", minutes).ok()?; + } + + let seconds = shift % 60; + if seconds > 0 || shift == 0 { + write!(&mut result, "{}S", seconds).ok()?; + } + Some(EncodedTerm::TypedLiteral { + value_id: self.dataset.insert_str(&result).ok()?, + datatype_id: self + .dataset + .insert_str("http://www.w3.org/2001/XMLSchema#dayTimeDuration") + .ok()?, + }) + } + PlanExpression::Tz(e) => { + let timezone = match self.eval_expression(e, tuple)? 
{ + EncodedTerm::DateLiteral(date) => Some(date.timezone()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.timezone()), + EncodedTerm::NaiveDateLiteral(_) + | EncodedTerm::NaiveTimeLiteral(_) + | EncodedTerm::NaiveDateTimeLiteral(_) => None, + _ => return None, + }; + Some(if let Some(timezone) = timezone { + EncodedTerm::StringLiteral { + value_id: if timezone.local_minus_utc() == 0 { + self.dataset.insert_str("Z").ok()? + } else { + self.dataset.insert_str(&timezone.to_string()).ok()? + }, + } + } else { + ENCODED_EMPTY_STRING_LITERAL + }) + } + PlanExpression::Now => Some(self.now.into()), + PlanExpression::UUID => Some(EncodedTerm::NamedNode { iri_id: self .dataset - .insert_str(&Uuid::new_v4().to_urn().to_string()) + .insert_str( + Uuid::new_v4() + .to_urn() + .encode_lower(&mut Uuid::encode_buffer()), + ) .ok()?, }), - PlanExpression::StrUUID() => Some(EncodedTerm::StringLiteral { + PlanExpression::StrUUID => Some(EncodedTerm::StringLiteral { value_id: self .dataset - .insert_str(&Uuid::new_v4().to_simple().to_string()) + .insert_str( + Uuid::new_v4() + .to_hyphenated() + .encode_lower(&mut Uuid::encode_buffer()), + ) .ok()?, }), + PlanExpression::MD5(arg) => self.hash::(arg, tuple), + PlanExpression::SHA1(arg) => self.hash::(arg, tuple), + PlanExpression::SHA256(arg) => self.hash::(arg, tuple), + PlanExpression::SHA384(arg) => self.hash::(arg, tuple), + PlanExpression::SHA512(arg) => self.hash::(arg, tuple), PlanExpression::Coalesce(l) => { for e in l { if let Some(result) = self.eval_expression(e, tuple) { @@ -558,6 +892,23 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { .to_simple_string_id(self.eval_expression(lang_tag, tuple)?)?, }) } + PlanExpression::StrDT(lexical_form, datatype) => { + let value = self.to_simple_string(self.eval_expression(lexical_form, tuple)?)?; + let datatype = if let EncodedTerm::NamedNode { iri_id } = + self.eval_expression(datatype, tuple)? + { + self.dataset.get_str(iri_id).ok()? 
+ } else { + None + }?; + self.dataset + .encoder() + .encode_rio_literal(rio::Literal::Typed { + value: &value, + datatype: rio::NamedNode { iri: &datatype }, + }) + .ok() + } PlanExpression::SameTerm(a, b) => { Some((self.eval_expression(a, tuple)? == self.eval_expression(b, tuple)?).into()) } @@ -580,55 +931,15 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { } .into(), ), - PlanExpression::LangMatches(language_tag, language_range) => { - let language_tag = - self.to_simple_string(self.eval_expression(language_tag, tuple)?)?; - let language_range = - self.to_simple_string(self.eval_expression(language_range, tuple)?)?; - Some( - if &*language_range == "*" { - !language_tag.is_empty() - } else { - !ZipLongest::new(language_range.split('-'), language_tag.split('-')).any( - |parts| match parts { - (Some(range_subtag), Some(language_subtag)) => { - !range_subtag.eq_ignore_ascii_case(language_subtag) - } - (Some(_), None) => true, - (None, _) => false, - }, - ) - } - .into(), - ) - } PlanExpression::Regex(text, pattern, flags) => { - // TODO Avoid to compile the regex each time - let pattern = self.to_simple_string(self.eval_expression(pattern, tuple)?)?; - let mut regex_builder = RegexBuilder::new(&pattern); - regex_builder.size_limit(REGEX_SIZE_LIMIT); - if let Some(flags) = flags { - let flags = self.to_simple_string(self.eval_expression(flags, tuple)?)?; - for flag in flags.chars() { - match flag { - 's' => { - regex_builder.dot_matches_new_line(true); - } - 'm' => { - regex_builder.multi_line(true); - } - 'i' => { - regex_builder.case_insensitive(true); - } - 'x' => { - regex_builder.ignore_whitespace(true); - } - 'q' => (), //TODO: implement - _ => (), - } - } - } - let regex = regex_builder.build().ok()?; + let regex = self.compile_pattern( + self.eval_expression(pattern, tuple)?, + if let Some(flags) = flags { + Some(self.eval_expression(flags, tuple)?) 
+ } else { + None + }, + )?; let text = self.to_string(self.eval_expression(text, tuple)?)?; Some(regex.is_match(&text).into()) } @@ -806,6 +1117,82 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { } } + fn to_string_and_language( + &self, + term: EncodedTerm, + ) -> Option<( as StringStore>::StringType, Option)> { + match term { + EncodedTerm::StringLiteral { value_id } => { + Some((self.dataset.get_str(value_id).ok()??, None)) + } + EncodedTerm::LangStringLiteral { + value_id, + language_id, + } => Some((self.dataset.get_str(value_id).ok()??, Some(language_id))), + _ => None, + } + } + + fn build_plain_literal(&self, value: &str, language: Option) -> Option { + Some(if let Some(language_id) = language { + EncodedTerm::LangStringLiteral { + value_id: self.dataset.insert_str(value).ok()?, + language_id, + } + } else { + EncodedTerm::StringLiteral { + value_id: self.dataset.insert_str(value).ok()?, + } + }) + } + + fn to_argument_compatible_strings( + &self, + arg1: EncodedTerm, + arg2: EncodedTerm, + ) -> Option<( + as StringStore>::StringType, + as StringStore>::StringType, + Option, + )> { + let (value1, language1) = self.to_string_and_language(arg1)?; + let (value2, language2) = self.to_string_and_language(arg2)?; + if language2.is_none() || language1 == language2 { + Some((value1, value2, language1)) + } else { + None + } + } + + fn compile_pattern(&self, pattern: EncodedTerm, flags: Option) -> Option { + // TODO Avoid to compile the regex each time + let pattern = self.to_simple_string(pattern)?; + let mut regex_builder = RegexBuilder::new(&pattern); + regex_builder.size_limit(REGEX_SIZE_LIMIT); + if let Some(flags) = flags { + let flags = self.to_simple_string(flags)?; + for flag in flags.chars() { + match flag { + 's' => { + regex_builder.dot_matches_new_line(true); + } + 'm' => { + regex_builder.multi_line(true); + } + 'i' => { + regex_builder.case_insensitive(true); + } + 'x' => { + regex_builder.ignore_whitespace(true); + } + 'q' => (), //TODO: 
implement + _ => (), + } + } + } + regex_builder.build().ok() + } + fn parse_numeric_operands( &self, e1: &PlanExpression, @@ -1118,6 +1505,18 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { .cmp(&self.dataset.get_str(b).ok()??), ) } + + fn hash( + &self, + arg: &PlanExpression, + tuple: &[Option], + ) -> Option { + let input = self.to_simple_string(self.eval_expression(arg, tuple)?)?; + let hash = hex::encode(H::new().chain(&input as &str).result()); + Some(EncodedTerm::StringLiteral { + value_id: self.dataset.insert_str(&hash).ok()?, + }) + } } #[derive(Clone)] diff --git a/lib/src/sparql/mod.rs b/lib/src/sparql/mod.rs index 6632a997..093b4e66 100644 --- a/lib/src/sparql/mod.rs +++ b/lib/src/sparql/mod.rs @@ -60,28 +60,31 @@ impl SimplePreparedQuery { QueryVariants::Select { algebra, dataset: _, + base_iri, } => { let (plan, variables) = PlanBuilder::build(&connection, &algebra)?; SimplePreparedQueryOptions::Select { plan, variables, - evaluator: SimpleEvaluator::new(connection), + evaluator: SimpleEvaluator::new(connection, base_iri), } } QueryVariants::Ask { algebra, dataset: _, + base_iri, } => { let (plan, _) = PlanBuilder::build(&connection, &algebra)?; SimplePreparedQueryOptions::Ask { plan, - evaluator: SimpleEvaluator::new(connection), + evaluator: SimpleEvaluator::new(connection, base_iri), } } QueryVariants::Construct { construct, algebra, dataset: _, + base_iri, } => { let (plan, variables) = PlanBuilder::build(&connection, &algebra)?; SimplePreparedQueryOptions::Construct { @@ -91,17 +94,18 @@ impl SimplePreparedQuery { &construct, variables, )?, - evaluator: SimpleEvaluator::new(connection), + evaluator: SimpleEvaluator::new(connection, base_iri), } } QueryVariants::Describe { algebra, dataset: _, + base_iri, } => { let (plan, _) = PlanBuilder::build(&connection, &algebra)?; SimplePreparedQueryOptions::Describe { plan, - evaluator: SimpleEvaluator::new(connection), + evaluator: SimpleEvaluator::new(connection, base_iri), } } })) @@ -132,7 
+136,7 @@ impl PreparedQuery for SimplePreparedQuery { } /// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/) -#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct Query(QueryVariants); impl fmt::Display for Query { diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index fabfc7f4..91ff9ea9 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -179,13 +179,17 @@ pub enum PlanExpression { Bound(usize), IRI(Box), BNode(Option>), - /*Rand(), + Rand, Abs(Box), Ceil(Box), Floor(Box), Round(Box), Concat(Vec), - SubStr(Box, Box, Option>), + SubStr( + Box, + Box, + Option>, + ), StrLen(Box), Replace( Box, @@ -200,23 +204,23 @@ pub enum PlanExpression { StrStarts(Box, Box), StrEnds(Box, Box), StrBefore(Box, Box), - StrAfter(Box, Box),*/ + StrAfter(Box, Box), Year(Box), Month(Box), Day(Box), Hours(Box), Minutes(Box), Seconds(Box), - /*Timezone(Box), + Timezone(Box), Tz(Box), - Now(),*/ - UUID(), - StrUUID(), - /*MD5(Box), + Now, + UUID, + StrUUID, + MD5(Box), SHA1(Box), SHA256(Box), SHA384(Box), - SHA512(Box),*/ + SHA512(Box), Coalesce(Vec), If( Box, @@ -224,7 +228,7 @@ pub enum PlanExpression { Box, ), StrLang(Box, Box), - //StrDT(Box, Box), + StrDT(Box, Box), SameTerm(Box, Box), IsIRI(Box), IsBlank(Box), @@ -251,30 +255,13 @@ impl PlanExpression { match self { PlanExpression::Constant(_) | PlanExpression::BNode(None) - | PlanExpression::UUID() - | PlanExpression::StrUUID() => (), + | PlanExpression::UUID + | PlanExpression::StrUUID + | PlanExpression::Rand + | PlanExpression::Now => (), PlanExpression::Variable(v) | PlanExpression::Bound(v) => { set.insert(*v); } - PlanExpression::Or(a, b) - | PlanExpression::And(a, b) - | PlanExpression::Equal(a, b) - | PlanExpression::NotEqual(a, b) - | PlanExpression::Greater(a, b) - | PlanExpression::GreaterOrEq(a, b) - | PlanExpression::Lower(a, b) - | PlanExpression::LowerOrEq(a, b) - | PlanExpression::Add(a, b) - | 
PlanExpression::Sub(a, b) - | PlanExpression::Mul(a, b) - | PlanExpression::Div(a, b) - | PlanExpression::SameTerm(a, b) - | PlanExpression::LangMatches(a, b) - | PlanExpression::StrLang(a, b) - | PlanExpression::Regex(a, b, None) => { - a.add_variables(set); - b.add_variables(set); - } PlanExpression::UnaryPlus(e) | PlanExpression::UnaryMinus(e) | PlanExpression::UnaryNot(e) @@ -301,23 +288,68 @@ impl PlanExpression { | PlanExpression::DateCast(e) | PlanExpression::TimeCast(e) | PlanExpression::DateTimeCast(e) - | PlanExpression::StringCast(e) => { + | PlanExpression::StringCast(e) + | PlanExpression::Abs(e) + | PlanExpression::Ceil(e) + | PlanExpression::Floor(e) + | PlanExpression::Round(e) + | PlanExpression::StrLen(e) + | PlanExpression::UCase(e) + | PlanExpression::LCase(e) + | PlanExpression::EncodeForURI(e) + | PlanExpression::Timezone(e) + | PlanExpression::Tz(e) + | PlanExpression::MD5(e) + | PlanExpression::SHA1(e) + | PlanExpression::SHA256(e) + | PlanExpression::SHA384(e) + | PlanExpression::SHA512(e) => { e.add_variables(set); } - PlanExpression::Coalesce(l) => { - for e in l { - e.add_variables(set); - } + PlanExpression::Or(a, b) + | PlanExpression::And(a, b) + | PlanExpression::Equal(a, b) + | PlanExpression::NotEqual(a, b) + | PlanExpression::Greater(a, b) + | PlanExpression::GreaterOrEq(a, b) + | PlanExpression::Lower(a, b) + | PlanExpression::LowerOrEq(a, b) + | PlanExpression::Add(a, b) + | PlanExpression::Sub(a, b) + | PlanExpression::Mul(a, b) + | PlanExpression::Div(a, b) + | PlanExpression::SameTerm(a, b) + | PlanExpression::LangMatches(a, b) + | PlanExpression::StrLang(a, b) + | PlanExpression::Contains(a, b) + | PlanExpression::StrStarts(a, b) + | PlanExpression::StrEnds(a, b) + | PlanExpression::StrBefore(a, b) + | PlanExpression::StrAfter(a, b) + | PlanExpression::StrDT(a, b) + | PlanExpression::Regex(a, b, None) + | PlanExpression::SubStr(a, b, None) => { + a.add_variables(set); + b.add_variables(set); } - PlanExpression::If(a, b, c) 
=> { + PlanExpression::If(a, b, c) + | PlanExpression::SubStr(a, b, Some(c)) + | PlanExpression::Replace(a, b, c, None) + | PlanExpression::Regex(a, b, Some(c)) => { a.add_variables(set); b.add_variables(set); c.add_variables(set); } - PlanExpression::Regex(a, b, Some(c)) => { + PlanExpression::Replace(a, b, c, Some(d)) => { a.add_variables(set); b.add_variables(set); c.add_variables(set); + d.add_variables(set); + } + PlanExpression::Coalesce(l) | PlanExpression::Concat(l) => { + for e in l { + e.add_variables(set); + } } PlanExpression::In(e, l) => { e.add_variables(set); diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index eef48bb7..64841fd6 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -305,6 +305,91 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> { )), None => None, }), + Function::Rand => PlanExpression::Rand, + Function::Abs => PlanExpression::Abs(Box::new(self.build_for_expression( + &parameters[0], + variables, + graph_name, + )?)), + Function::Ceil => PlanExpression::Ceil(Box::new(self.build_for_expression( + &parameters[0], + variables, + graph_name, + )?)), + Function::Floor => PlanExpression::Floor(Box::new(self.build_for_expression( + &parameters[0], + variables, + graph_name, + )?)), + Function::Round => PlanExpression::Round(Box::new(self.build_for_expression( + &parameters[0], + variables, + graph_name, + )?)), + Function::Concat => PlanExpression::Concat(self.expression_list( + &parameters, + variables, + graph_name, + )?), + Function::SubStr => PlanExpression::SubStr( + Box::new(self.build_for_expression(&parameters[0], variables, graph_name)?), + Box::new(self.build_for_expression(&parameters[1], variables, graph_name)?), + match parameters.get(2) { + Some(flags) => Some(Box::new( + self.build_for_expression(flags, variables, graph_name)?, + )), + None => None, + }, + ), + Function::StrLen => PlanExpression::StrLen(Box::new(self.build_for_expression( + &parameters[0], + variables, + graph_name, + )?)), + 
Function::Replace => PlanExpression::Replace( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[2], variables, graph_name)?), + match parameters.get(3) { + Some(flags) => Some(Box::new( + self.build_for_expression(flags, variables, graph_name)?, + )), + None => None, + }, + ), + Function::UCase => PlanExpression::UCase(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::LCase => PlanExpression::LCase(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::EncodeForURI => PlanExpression::EncodeForURI(Box::new( + self.build_for_expression(¶meters[0], variables, graph_name)?, + )), + Function::Contains => PlanExpression::Contains( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + ), + Function::StrStarts => PlanExpression::StrStarts( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + ), + Function::StrEnds => PlanExpression::StrEnds( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + ), + Function::StrBefore => PlanExpression::StrBefore( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + ), + Function::StrAfter => PlanExpression::StrAfter( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + ), Function::Year => PlanExpression::Year(Box::new(self.build_for_expression( ¶meters[0], variables, @@ -335,8 +420,42 @@ impl<'a, S: StoreConnection> 
PlanBuilder<'a, S> { variables, graph_name, )?)), - Function::UUID => PlanExpression::UUID(), - Function::StrUUID => PlanExpression::StrUUID(), + Function::Timezone => PlanExpression::Timezone(Box::new( + self.build_for_expression(¶meters[0], variables, graph_name)?, + )), + Function::Tz => PlanExpression::Tz(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::Now => PlanExpression::Now, + Function::UUID => PlanExpression::UUID, + Function::StrUUID => PlanExpression::StrUUID, + Function::MD5 => PlanExpression::MD5(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::SHA1 => PlanExpression::SHA1(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::SHA256 => PlanExpression::SHA256(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::SHA384 => PlanExpression::SHA384(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), + Function::SHA512 => PlanExpression::SHA512(Box::new(self.build_for_expression( + ¶meters[0], + variables, + graph_name, + )?)), Function::Coalesce => PlanExpression::Coalesce(self.expression_list( ¶meters, variables, @@ -351,6 +470,10 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> { Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), ), + Function::StrDT => PlanExpression::StrDT( + Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), + Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), + ), Function::SameTerm => PlanExpression::SameTerm( Box::new(self.build_for_expression(¶meters[0], variables, graph_name)?), Box::new(self.build_for_expression(¶meters[1], variables, graph_name)?), @@ -458,7 +581,6 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> { Err(format_err!("Not supported custom function {}", 
expression))? } } - _ => unimplemented!(), }, Expression::Bound(v) => PlanExpression::Bound(variable_key(variables, v)), Expression::Exists(n) => PlanExpression::Exists(Box::new( diff --git a/lib/src/sparql/sparql_grammar.rustpeg b/lib/src/sparql/sparql_grammar.rustpeg index 18c42051..c7ef6294 100644 --- a/lib/src/sparql/sparql_grammar.rustpeg +++ b/lib/src/sparql/sparql_grammar.rustpeg @@ -32,7 +32,8 @@ PrefixDecl -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { SelectQuery -> QueryVariants = s:SelectClause _ d:DatasetClauses _ w:WhereClause _ g:GroupClause? _ h:HavingClause? _ o:OrderClause? _ l:LimitOffsetClauses? _ v:ValuesClause { //TODO: Modifier QueryVariants::Select { dataset: d, - algebra: build_select(s, w, g, h, o, l, v, state) + algebra: build_select(s, w, g, h, o, l, v, state), + base_iri: state.base_iri.clone() } } @@ -65,7 +66,8 @@ ConstructQuery -> QueryVariants = QueryVariants::Construct { construct: c, dataset: d, - algebra: build_select(Selection::default(), w, g, h, o, l, v, state) + algebra: build_select(Selection::default(), w, g, h, o, l, v, state), + base_iri: state.base_iri.clone() } } / "CONSTRUCT"i _ d:DatasetClauses _ "WHERE"i _ '{' _ c:ConstructQuery_optional_triple_template _ '}' _ g:GroupClause? _ h:HavingClause? _ o:OrderClause? _ l:LimitOffsetClauses? _ v:ValuesClause { @@ -76,7 +78,8 @@ ConstructQuery -> QueryVariants = Selection::default(), GraphPattern::BGP(c.into_iter().map(TripleOrPathPattern::from).collect()), g, h, o, l, v, state - ) + ), + base_iri: state.base_iri.clone() } } @@ -87,7 +90,8 @@ DescribeQuery -> QueryVariants = "DESCRIBE"i _ '*' _ d:DatasetClauses w:WhereClause? _ g:GroupClause? _ h:HavingClause? _ o:OrderClause? _ l:LimitOffsetClauses? 
_ v:ValuesClause { QueryVariants::Describe { dataset: d, - algebra: build_select(Selection::default(), w.unwrap_or_else(GraphPattern::default), g, h, o, l, v, state) + algebra: build_select(Selection::default(), w.unwrap_or_else(GraphPattern::default), g, h, o, l, v, state), + base_iri: state.base_iri.clone() } } / "DESCRIBE"i _ p:DescribeQuery_item+ _ d:DatasetClauses w:WhereClause? _ g:GroupClause? _ h:HavingClause? _ o:OrderClause? _ l:LimitOffsetClauses? _ v:ValuesClause { @@ -99,7 +103,8 @@ DescribeQuery -> QueryVariants = NamedNodeOrVariable::NamedNode(n) => SelectionMember::Expression(n.into(), Variable::default()), NamedNodeOrVariable::Variable(v) => SelectionMember::Variable(v) }).collect()) - }, w.unwrap_or_else(GraphPattern::default), g, h, o, l, v, state) + }, w.unwrap_or_else(GraphPattern::default), g, h, o, l, v, state), + base_iri: state.base_iri.clone() } } DescribeQuery_item -> NamedNodeOrVariable = i:VarOrIri _ { i } @@ -108,7 +113,8 @@ DescribeQuery_item -> NamedNodeOrVariable = i:VarOrIri _ { i } AskQuery -> QueryVariants = "ASK"i _ d:DatasetClauses w:WhereClause _ g:GroupClause? _ h:HavingClause? _ o:OrderClause? _ l:LimitOffsetClauses? 
_ v:ValuesClause { QueryVariants::Ask { dataset: d, - algebra: build_select(Selection::default(), w, g, h, o, l, v, state) + algebra: build_select(Selection::default(), w, g, h, o, l, v, state), + base_iri: state.base_iri.clone() } } diff --git a/lib/src/sparql/xml_results.rs b/lib/src/sparql/xml_results.rs index 3106cb18..bc3cecc6 100644 --- a/lib/src/sparql/xml_results.rs +++ b/lib/src/sparql/xml_results.rs @@ -154,10 +154,18 @@ pub fn read_xml_results<'a>(source: impl BufRead + 'a) -> Result return Err(format_err!("Expecting tag, found {}", reader.decode(event.name())?)); } } - State::Head => if event.name() == b"variable" || event.name() == b"link" { - return Err(format_err!(" and tag should be autoclosing")); - } else { - return Err(format_err!("Expecting or tag, found {}", reader.decode(event.name())?)); + State::Head => { + if event.name() == b"variable" { + let name = event.attributes() + .filter_map(|attr| attr.ok()) + .find(|attr| attr.key == b"name") + .ok_or_else(|| format_err!("No name attribute found for the tag"))?; + variables.push(name.unescape_and_decode_value(&reader)?); + } else if event.name() == b"link" { + // no op + } else { + return Err(format_err!("Expecting or tag, found {}", reader.decode(event.name())?)); + } } State::AfterHead => { if event.name() == b"boolean" { @@ -184,6 +192,13 @@ pub fn read_xml_results<'a>(source: impl BufRead + 'a) -> Result State::Boolean => return Err(format_err!("Unexpected tag inside of tag: {}", reader.decode(event.name())?)) }, Event::Empty(event) => match state { + State::Sparql => { + if event.name() == b"head" { + state = State::AfterHead; + } else { + return Err(format_err!("Expecting tag, found {}", reader.decode(event.name())?)); + } + } State::Head => { if event.name() == b"variable" { let name = event.attributes() @@ -366,7 +381,12 @@ impl ResultsIterator { } State::Literal => { term = Some( - build_literal(self.reader.decode(&data)?, &lang, &datatype).into(), + build_literal( + 
self.reader.decode(&data)?, + lang.take(), + datatype.take(), + ) + .into(), ); } _ => { @@ -400,7 +420,7 @@ impl ResultsIterator { State::Literal => { if term.is_none() { //We default to the empty literal - term = Some(build_literal("", &lang, &datatype).into()) + term = Some(build_literal("", lang.take(), datatype.take()).into()) } state = State::Binding; } @@ -415,13 +435,13 @@ impl ResultsIterator { fn build_literal( value: impl Into, - lang: &Option, - datatype: &Option, + lang: Option, + datatype: Option, ) -> Literal { match datatype { - Some(datatype) => Literal::new_typed_literal(value, datatype.clone()), + Some(datatype) => Literal::new_typed_literal(value, datatype), None => match lang { - Some(lang) => Literal::new_language_tagged_literal(value, lang.clone()), + Some(lang) => Literal::new_language_tagged_literal(value, lang), None => Literal::new_simple_literal(value), }, } diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 1b789f04..af3a91f8 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -89,6 +89,7 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/bind/manifest.ttl", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/construct/manifest.ttl", "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/exists/manifest.ttl", + "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest.ttl", ]; let test_blacklist = vec![ @@ -116,6 +117,10 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-datatype-2").unwrap(), // FROM support NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/construct/manifest#constructwhere04").unwrap(), + //BNODE() scope is currently wrong + NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#bnode01").unwrap(), + 
//Decimal precision problem + NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#coalesce01").unwrap(), ]; let mut failed = Vec::default(); @@ -211,7 +216,9 @@ fn load_graph_to_repository( connection: &<&MemoryRepository as Repository>::Connection, to_graph_name: Option<&NamedOrBlankNode>, ) -> Result<()> { - let syntax = if url.ends_with(".ttl") { + let syntax = if url.ends_with(".nt") { + GraphSyntax::NTriples + } else if url.ends_with(".ttl") { GraphSyntax::Turtle } else if url.ends_with(".rdf") { GraphSyntax::RdfXml