Evaluates more SPARQL built-in functions

pull/10/head
Tpt 6 years ago
parent fc9a987dd7
commit 909c7abd07
  1. 1
      lib/Cargo.toml
  2. 1
      lib/src/lib.rs
  3. 141
      lib/src/sparql/eval.rs
  4. 6
      lib/src/store/numeric_encoder.rs
  5. 68
      lib/tests/sparql_test_cases.rs

@ -27,6 +27,7 @@ ordered-float = "1"
num-traits = "0.2" num-traits = "0.2"
rust_decimal = "0.10" rust_decimal = "0.10"
chrono = "0.4" chrono = "0.4"
language-tags = "0.2"
[build-dependencies] [build-dependencies]
peg = "0.5" peg = "0.5"

@ -37,6 +37,7 @@ extern crate error_chain;
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
extern crate chrono; extern crate chrono;
extern crate language_tags;
extern crate num_traits; extern crate num_traits;
extern crate ordered_float; extern crate ordered_float;
extern crate quick_xml; extern crate quick_xml;

@ -1,5 +1,7 @@
use chrono::DateTime; use chrono::DateTime;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use language_tags::LanguageTag;
use model::BlankNode;
use num_traits::identities::Zero; use num_traits::identities::Zero;
use num_traits::FromPrimitive; use num_traits::FromPrimitive;
use num_traits::One; use num_traits::One;
@ -8,10 +10,12 @@ use ordered_float::OrderedFloat;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use sparql::algebra::*; use sparql::algebra::*;
use sparql::plan::*; use sparql::plan::*;
use std::cmp::Ordering;
use std::collections::HashSet; use std::collections::HashSet;
use std::iter::once; use std::iter::once;
use std::iter::Iterator; use std::iter::Iterator;
use std::str; use std::str;
use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use store::encoded::EncodedQuadsStore; use store::encoded::EncodedQuadsStore;
use store::numeric_encoder::*; use store::numeric_encoder::*;
@ -277,23 +281,47 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
}, },
}, },
PlanExpression::Equal(a, b) => { PlanExpression::Equal(a, b) => {
Some((self.eval_expression(a, tuple)? == self.eval_expression(b, tuple)?).into()) let a = self.eval_expression(a, tuple)?;
let b = self.eval_expression(b, tuple)?;
Some((a == b || self.partial_cmp_terms(a, b) == Some(Ordering::Equal)).into())
} }
PlanExpression::NotEqual(a, b) => { PlanExpression::NotEqual(a, b) => {
Some((self.eval_expression(a, tuple)? != self.eval_expression(b, tuple)?).into()) let a = self.eval_expression(a, tuple)?;
} let b = self.eval_expression(b, tuple)?;
PlanExpression::Greater(a, b) => { Some((a != b && self.partial_cmp_terms(a, b) != Some(Ordering::Equal)).into())
Some((self.eval_expression(a, tuple)? > self.eval_expression(b, tuple)?).into()) }
} PlanExpression::Greater(a, b) => Some(
PlanExpression::GreaterOrEq(a, b) => { (self.partial_cmp_terms(
Some((self.eval_expression(a, tuple)? >= self.eval_expression(b, tuple)?).into()) self.eval_expression(a, tuple)?,
} self.eval_expression(b, tuple)?,
PlanExpression::Lower(a, b) => { )? == Ordering::Greater)
Some((self.eval_expression(a, tuple)? < self.eval_expression(b, tuple)?).into()) .into(),
} ),
PlanExpression::LowerOrEq(a, b) => { PlanExpression::GreaterOrEq(a, b) => Some(
Some((self.eval_expression(a, tuple)? <= self.eval_expression(b, tuple)?).into()) match self.partial_cmp_terms(
} self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?,
)? {
Ordering::Greater | Ordering::Equal => true,
_ => false,
}.into(),
),
PlanExpression::Lower(a, b) => Some(
(self.partial_cmp_terms(
self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?,
)? == Ordering::Less)
.into(),
),
PlanExpression::LowerOrEq(a, b) => Some(
match self.partial_cmp_terms(
self.eval_expression(a, tuple)?,
self.eval_expression(b, tuple)?,
)? {
Ordering::Less | Ordering::Equal => true,
_ => false,
}.into(),
),
PlanExpression::Add(a, b) => Some(match self.parse_numeric_operands(a, b, tuple)? { PlanExpression::Add(a, b) => Some(match self.parse_numeric_operands(a, b, tuple)? {
NumericBinaryOperands::Float(v1, v2) => (v1 + v2).into(), NumericBinaryOperands::Float(v1, v2) => (v1 + v2).into(),
NumericBinaryOperands::Double(v1, v2) => (v1 + v2).into(), NumericBinaryOperands::Double(v1, v2) => (v1 + v2).into(),
@ -344,6 +372,7 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
value_id: language_id, value_id: language_id,
}) })
} }
e if e.is_literal() => Some(ENCODED_EMPTY_SIMPLE_LITERAL),
_ => None, _ => None,
}, },
PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(), PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(),
@ -356,6 +385,10 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
} }
_ => None, _ => None,
}, },
PlanExpression::BNode(id) => match id {
Some(id) => unimplemented!(),
None => Some(BlankNode::default().into()),
},
PlanExpression::SameTerm(a, b) => { PlanExpression::SameTerm(a, b) => {
Some((self.eval_expression(a, tuple)? == self.eval_expression(b, tuple)?).into()) Some((self.eval_expression(a, tuple)? == self.eval_expression(b, tuple)?).into())
} }
@ -368,6 +401,31 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
PlanExpression::IsLiteral(e) => { PlanExpression::IsLiteral(e) => {
Some(self.eval_expression(e, tuple)?.is_literal().into()) Some(self.eval_expression(e, tuple)?.is_literal().into())
} }
PlanExpression::IsNumeric(e) => Some(
match self.eval_expression(e, tuple)? {
EncodedTerm::FloatLiteral(_)
| EncodedTerm::DoubleLiteral(_)
| EncodedTerm::IntegerLiteral(_)
| EncodedTerm::DecimalLiteral(_) => true,
_ => false,
}.into(),
),
PlanExpression::LangMatches(language_tag, language_range) => {
let language_tag =
self.to_simple_string(self.eval_expression(language_tag, tuple)?)?;
let language_range =
self.to_simple_string(self.eval_expression(language_range, tuple)?)?;
Some(
if language_range == "*" {
!language_tag.is_empty()
} else {
LanguageTag::from_str(&language_range)
.ok()?
.matches(&LanguageTag::from_str(&language_tag).ok()?)
}.into(),
)
}
PlanExpression::Regex(text, pattern, flags) => unimplemented!(),
PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::BooleanLiteral(value) => Some(value.into()), EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
@ -473,7 +531,6 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral { PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral {
value_id: self.to_string_id(self.eval_expression(e, tuple)?)?, value_id: self.to_string_id(self.eval_expression(e, tuple)?)?,
}), }),
e => unimplemented!(),
} }
} }
@ -517,6 +574,18 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
} }
} }
/// Extracts the text of a simple literal as an owned `String`.
///
/// Returns `None` when `term` is not an `EncodedTerm::SimpleLiteral`, when the
/// store lookup for its `value_id` fails or yields nothing, or when the stored
/// bytes are not valid UTF-8.
fn to_simple_string(&self, term: EncodedTerm) -> Option<String> {
    match term {
        EncodedTerm::SimpleLiteral { value_id } => {
            // A store error and a missing entry both collapse to `None`.
            let bytes = self.store.get_bytes(value_id).ok()??;
            let text = str::from_utf8(&bytes).ok()?;
            Some(text.to_owned())
        }
        _ => None,
    }
}
fn parse_numeric_operands( fn parse_numeric_operands(
&self, &self,
e1: &PlanExpression, e1: &PlanExpression,
@ -600,6 +669,46 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
})), })),
) )
} }
fn partial_cmp_terms(&self, a: EncodedTerm, b: EncodedTerm) -> Option<Ordering> {
match a {
EncodedTerm::SimpleLiteral { value_id: a }
| EncodedTerm::StringLiteral { value_id: a } => match b {
EncodedTerm::SimpleLiteral { value_id: b }
| EncodedTerm::StringLiteral { value_id: b } => a.partial_cmp(&b),
_ => None,
},
EncodedTerm::FloatLiteral(a) => match b {
EncodedTerm::FloatLiteral(b) => (*a).partial_cmp(&*b),
EncodedTerm::DoubleLiteral(b) => a.to_f64()?.partial_cmp(&*b),
EncodedTerm::IntegerLiteral(b) => (*a).partial_cmp(&b.to_f32()?),
EncodedTerm::DecimalLiteral(b) => (*a).partial_cmp(&b.to_f32()?),
_ => None,
},
EncodedTerm::DoubleLiteral(a) => match b {
EncodedTerm::FloatLiteral(b) => (*a).partial_cmp(&b.to_f64()?),
EncodedTerm::DoubleLiteral(b) => (*a).partial_cmp(&*b),
EncodedTerm::IntegerLiteral(b) => (*a).partial_cmp(&b.to_f64()?),
EncodedTerm::DecimalLiteral(b) => (*a).partial_cmp(&b.to_f64()?),
_ => None,
},
EncodedTerm::IntegerLiteral(a) => match b {
EncodedTerm::FloatLiteral(b) => a.to_f32()?.partial_cmp(&*b),
EncodedTerm::DoubleLiteral(b) => a.to_f64()?.partial_cmp(&*b),
EncodedTerm::IntegerLiteral(b) => a.partial_cmp(&b),
EncodedTerm::DecimalLiteral(b) => Decimal::from_i128(a)?.partial_cmp(&b),
_ => None,
},
EncodedTerm::DecimalLiteral(a) => match b {
EncodedTerm::FloatLiteral(b) => a.to_f32()?.partial_cmp(&*b),
EncodedTerm::DoubleLiteral(b) => a.to_f64()?.partial_cmp(&*b),
EncodedTerm::IntegerLiteral(b) => a.partial_cmp(&Decimal::from_i128(b)?),
EncodedTerm::DecimalLiteral(b) => a.partial_cmp(&b),
_ => None,
},
_ => None,
}
}
} }
enum NumericBinaryOperands { enum NumericBinaryOperands {

@ -234,6 +234,12 @@ impl From<NaiveDateTime> for EncodedTerm {
} }
} }
/// Converts a `BlankNode` into an `EncodedTerm::BlankNode` carrying a copy of
/// the value returned by the node's `as_uuid()`.
impl From<BlankNode> for EncodedTerm {
    fn from(node: BlankNode) -> Self {
        let uuid = *node.as_uuid();
        EncodedTerm::BlankNode(uuid)
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct EncodedQuad { pub struct EncodedQuad {
pub subject: EncodedTerm, pub subject: EncodedTerm,

@ -96,6 +96,12 @@ fn sparql_w3c_query_evaluation_testsuite() {
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/cast/manifest.ttl").unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/cast/manifest.ttl").unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest.ttl") Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest.ttl")
.unwrap(), .unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest.ttl")
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest.ttl")
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-ops/manifest.ttl")
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest.ttl").unwrap(), Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest.ttl").unwrap(),
Url::parse( Url::parse(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest.ttl", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest.ttl",
@ -113,6 +119,31 @@ fn sparql_w3c_query_evaluation_testsuite() {
NamedNode::from_str( NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-9", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-9",
).unwrap(), ).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-1",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-2",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-1",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-2",
).unwrap(),
//Multiple lexical forms of the same xsd:double. Our system does strong normalization.
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-simple",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-eq",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-not-eq",
).unwrap(),
//URI normalization: we are normalizing more strongly //URI normalization: we are normalizing more strongly
NamedNode::from_str( NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-3", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-3",
@ -121,6 +152,27 @@ fn sparql_w3c_query_evaluation_testsuite() {
NamedNode::from_str( NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified", "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified",
).unwrap(), ).unwrap(),
//Case insensitive language tag comparison
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#lang-case-insensitive-eq",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#lang-case-insensitive-ne",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-lang-3",
).unwrap(),
//Difference in language matching
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-langMatches-basic",
).unwrap(),
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-langMatches-basic",
).unwrap(),
//DATATYPE("foo"@en) returns rdf:langString in SPARQL 1.1
NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-datatype-2",
).unwrap(),
]; ];
let client = RDFClient::default(); let client = RDFClient::default();
@ -170,14 +222,14 @@ fn sparql_w3c_query_evaluation_testsuite() {
.load_sparql_query_result_graph(test.result.clone().unwrap()) .load_sparql_query_result_graph(test.result.clone().unwrap())
.unwrap(); .unwrap();
assert!( assert!(
actual_graph.is_isomorphic(&expected_graph).unwrap(), actual_graph.is_isomorphic(&expected_graph).unwrap(),
"Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n", "Failure on {}.\nExpected file:\n{}\nOutput file:\n{}\nParsed query:\n{}\nData:\n{}\n",
test, test,
expected_graph, expected_graph,
actual_graph, actual_graph,
client.load_sparql_query(test.query.clone()).unwrap(), client.load_sparql_query(test.query.clone()).unwrap(),
data data
) )
} }
} }
} else { } else {

Loading…
Cancel
Save