From 25397c57517db59de378bb03437dfd5c7ba668f1 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 18 Feb 2019 16:27:23 +0100 Subject: [PATCH] Drops support of plain literals They are merged with xsd:string datatyped literal in RDF 1.1 --- lib/src/model/literal.rs | 32 ++++++----------- lib/src/sparql/eval.rs | 62 +++++++++++++------------------- lib/src/store/numeric_encoder.rs | 29 ++++----------- lib/tests/sparql_test_cases.rs | 2 ++ 4 files changed, 44 insertions(+), 81 deletions(-) diff --git a/lib/src/model/literal.rs b/lib/src/model/literal.rs index 6459f404..0be4c314 100644 --- a/lib/src/model/literal.rs +++ b/lib/src/model/literal.rs @@ -40,7 +40,6 @@ pub struct Literal(LiteralContent); #[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Clone, Hash)] enum LiteralContent { - SimpleLiteral(String), String(String), LanguageTaggedString { value: String, language: String }, Boolean(bool), @@ -58,7 +57,7 @@ enum LiteralContent { impl Literal { /// Builds a RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) pub fn new_simple_literal(value: impl Into) -> Self { - Literal(LiteralContent::SimpleLiteral(value.into())) + Literal(LiteralContent::String(value.into())) } /// Builds a RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri) @@ -143,8 +142,7 @@ impl Literal { /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) pub fn value(&self) -> Cow<'_, str> { match self.0 { - LiteralContent::SimpleLiteral(ref value) - | LiteralContent::String(ref value) + LiteralContent::String(ref value) | LiteralContent::LanguageTaggedString { ref value, .. } | LiteralContent::TypedLiteral { ref value, .. } => Cow::Borrowed(value), LiteralContent::Boolean(value) => Cow::Owned(value.to_string()), @@ -174,7 +172,7 @@ impl Literal { /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](http://www.w3.org/2001/XMLSchema#string). pub fn datatype(&self) -> &NamedNode { match self.0 { - LiteralContent::SimpleLiteral(_) | LiteralContent::String(_) => &xsd::STRING, + LiteralContent::String(_) => &xsd::STRING, LiteralContent::LanguageTaggedString { .. } => &rdf::LANG_STRING, LiteralContent::Boolean(_) => &xsd::BOOLEAN, LiteralContent::Float(_) => &xsd::FLOAT, @@ -191,10 +189,10 @@ impl Literal { /// Checks if it could be considered as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal). /// /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) - /// or have been created by `Literal::new_simple_literal`. + /// or has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string). pub fn is_plain(&self) -> bool { match self.0 { - LiteralContent::SimpleLiteral(_) | LiteralContent::LanguageTaggedString { .. } => true, + LiteralContent::String(_) | LiteralContent::LanguageTaggedString { .. } => true, _ => false, } } @@ -282,9 +280,7 @@ impl Literal { /// Returns the [effective boolean value](https://www.w3.org/TR/sparql11-query/#ebv) of the literal if it exists pub fn to_bool(&self) -> Option { match self.0 { - LiteralContent::SimpleLiteral(ref value) | LiteralContent::String(ref value) => { - Some(!value.is_empty()) - } + LiteralContent::String(ref value) => Some(!value.is_empty()), LiteralContent::Boolean(value) => Some(value), LiteralContent::Float(value) => Some(!value.is_zero()), LiteralContent::Double(value) => Some(!value.is_zero()), @@ -302,9 +298,7 @@ impl Literal { LiteralContent::Integer(value) => value.to_f32(), LiteralContent::Decimal(value) => value.to_f32(), LiteralContent::Boolean(value) => Some(if value { 1. } else { 0. }), - LiteralContent::SimpleLiteral(ref value) | LiteralContent::String(ref value) => { - value.parse().ok() - } + LiteralContent::String(ref value) => value.parse().ok(), _ => None, } } @@ -317,9 +311,7 @@ impl Literal { LiteralContent::Integer(value) => value.to_f64(), LiteralContent::Decimal(value) => value.to_f64(), LiteralContent::Boolean(value) => Some(if value { 1. } else { 0. }), - LiteralContent::SimpleLiteral(ref value) | LiteralContent::String(ref value) => { - value.parse().ok() - } + LiteralContent::String(ref value) => value.parse().ok(), _ => None, } } @@ -332,9 +324,7 @@ impl Literal { LiteralContent::Integer(value) => value.to_i128(), LiteralContent::Decimal(value) => value.to_i128(), LiteralContent::Boolean(value) => Some(if value { 1 } else { 0 }), - LiteralContent::SimpleLiteral(ref value) | LiteralContent::String(ref value) => { - value.parse().ok() - } + LiteralContent::String(ref value) => value.parse().ok(), _ => None, } } @@ -351,9 +341,7 @@ impl Literal { } else { Decimal::zero() }), - LiteralContent::SimpleLiteral(ref value) | LiteralContent::String(ref value) => { - value.parse().ok() - } + LiteralContent::String(ref value) => value.parse().ok(), _ => None, } } diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index ccc4d05c..1e412c2b 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -445,32 +445,30 @@ impl SimpleEvaluator { PlanExpression::UnaryNot(e) => self .to_bool(self.eval_expression(e, tuple)?) .map(|v| (!v).into()), - PlanExpression::Str(e) => Some(EncodedTerm::SimpleLiteral { + PlanExpression::Str(e) => Some(EncodedTerm::StringLiteral { value_id: self.to_string_id(self.eval_expression(e, tuple)?)?, }), PlanExpression::Lang(e) => match self.eval_expression(e, tuple)? { EncodedTerm::LangStringLiteral { language_id, .. } => { - Some(EncodedTerm::SimpleLiteral { + Some(EncodedTerm::StringLiteral { value_id: language_id, }) } - e if e.is_literal() => Some(ENCODED_EMPTY_SIMPLE_LITERAL), + e if e.is_literal() => Some(ENCODED_EMPTY_STRING_LITERAL), _ => None, }, PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(), PlanExpression::Bound(v) => Some(has_tuple_value(*v, tuple).into()), PlanExpression::IRI(e) => match self.eval_expression(e, tuple)? { EncodedTerm::NamedNode { iri_id } => Some(EncodedTerm::NamedNode { iri_id }), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { Some(EncodedTerm::NamedNode { iri_id: value_id }) } _ => None, }, PlanExpression::BNode(id) => match id { Some(id) => match self.eval_expression(id, tuple)? { - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => Some( + EncodedTerm::StringLiteral { value_id } => Some( self.bnodes_map .lock() .ok()? @@ -525,7 +523,7 @@ impl SimpleEvaluator { .insert_str(&Uuid::new_v4().to_urn().to_string()) .ok()?, }), - PlanExpression::StrUUID() => Some(EncodedTerm::SimpleLiteral { + PlanExpression::StrUUID() => Some(EncodedTerm::StringLiteral { value_id: self .store .insert_str(&Uuid::new_v4().to_simple().to_string()) @@ -624,8 +622,7 @@ impl SimpleEvaluator { } PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? { EncodedTerm::BooleanLiteral(value) => Some(value.into()), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { match &*self.store.get_str(value_id).ok()? { "true" | "1" => Some(true.into()), "false" | "0" => Some(false.into()), @@ -642,8 +639,7 @@ impl SimpleEvaluator { EncodedTerm::BooleanLiteral(value) => { Some(if value { 1. as f64 } else { 0. }.into()) } - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DoubleLiteral( + EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DoubleLiteral( OrderedFloat(self.store.get_str(value_id).ok()?.parse().ok()?), )), _ => None, @@ -656,8 +652,7 @@ impl SimpleEvaluator { EncodedTerm::BooleanLiteral(value) => { Some(if value { 1. as f32 } else { 0. }.into()) } - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::FloatLiteral( + EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::FloatLiteral( OrderedFloat(self.store.get_str(value_id).ok()?.parse().ok()?), )), _ => None, @@ -668,8 +663,7 @@ impl SimpleEvaluator { EncodedTerm::IntegerLiteral(value) => Some(value.to_i128()?.into()), EncodedTerm::DecimalLiteral(value) => Some(value.to_i128()?.into()), EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::IntegerLiteral( + EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::IntegerLiteral( self.store.get_str(value_id).ok()?.parse().ok()?, )), _ => None, @@ -687,8 +681,7 @@ impl SimpleEvaluator { } .into(), ), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DecimalLiteral( + EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DecimalLiteral( self.store.get_str(value_id).ok()?.parse().ok()?, )), _ => None, @@ -697,8 +690,7 @@ impl SimpleEvaluator { EncodedTerm::NaiveDate(value) => Some(value.into()), EncodedTerm::DateTime(value) => Some(value.date().naive_utc().into()), //TODO: use date with timezone EncodedTerm::NaiveDateTime(value) => Some(value.date().into()), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { let value = self.store.get_str(value_id).ok()?; Some(NaiveDate::parse_from_str(&value, "%Y-%m-%d").ok()?.into()) } @@ -708,8 +700,7 @@ impl SimpleEvaluator { EncodedTerm::NaiveTime(value) => Some(value.into()), EncodedTerm::DateTime(value) => Some(value.time().into()), EncodedTerm::NaiveDateTime(value) => Some(value.time().into()), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { let value = self.store.get_str(value_id).ok()?; Some(NaiveTime::parse_from_str(&value, "%H:%M:%S").ok()?.into()) } @@ -718,8 +709,7 @@ impl SimpleEvaluator { PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? { EncodedTerm::DateTime(value) => Some(value.into()), EncodedTerm::NaiveDateTime(value) => Some(value.into()), - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { let value = self.store.get_str(value_id).ok()?; Some(match DateTime::parse_from_rfc3339(&value) { Ok(value) => value.into(), @@ -739,7 +729,6 @@ impl SimpleEvaluator { fn to_bool(&self, term: EncodedTerm) -> Option { match term { EncodedTerm::BooleanLiteral(value) => Some(value), - EncodedTerm::SimpleLiteral { .. } => Some(term != ENCODED_EMPTY_SIMPLE_LITERAL), EncodedTerm::StringLiteral { .. } => Some(term != ENCODED_EMPTY_STRING_LITERAL), EncodedTerm::FloatLiteral(value) => Some(!value.is_zero()), EncodedTerm::DoubleLiteral(value) => Some(!value.is_zero()), @@ -754,8 +743,7 @@ impl SimpleEvaluator { EncodedTerm::DefaultGraph {} => None, EncodedTerm::NamedNode { iri_id } => Some(iri_id), EncodedTerm::BlankNode(_) => None, - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } + EncodedTerm::StringLiteral { value_id } | EncodedTerm::LangStringLiteral { value_id, .. } | EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id), EncodedTerm::BooleanLiteral(value) => self @@ -774,7 +762,7 @@ impl SimpleEvaluator { } fn to_simple_string(&self, term: EncodedTerm) -> Option { - if let EncodedTerm::SimpleLiteral { value_id } = term { + if let EncodedTerm::StringLiteral { value_id } = term { Some(self.store.get_str(value_id).ok()?.into()) } else { None @@ -782,7 +770,7 @@ impl SimpleEvaluator { } fn to_simple_string_id(&self, term: EncodedTerm) -> Option { - if let EncodedTerm::SimpleLiteral { value_id } = term { + if let EncodedTerm::StringLiteral { value_id } = term { Some(value_id) } else { None @@ -791,8 +779,7 @@ impl SimpleEvaluator { fn to_string(&self, term: EncodedTerm) -> Option { match term { - EncodedTerm::SimpleLiteral { value_id } - | EncodedTerm::StringLiteral { value_id } + EncodedTerm::StringLiteral { value_id } | EncodedTerm::LangStringLiteral { value_id, .. } => { Some(self.store.get_str(value_id).ok()?.into()) } @@ -931,12 +918,13 @@ impl SimpleEvaluator { fn partial_cmp_literals(&self, a: EncodedTerm, b: EncodedTerm) -> Option { match a { - EncodedTerm::SimpleLiteral { value_id: a } - | EncodedTerm::StringLiteral { value_id: a } => match b { - EncodedTerm::SimpleLiteral { value_id: b } - | EncodedTerm::StringLiteral { value_id: b } => self.compare_str_ids(a, b), - _ => None, - }, + EncodedTerm::StringLiteral { value_id: a } => { + if let EncodedTerm::StringLiteral { value_id: b } = b { + self.compare_str_ids(a, b) + } else { + None + } + } EncodedTerm::FloatLiteral(a) => match b { EncodedTerm::FloatLiteral(b) => (*a).partial_cmp(&*b), EncodedTerm::DoubleLiteral(b) => a.to_f64()?.partial_cmp(&*b), diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index a6b1c940..437e0a96 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -128,7 +128,6 @@ impl StringStore for MemoryStringStore { const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_BLANK_NODE_ID: u8 = 2; -const TYPE_SIMPLE_LITERAL_ID: u8 = 3; const TYPE_LANG_STRING_LITERAL_ID: u8 = 4; const TYPE_TYPED_LITERAL_ID: u8 = 5; const TYPE_STRING_LITERAL: u8 = 6; @@ -144,9 +143,6 @@ const TYPE_NAIVE_DATE_LITERAL: u8 = 15; const TYPE_NAIVE_TIME_LITERAL: u8 = 16; pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {}; -pub static ENCODED_EMPTY_SIMPLE_LITERAL: EncodedTerm = EncodedTerm::SimpleLiteral { - value_id: EMPTY_STRING_ID, -}; pub static ENCODED_EMPTY_STRING_LITERAL: EncodedTerm = EncodedTerm::StringLiteral { value_id: EMPTY_STRING_ID, }; @@ -186,10 +182,9 @@ pub enum EncodedTerm { DefaultGraph {}, NamedNode { iri_id: u64 }, BlankNode(Uuid), - SimpleLiteral { value_id: u64 }, + StringLiteral { value_id: u64 }, LangStringLiteral { value_id: u64, language_id: u64 }, TypedLiteral { value_id: u64, datatype_id: u64 }, - StringLiteral { value_id: u64 }, BooleanLiteral(bool), FloatLiteral(OrderedFloat), DoubleLiteral(OrderedFloat), @@ -218,10 +213,9 @@ impl EncodedTerm { pub fn is_literal(&self) -> bool { match self { - EncodedTerm::SimpleLiteral { .. } + EncodedTerm::StringLiteral { .. } | EncodedTerm::LangStringLiteral { .. } | EncodedTerm::TypedLiteral { .. } - | EncodedTerm::StringLiteral { .. } | EncodedTerm::BooleanLiteral(_) | EncodedTerm::FloatLiteral(_) | EncodedTerm::DoubleLiteral(_) @@ -237,9 +231,7 @@ impl EncodedTerm { pub fn datatype(&self) -> Option { match self { - EncodedTerm::SimpleLiteral { .. } | EncodedTerm::StringLiteral { .. } => { - Some(ENCODED_XSD_STRING_NAMED_NODE) - } + EncodedTerm::StringLiteral { .. } => Some(ENCODED_XSD_STRING_NAMED_NODE), EncodedTerm::LangStringLiteral { .. } => Some(ENCODED_RDF_LANG_STRING_NAMED_NODE), EncodedTerm::TypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { iri_id: *datatype_id, @@ -263,10 +255,9 @@ impl EncodedTerm { EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, EncodedTerm::BlankNode(_) => TYPE_BLANK_NODE_ID, - EncodedTerm::SimpleLiteral { .. } => TYPE_SIMPLE_LITERAL_ID, + EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL, EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, - EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL, EncodedTerm::BooleanLiteral(true) => TYPE_BOOLEAN_LITERAL_TRUE, EncodedTerm::BooleanLiteral(false) => TYPE_BOOLEAN_LITERAL_FALSE, EncodedTerm::FloatLiteral(_) => TYPE_FLOAT_LITERAL, @@ -396,9 +387,6 @@ impl TermReader for R { self.read_exact(&mut uuid_buffer)?; Ok(EncodedTerm::BlankNode(Uuid::from_bytes(uuid_buffer))) } - TYPE_SIMPLE_LITERAL_ID => Ok(EncodedTerm::SimpleLiteral { - value_id: self.read_u64::()?, - }), TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral { language_id: self.read_u64::()?, value_id: self.read_u64::()?, @@ -511,7 +499,7 @@ impl TermWriter for R { EncodedTerm::DefaultGraph {} => {} EncodedTerm::NamedNode { iri_id } => self.write_u64::(iri_id)?, EncodedTerm::BlankNode(id) => self.write_all(id.as_bytes())?, - EncodedTerm::SimpleLiteral { value_id } | EncodedTerm::StringLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { self.write_u64::(value_id)?; } EncodedTerm::LangStringLiteral { @@ -605,7 +593,7 @@ impl Encoder { language_id: self.string_store.insert_str(language)?, } } else { - EncodedTerm::SimpleLiteral { + EncodedTerm::StringLiteral { value_id: self.string_store.insert_str(&literal.value())?, } } @@ -715,7 +703,7 @@ impl Encoder { Ok(NamedNode::from(self.string_store.get_url(iri_id)?).into()) } EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()), - EncodedTerm::SimpleLiteral { value_id } => { + EncodedTerm::StringLiteral { value_id } => { Ok(Literal::new_simple_literal(self.string_store.get_str(value_id)?).into()) } EncodedTerm::LangStringLiteral { @@ -734,9 +722,6 @@ impl Encoder { NamedNode::from(self.string_store.get_url(datatype_id)?), ) .into()), - EncodedTerm::StringLiteral { value_id } => { - Ok(Literal::from(self.string_store.get_str(value_id)?.into()).into()) - } EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(value).into()), EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()), EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()), diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 8f1fcafd..e29d6582 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -158,6 +158,8 @@ fn sparql_w3c_query_evaluation_testsuite() { NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-not-eq", ).unwrap(), + //Simple literal vs xsd:string. We apply RDF 1.1 + NamedNode::from_str("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-2").unwrap(), //URI normalization: we are normalizing more strongly NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-3",