From 8bc84b28205606f3fc8f1e3231fdacca42a99c50 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 4 Aug 2020 15:34:46 +0200 Subject: [PATCH] Adds borrowed data structures for RDF terms --- js/src/model.rs | 2 +- js/src/store.rs | 18 +- lib/Cargo.toml | 1 - lib/src/io/write.rs | 10 +- lib/src/lib.rs | 2 +- lib/src/model/blank_node.rs | 207 ++++++++-- lib/src/model/literal.rs | 300 +++++++++++--- lib/src/model/mod.rs | 11 +- lib/src/model/named_node.rs | 119 +++++- lib/src/model/triple.rs | 663 ++++++++++++++++++++++++++---- lib/src/model/vocab.rs | 386 +++++++++-------- lib/src/sparql/algebra.rs | 12 + lib/src/sparql/eval.rs | 9 +- lib/src/sparql/parser.rs | 34 +- lib/src/sparql/plan_builder.rs | 13 +- lib/src/store/memory.rs | 46 +-- lib/src/store/mod.rs | 73 +--- lib/src/store/numeric_encoder.rs | 101 ++--- lib/src/store/rocksdb.rs | 106 +++-- lib/src/store/sled.rs | 106 +++-- python/src/memory_store.rs | 8 +- python/src/model.rs | 2 +- python/src/sled_store.rs | 8 +- testsuite/Cargo.toml | 1 - testsuite/src/manifest.rs | 97 ++--- testsuite/src/sparql_evaluator.rs | 182 ++++---- testsuite/src/vocab.rs | 115 +++--- wikibase/src/loader.rs | 8 +- 28 files changed, 1781 insertions(+), 859 deletions(-) diff --git a/js/src/model.rs b/js/src/model.rs index 668ae963..a15532b1 100644 --- a/js/src/model.rs +++ b/js/src/model.rs @@ -246,7 +246,7 @@ impl JsLiteral { #[wasm_bindgen(getter)] pub fn datatype(&self) -> JsNamedNode { - self.inner.datatype().clone().into() + self.inner.datatype().into_owned().into() } pub fn equals(&self, other: &JsValue) -> bool { diff --git a/js/src/store.rs b/js/src/store.rs index 36324ee6..991c6618 100644 --- a/js/src/store.rs +++ b/js/src/store.rs @@ -3,10 +3,10 @@ use crate::model::*; use crate::utils::to_err; use js_sys::{Array, Map}; use oxigraph::io::{DatasetFormat, GraphFormat}; -use oxigraph::model::GraphName; +use oxigraph::model::*; use oxigraph::sparql::{QueryOptions, QueryResult}; use oxigraph::MemoryStore; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::io::Cursor; use wasm_bindgen::prelude::*; @@ -74,25 +74,29 @@ impl JsMemoryStore { } else { None } - .as_ref(), + .as_ref() + .map(|t: &NamedOrBlankNode| t.into()), if let Some(predicate) = self.from_js.to_optional_term(predicate)? { - Some(predicate.try_into()?) + Some(NamedNode::try_from(predicate)?) } else { None } - .as_ref(), + .as_ref() + .map(|t: &NamedNode| t.into()), if let Some(object) = self.from_js.to_optional_term(object)? { Some(object.try_into()?) } else { None } - .as_ref(), + .as_ref() + .map(|t: &Term| t.into()), if let Some(graph_name) = self.from_js.to_optional_term(graph_name)? { Some(graph_name.try_into()?) } else { None } - .as_ref(), + .as_ref() + .map(|t: &GraphName| t.into()), ) .map(|v| JsQuad::from(v).into()) .collect::>() diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 48f340f7..cbb1ed00 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -16,7 +16,6 @@ edition = "2018" all-features = true [dependencies] -lazy_static = "1" rocksdb = { version = "0.14", optional = true } sled = { version = "0.34", optional = true } quick-xml = "0.18" diff --git a/lib/src/io/write.rs b/lib/src/io/write.rs index 09cedbf0..3a202bd2 100644 --- a/lib/src/io/write.rs +++ b/lib/src/io/write.rs @@ -88,7 +88,8 @@ enum TripleWriterKind { } impl TripleWriter { - pub fn write(&mut self, triple: &Triple) -> Result<(), io::Error> { + pub fn write<'a>(&mut self, triple: impl Into>) -> Result<(), io::Error> { + let triple = triple.into(); match &mut self.formatter { TripleWriterKind::NTriples(formatter) => formatter.format(&triple.into())?, TripleWriterKind::Turtle(formatter) => formatter.format(&triple.into())?, @@ -187,10 +188,11 @@ enum QuadWriterKind { } impl QuadWriter { - pub fn write(&mut self, triple: &Quad) -> Result<(), io::Error> { + pub fn write<'a>(&mut self, quad: impl Into>) -> Result<(), io::Error> { + let quad = quad.into(); match &mut self.formatter { - QuadWriterKind::NQuads(formatter) => formatter.format(&triple.into())?, - QuadWriterKind::TriG(formatter) => formatter.format(&triple.into())?, + QuadWriterKind::NQuads(formatter) => formatter.format(&quad.into())?, + QuadWriterKind::TriG(formatter) => formatter.format(&quad.into())?, } Ok(()) } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index b7d893b8..d6e2b965 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -27,7 +27,7 @@ //! store.insert(quad.clone()); //! //! // quad filter -//! let results: Vec = store.quads_for_pattern(Some(&ex.clone().into()), None, None, None).collect(); +//! let results: Vec = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect(); //! assert_eq!(vec![quad], results); //! //! // SPARQL query diff --git a/lib/src/model/blank_node.rs b/lib/src/model/blank_node.rs index 59081e9b..5c395f76 100644 --- a/lib/src/model/blank_node.rs +++ b/lib/src/model/blank_node.rs @@ -5,7 +5,7 @@ use std::fmt; use std::io::Write; use std::str; -/// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). /// /// The common way to create a new blank node is to use the `BlankNode::default` trait method. /// @@ -28,7 +28,7 @@ pub struct BlankNode(BlankNodeContent); #[derive(PartialEq, Eq, Debug, Clone, Hash)] enum BlankNodeContent { Named(String), - Anonymous { id: u128, str: [u8; 32] }, + Anonymous { id: u128, str: IdStr }, } impl BlankNode { @@ -52,13 +52,8 @@ impl BlankNode { /// Except if you really know what you do, you should use [`new`](#method.new). pub fn new_unchecked(id: impl Into) -> Self { let id = id.into(); - if let Ok(numerical_id) = u128::from_str_radix(&id, 16) { - let result = Self::new_from_unique_id(numerical_id); - if result.as_str() == id { - result - } else { - Self(BlankNodeContent::Named(id)) - } + if let Some(numerical_id) = to_integer_id(&id) { + Self::new_from_unique_id(numerical_id) } else { Self(BlankNodeContent::Named(id)) } @@ -69,19 +64,17 @@ impl BlankNode { /// In most cases, it is much more convenient to create a blank node using `BlankNode::default()`. pub fn new_from_unique_id(id: impl Into) -> Self { let id = id.into(); - let mut str = [0; 32]; - write!(&mut str[..], "{:x}", id).unwrap(); - Self(BlankNodeContent::Anonymous { id, str }) + Self(BlankNodeContent::Anonymous { + id, + str: IdStr::new(id), + }) } /// Returns the underlying ID of this blank node pub fn as_str(&self) -> &str { match &self.0 { BlankNodeContent::Named(id) => id, - BlankNodeContent::Anonymous { str, .. } => { - let len = str.iter().position(|x| x == &0).unwrap_or(32); - str::from_utf8(&str[..len]).unwrap() - } + BlankNodeContent::Anonymous { str, .. } => str.as_str(), } } @@ -89,25 +82,24 @@ impl BlankNode { pub fn into_string(self) -> String { match self.0 { BlankNodeContent::Named(id) => id, - BlankNodeContent::Anonymous { str, .. } => { - let len = str.iter().position(|x| x == &0).unwrap_or(32); - str::from_utf8(&str[..len]).unwrap().to_owned() - } + BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(), } } - /// Returns the internal numerical ID of this blank node, if it exists - pub(crate) fn id(&self) -> Option { - match self.0 { - BlankNodeContent::Named(_) => None, - BlankNodeContent::Anonymous { id, .. } => Some(id), - } + pub fn as_ref(&self) -> BlankNodeRef<'_> { + BlankNodeRef(match &self.0 { + BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()), + BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous { + id: *id, + str: str.as_str(), + }, + }) } } impl fmt::Display for BlankNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - rio::BlankNode::from(self).fmt(f) + self.as_ref().fmt(f) } } @@ -118,12 +110,140 @@ impl Default for BlankNode { } } -impl<'a> From<&'a BlankNode> for rio::BlankNode<'a> { +/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// +/// The common way to create a new blank node is to use the `BlankNode::default` trait method. +/// +/// It is also possible to create a blank node from a blank node identifier using the `BlankNodeRef::new` method. +/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars. +/// +/// The default string formatter is returning a N-Triples, Turtle and SPARQL compatible representation: +/// ``` +/// use oxigraph::model::BlankNodeRef; +/// +/// assert_eq!( +/// "_:a122", +/// BlankNodeRef::new("a122")?.to_string() +/// ); +/// # Result::<_,oxigraph::model::BlankNodeIdParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>); + +#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] +enum BlankNodeRefContent<'a> { + Named(&'a str), + Anonymous { id: u128, str: &'a str }, +} + +impl<'a> BlankNodeRef<'a> { + /// Creates a blank node from a unique identifier. + /// + /// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars. + /// + /// In most cases, it is much more convenient to create a blank node using `BlankNode::default()`. + /// `BlankNode::default()` creates a random ID that could be easily inlined by Oxigraph stores. + pub fn new(id: &'a str) -> Result { + validate_blank_node_identifier(id)?; + Ok(Self::new_unchecked(id)) + } + + /// Creates a blank node from a unique identifier without validation. + /// + /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier + /// according to N-Triples, Turtle and SPARQL grammars. + /// + /// Except if you really know what you do, you should use [`new`](#method.new). + pub fn new_unchecked(id: &'a str) -> Self { + if let Some(numerical_id) = to_integer_id(id) { + Self(BlankNodeRefContent::Anonymous { + id: numerical_id, + str: id, + }) + } else { + Self(BlankNodeRefContent::Named(id)) + } + } + + /// Returns the underlying ID of this blank node + pub fn as_str(self) -> &'a str { + match self.0 { + BlankNodeRefContent::Named(id) => id, + BlankNodeRefContent::Anonymous { str, .. } => str, + } + } + + /// Returns the internal numerical ID of this blank node, if it exists + pub(crate) fn id(&self) -> Option { + match self.0 { + BlankNodeRefContent::Named(_) => None, + BlankNodeRefContent::Anonymous { id, .. } => Some(id), + } + } + + pub fn into_owned(self) -> BlankNode { + BlankNode(match self.0 { + BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()), + BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous { + id, + str: IdStr::new(id), + }, + }) + } +} + +impl fmt::Display for BlankNodeRef<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + rio::BlankNode::from(*self).fmt(f) + } +} + +impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> { fn from(node: &'a BlankNode) -> Self { + node.as_ref() + } +} + +impl<'a> From> for BlankNode { + fn from(node: BlankNodeRef<'a>) -> Self { + node.into_owned() + } +} + +impl<'a> From> for rio::BlankNode<'a> { + fn from(node: BlankNodeRef<'a>) -> Self { rio::BlankNode { id: node.as_str() } } } +impl PartialEq for BlankNodeRef<'_> { + fn eq(&self, other: &BlankNode) -> bool { + *self == other.as_ref() + } +} + +impl PartialEq> for BlankNode { + fn eq(&self, other: &BlankNodeRef<'_>) -> bool { + self.as_ref() == *other + } +} + +#[derive(PartialEq, Eq, Debug, Clone, Hash)] +struct IdStr([u8; 32]); + +impl IdStr { + fn new(id: u128) -> Self { + let mut str = [0; 32]; + write!(&mut str[..], "{:x}", id).unwrap(); + Self(str) + } + + fn as_str(&self) -> &str { + let len = self.0.iter().position(|x| x == &0).unwrap_or(32); + str::from_utf8(&self.0[..len]).unwrap() + } +} + fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> { let mut chars = id.chars(); let front = chars.next().ok_or(BlankNodeIdParseError {})?; @@ -183,6 +303,25 @@ fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> } } +fn to_integer_id(id: &str) -> Option { + let digits = id.as_bytes(); + let mut value: u128 = 0; + if let None | Some(b'0') = digits.first() { + return None; // No empty string or leading zeros + } + for digit in digits { + value = value.checked_mul(16)?.checked_add( + match *digit { + b'0'..=b'9' => digit - b'0', + b'a'..=b'f' => digit - b'a' + 10, + _ => return None, + } + .into(), + )?; + } + Some(value) +} + /// An error raised during `BlankNode` validation. #[allow(missing_copy_implementations)] #[derive(Debug)] @@ -234,4 +373,16 @@ mod test { BlankNode::new_from_unique_id(0x100a_u128) ); } + + #[test] + fn test_equals() { + assert_eq!( + BlankNode::new("100a").unwrap(), + BlankNodeRef::new("100a").unwrap() + ); + assert_eq!( + BlankNode::new("zzz").unwrap(), + BlankNodeRef::new("zzz").unwrap() + ); + } } diff --git a/lib/src/model/literal.rs b/lib/src/model/literal.rs index b56b87c7..5d340a27 100644 --- a/lib/src/model/literal.rs +++ b/lib/src/model/literal.rs @@ -2,13 +2,14 @@ use crate::model::named_node::NamedNode; use crate::model::vocab::rdf; use crate::model::vocab::xsd; use crate::model::xsd::*; +use crate::model::NamedNodeRef; use oxilangtag::{LanguageTag, LanguageTagParseError}; use rio_api::model as rio; use std::borrow::Cow; use std::fmt; use std::option::Option; -/// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) +/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) /// /// The default string formatter is returning a N-Triples, Turtle and SPARQL compatible representation: /// ``` @@ -23,7 +24,7 @@ use std::option::Option; /// /// assert_eq!( /// "\"1999-01-01\"^^", -/// Literal::new_typed_literal("1999-01-01", xsd::DATE.clone()).to_string() +/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string() /// ); /// /// assert_eq!( @@ -52,7 +53,7 @@ impl Literal { pub fn new_typed_literal(value: impl Into, datatype: impl Into) -> Self { let value = value.into(); let datatype = datatype.into(); - Literal(if datatype == *xsd::STRING { + Literal(if datatype == xsd::STRING { LiteralContent::String(value) } else { LiteralContent::TypedLiteral { value, datatype } @@ -66,10 +67,10 @@ impl Literal { ) -> Result { let mut language = language.into(); language.make_ascii_lowercase(); - Ok(Literal(LiteralContent::LanguageTaggedString { - value: value.into(), - language: LanguageTag::parse(language)?.into_inner(), - })) + Ok(Self::new_language_tagged_literal_unchecked( + value, + LanguageTag::parse(language)?.into_inner(), + )) } /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) @@ -92,11 +93,7 @@ impl Literal { /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) pub fn value(&self) -> &str { - match &self.0 { - LiteralContent::String(value) - | LiteralContent::LanguageTaggedString { value, .. } - | LiteralContent::TypedLiteral { value, .. } => value, - } + self.as_ref().value() } /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). @@ -104,33 +101,36 @@ impl Literal { /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47). /// They are normalized to lowercase by this implementation. pub fn language(&self) -> Option<&str> { - match &self.0 { - LiteralContent::LanguageTaggedString { language, .. } => Some(language), - _ => None, - } + self.as_ref().language() } /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). /// /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](http://www.w3.org/1999/02/22-rdf-syntax-ns#langString). /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](http://www.w3.org/2001/XMLSchema#string). - pub fn datatype(&self) -> &NamedNode { - match &self.0 { - LiteralContent::String(_) => &xsd::STRING, - LiteralContent::LanguageTaggedString { .. } => &rdf::LANG_STRING, - LiteralContent::TypedLiteral { datatype, .. } => datatype, - } + pub fn datatype(&self) -> NamedNodeRef<'_> { + self.as_ref().datatype() } - /// Checks if this lieteral could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal). + /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal). /// /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) /// or has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string). pub fn is_plain(&self) -> bool { - match self.0 { - LiteralContent::String(_) | LiteralContent::LanguageTaggedString { .. } => true, - _ => false, - } + self.as_ref().is_plain() + } + + pub fn as_ref(&self) -> LiteralRef<'_> { + LiteralRef(match &self.0 { + LiteralContent::String(value) => LiteralRefContent::String(value), + LiteralContent::LanguageTaggedString { value, language } => { + LiteralRefContent::LanguageTaggedString { value, language } + } + LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral { + value, + datatype: datatype.as_ref(), + }, + }) } /// Extract components from this literal @@ -147,7 +147,7 @@ impl Literal { impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - rio::Literal::from(self).fmt(f) + self.as_ref().fmt(f) } } @@ -173,7 +173,7 @@ impl From for Literal { fn from(value: bool) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::BOOLEAN.clone(), + datatype: xsd::BOOLEAN.into(), }) } } @@ -182,7 +182,7 @@ impl From for Literal { fn from(value: i128) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -191,7 +191,7 @@ impl From for Literal { fn from(value: i64) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -200,7 +200,7 @@ impl From for Literal { fn from(value: i32) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -209,7 +209,7 @@ impl From for Literal { fn from(value: i16) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -218,7 +218,7 @@ impl From for Literal { fn from(value: u64) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -227,7 +227,7 @@ impl From for Literal { fn from(value: u32) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -236,7 +236,7 @@ impl From for Literal { fn from(value: u16) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::INTEGER.clone(), + datatype: xsd::INTEGER.into(), }) } } @@ -245,7 +245,7 @@ impl From for Literal { fn from(value: f32) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::FLOAT.clone(), + datatype: xsd::FLOAT.into(), }) } } @@ -254,7 +254,7 @@ impl From for Literal { fn from(value: f64) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::DOUBLE.clone(), + datatype: xsd::DOUBLE.into(), }) } } @@ -263,7 +263,7 @@ impl From for Literal { fn from(value: Decimal) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::DECIMAL.clone(), + datatype: xsd::DECIMAL.into(), }) } } @@ -272,7 +272,7 @@ impl From for Literal { fn from(value: Date) -> Self { Literal(LiteralContent::TypedLiteral { value: value.to_string(), - datatype: xsd::DATE.clone(), + datatype: xsd::DATE.into(), }) } } @@ -281,7 +281,7 @@ impl From