From 2f706a777ffc6c95599599bb376aaef65cabf836 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 22 Aug 2019 12:21:18 +0200 Subject: [PATCH] Adds our own IRI normalization instead of relying on Url crate --- lib/Cargo.toml | 2 - lib/src/lib.rs | 2 +- lib/src/model/graph.rs | 2 +- lib/src/model/iri.rs | 632 ++++++++++++++++++++++++++ lib/src/model/literal.rs | 9 +- lib/src/model/mod.rs | 3 + lib/src/model/named_node.rs | 19 +- lib/src/model/vocab.rs | 124 ++--- lib/src/repository.rs | 10 +- lib/src/sparql/parser.rs | 18 +- lib/src/sparql/sparql_grammar.rustpeg | 39 +- lib/src/sparql/xml_results.rs | 14 +- lib/src/store/memory.rs | 2 +- lib/src/store/numeric_encoder.rs | 12 +- lib/src/store/rocksdb.rs | 2 +- lib/tests/sparql_test_cases.rs | 95 ++-- 16 files changed, 822 insertions(+), 163 deletions(-) create mode 100644 lib/src/model/iri.rs diff --git a/lib/Cargo.toml b/lib/Cargo.toml index fdc89211..ce2b94e3 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -18,9 +18,7 @@ travis-ci = { repository = "Tpt/rudf" } [dependencies] lazy_static = "1" rocksdb = { version = "0.12", optional = true } -url = "2" uuid = { version = "0.7", features = ["v4"] } -bzip2 = "0.3" byteorder = {version="1", features = ["i128"] } quick-xml = "0.15" ordered-float = "1" diff --git a/lib/src/lib.rs b/lib/src/lib.rs index e67c8924..93ca1ff1 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -18,7 +18,7 @@ //! let connection = repository.connection().unwrap(); //! //! // insertion -//! let ex = NamedNode::new("http://example.com"); +//! let ex = NamedNode::parse("http://example.com").unwrap(); //! let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), None); //! connection.insert(&quad); //! 
diff --git a/lib/src/model/graph.rs b/lib/src/model/graph.rs index c1da0161..c7b0c5bc 100644 --- a/lib/src/model/graph.rs +++ b/lib/src/model/graph.rs @@ -14,7 +14,7 @@ use std::iter::FromIterator; /// use rudf::model::SimpleGraph; /// /// let mut graph = SimpleGraph::default(); -/// let ex = NamedNode::new("http://example.com"); +/// let ex = NamedNode::parse("http://example.com").unwrap(); /// let triple = Triple::new(ex.clone(), ex.clone(), ex.clone()); /// graph.insert(triple.clone()); /// let results: Vec = graph.triples_for_subject(&ex.into()).cloned().collect(); diff --git a/lib/src/model/iri.rs b/lib/src/model/iri.rs new file mode 100644 index 00000000..b1a5a160 --- /dev/null +++ b/lib/src/model/iri.rs @@ -0,0 +1,632 @@ +use std::error::Error; +use std::fmt; + +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Iri { + iri: String, + positions: IriElementsPositions, +} + +impl Iri { + pub fn parse(iri: String) -> Result { + let base_positions = + parse_iri(iri.as_bytes(), 0).map_err(|position| IriParseError { position })?; + Ok(Self { + iri, + positions: base_positions, + }) + } + + pub fn resolve(&self, iri: &str) -> Result { + let mut target_buffer = String::with_capacity(self.iri.len() + iri.len()); + let positions = resolve_relative_iri(iri, &self.iri, &self.positions, &mut target_buffer) + .map_err(|position| IriParseError { position })?; + Ok(Self { + iri: target_buffer, + positions, + }) + } + + pub fn into_string(self) -> String { + self.iri + } +} + +#[derive(Debug)] +pub struct IriParseError { + position: usize, +} + +impl fmt::Display for IriParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Invalid IRI at char {}", self.position) + } +} + +impl Error for IriParseError {} + +type IriState = Result; // usize = the end position + +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +struct IriElementsPositions { + scheme_end: usize, + authority_end: usize, + path_end: usize, + query_end: usize, + fragment_end: usize, 
+} + +// RFC 3986 5.2 Relative Resolution algorithm +fn resolve_relative_iri( + reference_iri: &str, + base_iri: &str, + base_positions: &IriElementsPositions, + target_buffer: &mut String, +) -> Result { + let base_scheme = &base_iri[0..base_positions.scheme_end]; + let base_authority = &base_iri[base_positions.scheme_end..base_positions.authority_end]; + let base_path = &base_iri[base_positions.authority_end..base_positions.path_end]; + let base_query = &base_iri[base_positions.path_end..base_positions.query_end]; + + let reference_positions = parse_iri_reference(reference_iri.as_bytes(), 0)?; + let r_scheme = &reference_iri[0..reference_positions.scheme_end]; + let r_authority = + &reference_iri[reference_positions.scheme_end..reference_positions.authority_end]; + let r_path = &reference_iri[reference_positions.authority_end..reference_positions.path_end]; + let r_query = &reference_iri[reference_positions.path_end..reference_positions.query_end]; + let r_fragment = &reference_iri[reference_positions.query_end..]; + + let scheme_end; + let authority_end; + let path_end; + let query_end; + let fragment_end; + + // if defined(R.scheme) then + if !r_scheme.is_empty() { + // T.scheme = R.scheme; + target_buffer.push_str(r_scheme); + scheme_end = target_buffer.len(); + + // T.authority = R.authority; + target_buffer.push_str(r_authority); + authority_end = target_buffer.len(); + + // T.path = remove_dot_segments(R.path); + append_and_remove_dot_segments(r_path, target_buffer, target_buffer.len()); + path_end = target_buffer.len(); + + // T.query = R.query; + target_buffer.push_str(r_query); + query_end = target_buffer.len(); + + // T.fragment = R.fragment; + target_buffer.push_str(r_fragment); + fragment_end = target_buffer.len(); + } else { + // T.scheme = Base.scheme; + target_buffer.push_str(base_scheme); + scheme_end = target_buffer.len(); + + // if defined(R.authority) then + if !r_authority.is_empty() { + // T.authority = R.authority; + 
target_buffer.push_str(r_authority); + authority_end = target_buffer.len(); + + // T.path = remove_dot_segments(R.path); + append_and_remove_dot_segments(r_path, target_buffer, target_buffer.len()); + path_end = target_buffer.len(); + + // T.query = R.query; + target_buffer.push_str(r_query); + query_end = target_buffer.len(); + + // T.fragment = R.fragment; + target_buffer.push_str(r_fragment); + fragment_end = target_buffer.len(); + } else { + // T.authority = Base.authority; + target_buffer.push_str(base_authority); + authority_end = target_buffer.len(); + + // if (R.path == "") then + if r_path == "" { + // T.path = Base.path; + target_buffer.push_str(base_path); + path_end = target_buffer.len(); + + // if defined(R.query) then + if !r_query.is_empty() { + // T.query = R.query; + target_buffer.push_str(r_query); + } else { + // T.query = Base.query; + target_buffer.push_str(base_query); + } + query_end = target_buffer.len(); + } else { + // if (R.path starts-with "/") then + if r_path.starts_with('/') { + // T.path = remove_dot_segments(R.path); + append_and_remove_dot_segments(r_path, target_buffer, target_buffer.len()); + } else { + let path_start_in_target = target_buffer.len(); + // T.path = merge(Base.path, R.path); a base path without any "/" is dropped entirely (RFC 3986 5.2.3) + // T.path = remove_dot_segments(T.path); + if base_positions.authority_end > base_positions.scheme_end + && base_positions.path_end == base_positions.authority_end + { + append_and_remove_dot_segments_with_extra_slash( + r_path, + target_buffer, + path_start_in_target, + ); + } else { + let base_dir_end = base_path + .char_indices() + .rev() + .find(|(_, c)| *c == '/') + .map_or(0, |(i, _)| i + 1) + + base_positions.authority_end; + append_and_remove_dot_segments( + &base_iri[base_positions.authority_end..base_dir_end], + target_buffer, + path_start_in_target, + ); + if target_buffer.ends_with('/') { + target_buffer.pop(); + append_and_remove_dot_segments_with_extra_slash( + r_path, + target_buffer, + path_start_in_target, + ); + } else {
+ append_and_remove_dot_segments( + r_path, + target_buffer, + path_start_in_target, + ); + } + } + } + path_end = target_buffer.len(); + + // T.query = R.query; + target_buffer.push_str(r_query); + query_end = target_buffer.len(); + } + // T.fragment = R.fragment; + target_buffer.push_str(r_fragment); + fragment_end = target_buffer.len(); + } + } + Ok(IriElementsPositions { + scheme_end, + authority_end, + path_end, + query_end, + fragment_end, + }) +} + +// RFC 3986 5.2.4 Remove Dot Segments +fn append_and_remove_dot_segments( + mut input: &str, + output: &mut String, + path_start_in_output: usize, +) { + while !input.is_empty() { + if input.starts_with("../") { + input = &input[3..]; + } else if input.starts_with("./") || input.starts_with("/./") { + input = &input[2..]; + } else if input == "/." { + input = "/"; + } else if input.starts_with("/../") { + pop_last_segment(output, path_start_in_output); + input = &input[3..]; + } else if input == "/.." { + pop_last_segment(output, path_start_in_output); + input = "/"; + } else if input == "." || input == ".." { + input = ""; + } else { + if input.starts_with('/') { + output.push('/'); + input = &input[1..]; + } + if let Some(i) = input.find('/') { + output.push_str(&input[..i]); + input = &input[i..]; + } else { + output.push_str(input); + input = ""; + } + } + } +} + +fn pop_last_segment(buffer: &mut String, path_start_in_buffer: usize) { + if let Some((last_slash_position, _)) = buffer[path_start_in_buffer..] + .char_indices() + .rev() + .find(|(_, c)| *c == '/') + { + buffer.truncate(last_slash_position + path_start_in_buffer) + } +} + +fn append_and_remove_dot_segments_with_extra_slash( + input: &str, + output: &mut String, + path_start_in_output: usize, +) { + if input.is_empty() { + output.push('/'); + } else if input.starts_with("./") { + append_and_remove_dot_segments(&input[1..], output, path_start_in_output) + } else if input == "." 
{ + append_and_remove_dot_segments("/", output, path_start_in_output) + } else if input.starts_with("../") { + pop_last_segment(output, path_start_in_output); + append_and_remove_dot_segments(&input[2..], output, path_start_in_output) + } else if input == ".." { + pop_last_segment(output, path_start_in_output); + append_and_remove_dot_segments("/", output, path_start_in_output) + } else { + output.push('/'); + if let Some(i) = input.find('/') { + output.push_str(&input[..i]); + append_and_remove_dot_segments(&input[i..], output, path_start_in_output) + } else { + output.push_str(input); + } + } +} + +fn parse_iri(value: &[u8], start: usize) -> Result { + // IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] + let scheme_end = parse_scheme(value, start)?; + if scheme_end >= value.len() || value[scheme_end] != b':' { + return Err(scheme_end); + } + + let (authority_end, path_end) = parse_ihier_part(value, scheme_end + 1)?; + + let query_end = if path_end < value.len() && value[path_end] == b'?' { + parse_iquery(value, path_end + 1)? + } else { + path_end + }; + + let fragment_end = if query_end < value.len() && value[query_end] == b'#' { + parse_ifragment(value, query_end + 1)? 
+ } else { + query_end + }; + + Ok(IriElementsPositions { + scheme_end: scheme_end + 1, + authority_end, + path_end, + query_end, + fragment_end, + }) +} + +fn parse_ihier_part(value: &[u8], start: usize) -> Result<(usize, usize), usize> { + // (authority_end, path_end) + // ihier-part = "//" iauthority ipath-abempty / ipath-absolute / ipath-rootless / ipath-empty + if value[start..].starts_with(b"//") { + let authority_end = parse_iauthority(value, start + 2)?; + Ok((authority_end, parse_ipath_abempty(value, authority_end)?)) + } else if value[start..].starts_with(b"/") { + Ok((start, parse_ipath_absolute(value, start)?)) + } else { + match parse_ipath_rootless(value, start) { + Ok(i) => Ok((start, i)), + Err(i) => { + if i == start { + Ok((start, i)) // ipath empty + } else { + Err(i) + } + } + } + } +} + +fn parse_iri_reference(value: &[u8], start: usize) -> Result { + // IRI-reference = IRI / irelative-ref + match parse_iri(value, start) { + Ok(positions) => Ok(positions), + Err(_) => parse_irelative_ref(value, start), + } +} + +fn parse_irelative_ref(value: &[u8], start: usize) -> Result { + // irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] + let (authority_end, path_end) = parse_irelative_path(value, start)?; + + let query_end = if path_end < value.len() && value[path_end] == b'?' { + parse_iquery(value, path_end + 1)? + } else { + path_end + }; + let fragment_end = if query_end < value.len() && value[query_end] == b'#' { + parse_ifragment(&value, query_end + 1)? 
+ } else { + query_end + }; + + Ok(IriElementsPositions { + scheme_end: start, + authority_end, + path_end, + query_end, + fragment_end, + }) +} + +fn parse_irelative_path(value: &[u8], start: usize) -> Result<(usize, usize), usize> { + // (authority_end, path_end) + // irelative-part = "//" iauthority ipath-abempty / ipath-absolute / ipath-noscheme / ipath-empty + if value[start..].starts_with(b"//") { + let authority_end = parse_iauthority(&value, start + 2)?; + Ok((authority_end, parse_ipath_abempty(value, authority_end)?)) + } else if value[start..].starts_with(b"/") { + Ok((start, parse_ipath_absolute(value, start)?)) + } else { + match parse_ipath_noscheme(value, start) { + Ok(i) => Ok((start, i)), + Err(i) => { + if i == start { + Ok((start, i)) // ipath empty + } else { + Err(i) + } + } + } + } +} + +fn parse_scheme(value: &[u8], start: usize) -> IriState { + // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + if value.len() <= start || !is_alpha(value[start]) { + return Err(start); + } + for (i, c) in value[start..].iter().enumerate() { + match *c { + c if is_alpha(c) || is_digit(c) || c == b'+' || c == b'-' || c == b'.' => (), + _ => return Ok(start + i), + } + } + Err(value.len()) +} + +fn parse_iauthority(value: &[u8], start: usize) -> IriState { + // iauthority = [ iuserinfo "@" ] ihost [ ":" port ] + //TODO: implement properly + for (i, c) in value[start..].iter().enumerate() { + match *c { + b'/' | b'?' 
| b'#' => return Ok(start + i), + _ => (), + } + } + Ok(value.len()) +} + +fn parse_ipath_abempty(value: &[u8], start: usize) -> IriState { + // ipath-abempty = *( "/" isegment ) + let mut i = start; + while i < value.len() { + match value[i] { + b'/' => { + i = parse_isegment(value, i + 1)?; + } + _ => return Ok(i), + } + } + Ok(value.len()) +} + +fn parse_ipath_absolute(value: &[u8], start: usize) -> IriState { + // ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ] = "/" [ isegment-nz ipath-abempty ] + if !value[start..].starts_with(b"/") { + return Err(start); + } + + match parse_isegment_nz(value, start + 1) { + Ok(i) => parse_ipath_abempty(value, i), + Err(i) => { + if i == start + 1 { + Ok(i) // optional + } else { + Err(i) + } + } + } +} + +fn parse_ipath_noscheme(value: &[u8], start: usize) -> IriState { + // ipath-noscheme = isegment-nz-nc *( "/" isegment ) = isegment-nz-nc ipath-abempty + let i = parse_isegment_nz_nc(value, start)?; + parse_ipath_abempty(&value, i) +} + +fn parse_ipath_rootless(value: &[u8], start: usize) -> IriState { + // ipath-rootless = isegment-nz *( "/" isegment ) = isegment-nz ipath-abempty + let i = parse_isegment_nz(value, start)?; + parse_ipath_abempty(value, i) +} + +fn parse_isegment(value: &[u8], start: usize) -> IriState { + // isegment = *ipchar + //TODO: implement properly + for (i, c) in value[start..].iter().enumerate() { + match *c { + b'/' | b'?' | b'#' => return Ok(start + i), + _ => (), + } + } + Ok(value.len()) +} + +fn parse_isegment_nz(value: &[u8], start: usize) -> IriState { + // isegment-nz = 1*ipchar; an empty segment is reported at `start` because callers compare the error offset with it to accept an optional or empty path + let i = parse_isegment(value, start)?; + if i == start { + Err(start) + } else { + Ok(i) + } +} + +fn parse_isegment_nz_nc(value: &[u8], start: usize) -> IriState { + // isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" ) + //TODO: implement properly + for (i, c) in value[start..].iter().enumerate() { + match *c { + b'/' | b'?'
| b'#' | b':' => return if i == 0 { Err(start) } else { Ok(start + i) }, // `i` is relative to `start`: convert back to an absolute offset + _ => (), + } + } + Ok(value.len()) +} + +fn parse_iquery(value: &[u8], start: usize) -> IriState { + // iquery = *( ipchar / iprivate / "/" / "?" ) + //TODO: implement properly + for (i, c) in value[start..].iter().enumerate() { + if *c == b'#' { + return Ok(start + i); + } + } + Ok(value.len()) +} + +fn parse_ifragment(value: &[u8], _start: usize) -> IriState { + // ifragment = *( ipchar / "/" / "?" ) + //TODO: implement properly + Ok(value.len()) +} + +fn is_alpha(b: u8) -> bool { + match b { + b'a'..=b'z' | b'A'..=b'Z' => true, + _ => false, + } +} + +fn is_digit(b: u8) -> bool { + match b { + b'0'..=b'9' => true, + _ => false, + } +} + +#[test] +fn test_parsing() { + let examples = [ + "file://foo", + "ftp://ftp.is.co.za/rfc/rfc1808.txt", + "http://www.ietf.org/rfc/rfc2396.txt", + "ldap://[2001:db8::7]/c=GB?objectClass?one", + "mailto:John.Doe@example.com", + "news:comp.infosystems.www.servers.unix", + "tel:+1-816-555-1212", + "telnet://192.0.2.16:80/", + "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", + "http://example.com", + "http://example.com/", + "http://example.com/foo", + "http://example.com/foo/bar", + "http://example.com/foo/bar/", + "http://example.com/foo/bar?q=1&r=2", + "http://example.com/foo/bar/?q=1&r=2", + "http://example.com#toto", + "http://example.com/#toto", + "http://example.com/foo#toto", + "http://example.com/foo/bar#toto", + "http://example.com/foo/bar/#toto", + "http://example.com/foo/bar?q=1&r=2#toto", + "http://example.com/foo/bar/?q=1&r=2#toto", + ]; + + for e in &examples { + assert!( + Iri::parse(e.to_string()).is_ok(), + "{} is not recognized as an IRI", + e + ); + } +} + +#[test] +fn test_resolve_relative_iri() { + let base = "http://a/b/c/d;p?q"; + + let examples = [ + ("g:h", "g:h"), + ("g", "http://a/b/c/g"), + ("g/", "http://a/b/c/g/"), + ("/g", "http://a/g"), + ("//g", "http://g"), + ("?y", "http://a/b/c/d;p?y"), + ("g?y", "http://a/b/c/g?y"), +
("#s", "http://a/b/c/d;p?q#s"), + ("g#s", "http://a/b/c/g#s"), + ("g?y#s", "http://a/b/c/g?y#s"), + (";x", "http://a/b/c/;x"), + ("g;x", "http://a/b/c/g;x"), + ("g;x?y#s", "http://a/b/c/g;x?y#s"), + ("", "http://a/b/c/d;p?q"), + (".", "http://a/b/c/"), + ("./", "http://a/b/c/"), + ("./g", "http://a/b/c/g"), + ("..", "http://a/b/"), + ("../", "http://a/b/"), + ("../g", "http://a/b/g"), + ("../..", "http://a/"), + ("../../", "http://a/"), + ("../../g", "http://a/g"), + ("../../../g", "http://a/g"), + ("../../../../g", "http://a/g"), + ("/./g", "http://a/g"), + ("/../g", "http://a/g"), + ("g.", "http://a/b/c/g."), + (".g", "http://a/b/c/.g"), + ("g..", "http://a/b/c/g.."), + ("..g", "http://a/b/c/..g"), + ("./../g", "http://a/b/g"), + ("./g/.", "http://a/b/c/g/"), + ("g/./h", "http://a/b/c/g/h"), + ("g/../h", "http://a/b/c/h"), + ("g;x=1/./y", "http://a/b/c/g;x=1/y"), + ("g;x=1/../y", "http://a/b/c/y"), + ("g?y/./x", "http://a/b/c/g?y/./x"), + ("g?y/../x", "http://a/b/c/g?y/../x"), + ("g#s/./x", "http://a/b/c/g#s/./x"), + ("g#s/../x", "http://a/b/c/g#s/../x"), + ("http:g", "http:g"), + ("./g:h", "http://a/b/c/g:h"), + ]; + + let base = Iri::parse(base.to_owned()).unwrap(); + for (input, output) in examples.iter() { + let result = base.resolve(input); + assert!( + result.is_ok(), + "Resolving of {} failed with error: {}", + input, + result.unwrap_err() + ); + let result = result.unwrap().into_string(); + assert_eq!( + result, *output, + "Resolving of {} is wrong. 
Found {} and expecting {}", + input, result, output + ); + } +} diff --git a/lib/src/model/literal.rs b/lib/src/model/literal.rs index 00be78fb..9616561a 100644 --- a/lib/src/model/literal.rs +++ b/lib/src/model/literal.rs @@ -133,14 +133,11 @@ impl Literal { value: impl Into, language: impl Into, ) -> Self { - let language = language.into(); + let mut language = language.into(); + language.make_ascii_lowercase(); Literal(LiteralContent::LanguageTaggedString { value: value.into(), - language: if language.bytes().all(|c| c.is_ascii_lowercase()) { - language - } else { - language.to_ascii_lowercase() - }, + language, }) } diff --git a/lib/src/model/mod.rs b/lib/src/model/mod.rs index 73c14182..c25becac 100644 --- a/lib/src/model/mod.rs +++ b/lib/src/model/mod.rs @@ -4,6 +4,7 @@ mod blank_node; mod graph; +mod iri; mod isomorphism; mod literal; mod named_node; @@ -12,6 +13,8 @@ pub mod vocab; pub use crate::model::blank_node::BlankNode; pub use crate::model::graph::SimpleGraph; +pub(crate) use crate::model::iri::Iri; +pub(crate) use crate::model::iri::IriParseError; pub use crate::model::literal::Literal; pub use crate::model::named_node::NamedNode; pub use crate::model::triple::NamedOrBlankNode; diff --git a/lib/src/model/named_node.rs b/lib/src/model/named_node.rs index 8e692807..2ea810d9 100644 --- a/lib/src/model/named_node.rs +++ b/lib/src/model/named_node.rs @@ -1,18 +1,17 @@ +use crate::model::Iri; +use crate::Result; use rio_api::model as rio; use std::fmt; /// A RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) /// -/// The common way to build it is to use the `FromStr::from_str` trait method. -/// This method takes care of usual IRI normalization and validation. 
-/// /// The default string formatter is returning a N-Triples, Turtle and SPARQL compatible representation: /// ``` /// use rudf::model::NamedNode; /// /// assert_eq!( /// "", -/// NamedNode::new("http://example.com/foo").to_string() +/// NamedNode::parse("http://example.com/foo").unwrap().to_string() /// ) /// ``` /// @@ -31,8 +30,16 @@ impl fmt::Display for NamedNode { } impl NamedNode { - /// Builds a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) - pub fn new(iri: impl Into) -> Self { + /// Builds and validate a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) + pub fn parse(iri: impl Into) -> Result { + Ok(Self::new_from_iri(Iri::parse(iri.into())?)) + } + + pub(crate) fn new_from_iri(iri: Iri) -> Self { + Self::new_from_string(iri.into_string()) + } + + pub(crate) fn new_from_string(iri: impl Into) -> Self { Self { iri: iri.into() } } diff --git a/lib/src/model/vocab.rs b/lib/src/model/vocab.rs index 20ff974a..30b64051 100644 --- a/lib/src/model/vocab.rs +++ b/lib/src/model/vocab.rs @@ -8,54 +8,54 @@ pub mod rdf { lazy_static! { /// The class of containers of alternatives. pub static ref ALT: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#Alt"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#Alt"); /// The class of unordered containers. pub static ref BAG: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag"); /// The first item in the subject RDF list. pub static ref FIRST: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"); /// The class of HTML literal values. 
pub static ref HTML: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML"); /// The class of language-tagged string literal values. pub static ref LANG_STRING: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"); /// The class of RDF Lists. pub static ref LIST: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#List"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#List"); pub static ref NIL: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"); /// The object of the subject RDF statement. pub static ref OBJECT: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#object"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#object"); /// The predicate of the subject RDF statement. pub static ref PREDICATE: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate"); /// The class of RDF properties. pub static ref PROPERTY: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"); /// The rest of the subject RDF list after the first item. pub static ref REST: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"); /// The class of ordered containers. 
pub static ref SEQ: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq"); /// The class of RDF statements. pub static ref STATEMENT: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement"); /// The subject of the subject RDF statement. pub static ref SUBJECT: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#subject"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#subject"); /// The subject is an instance of a class. pub static ref TYPE: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); /// Idiomatic property used for structured values. pub static ref VALUE: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#value"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#value"); /// The class of XML literal values. pub static ref XML_LITERAL: NamedNode = - NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"); + NamedNode::new_from_string("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"); } } @@ -67,49 +67,49 @@ pub mod rdfs { lazy_static! { /// The class of classes. pub static ref CLASS: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#Class"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#Class"); /// A description of the subject resource. pub static ref COMMENT: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#comment"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#comment"); /// The class of RDF containers. 
pub static ref CONTAINER: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#Container"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#Container"); /// The class of container membership properties, rdf:_1, rdf:_2, ..., all of which are sub-properties of 'member'. pub static ref CONTAINER_MEMBERSHIP_PROPERTY: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty"); /// The class of RDF datatypes. pub static ref DATATYPE: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#Datatype"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#Datatype"); /// A domain of the subject property. pub static ref DOMAIN: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#domain"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#domain"); /// The definition of the subject resource. pub static ref IS_DEFINED_BY: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#isDefinedBy"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#isDefinedBy"); /// A human-readable name for the subject. pub static ref LABEL: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#label"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#label"); /// The class of literal values, e.g. textual strings and integers. pub static ref LITERAL: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#Literal"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#Literal"); /// A member of the subject resource. pub static ref MEMBER: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#member"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#member"); /// A range of the subject property. 
pub static ref RANGE: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#range"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#range"); /// The class resource, everything. pub static ref RESOURCE: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#Resource"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#Resource"); /// Further information about the subject resource. pub static ref SEE_ALSO: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#seeAlso"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#seeAlso"); /// The subject is a subclass of a class. pub static ref SUB_CLASS_OF: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#subClassOf"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#subClassOf"); /// The subject is a subproperty of a property. pub static ref SUB_PROPERTY_OF: NamedNode = - NamedNode::new("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"); + NamedNode::new_from_string("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"); } } @@ -121,93 +121,93 @@ pub mod xsd { lazy_static! 
{ /// true, false pub static ref BOOLEAN: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#boolean"); /// 128…+127 (8 bit) pub static ref BYTE: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#byte"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#byte"); /// Dates (yyyy-mm-dd) with or without timezone pub static ref DATE: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#date"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#date"); /// Duration of time (days, hours, minutes, seconds only) pub static ref DAY_TIME_DURATION: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#dayTimeDuration"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#dayTimeDuration"); /// Date and time with or without timezone pub static ref DATE_TIME: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#dateTime"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#dateTime"); /// Date and time with required timezone pub static ref DATE_TIME_STAMP: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#dateTimeStamp"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#dateTimeStamp"); /// Arbitrary-precision decimal numbers pub static ref DECIMAL: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#decimal"); /// 64-bit floating point numbers incl. ±Inf, ±0, NaN pub static ref DOUBLE: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#double"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#double"); /// Duration of time pub static ref DURATION: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#duration"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#duration"); /// 32-bit floating point numbers incl. 
±Inf, ±0, NaN pub static ref FLOAT: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#float"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#float"); /// Gregorian calendar day of the month pub static ref G_DAY: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#gDay"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#gDay"); /// Gregorian calendar month pub static ref G_MONTH: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#gMonth"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#gMonth"); /// Gregorian calendar month and day pub static ref G_MONTH_DAY: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#gMonthDay"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#gMonthDay"); /// Gregorian calendar year pub static ref G_YEAR: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#gYear"); /// Gregorian calendar year and month pub static ref G_YEAR_MONTH: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#gYearMonth"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#gYearMonth"); /// -2147483648…+2147483647 (32 bit) pub static ref INT: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#int"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#int"); /// Arbitrary-size integer numbers pub static ref INTEGER: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#integer"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#integer"); /// -9223372036854775808…+9223372036854775807 (64 bit) pub static ref LONG: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#long"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#long"); /// Integer numbers <0 pub static ref NEGATIVE_INTEGER: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#negativeInteger"); + 
NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#negativeInteger"); /// Integer numbers ≥0 pub static ref NON_NEGATIVE_INTEGER: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#nonNegativeInteger"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#nonNegativeInteger"); /// Integer numbers ≤0 pub static ref NON_POSITIVE_INTEGER: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#nonPositiveInteger"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#nonPositiveInteger"); /// Integer numbers >0 pub static ref POSITIVE_INTEGER: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#positiveInteger"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#positiveInteger"); /// Times (hh:mm:ss.sss…) with or without timezone pub static ref TIME: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#time"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#time"); /// -32768…+32767 (16 bit) pub static ref SHORT: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#short"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#short"); /// Character strings (but not all Unicode character strings) pub static ref STRING: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#string"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#string"); /// 0…255 (8 bit) pub static ref UNSIGNED_BYTE: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#unsignedByte"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#unsignedByte"); /// 0…4294967295 (32 bit) pub static ref UNSIGNED_INT: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#unsignedInt"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#unsignedInt"); /// 0…18446744073709551615 (64 bit) pub static ref UNSIGNED_LONG: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#unsignedLong"); + 
NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#unsignedLong"); /// 0…65535 (16 bit) pub static ref UNSIGNED_SHORT: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#unsignedShort"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#unsignedShort"); /// Duration of time (months and years only) pub static ref YEAR_MONTH_DURATION: NamedNode = - NamedNode::new("http://www.w3.org/2001/XMLSchema#yearMonthDuration"); + NamedNode::new_from_string("http://www.w3.org/2001/XMLSchema#yearMonthDuration"); } } diff --git a/lib/src/repository.rs b/lib/src/repository.rs index 469bc05b..19c36c91 100644 --- a/lib/src/repository.rs +++ b/lib/src/repository.rs @@ -21,7 +21,7 @@ use std::io::{BufRead, Read}; /// let connection = repository.connection().unwrap(); /// /// // insertion -/// let ex = NamedNode::new("http://example.com"); +/// let ex = NamedNode::parse("http://example.com").unwrap(); /// let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), None); /// connection.insert(&quad); /// @@ -71,7 +71,7 @@ pub trait RepositoryConnection: Clone { /// let connection = repository.connection().unwrap(); /// /// // insertions - /// let ex = NamedNode::new("http://example.com"); + /// let ex = NamedNode::parse("http://example.com").unwrap(); /// connection.insert(&Quad::new(ex.clone(), ex.clone(), ex.clone(), None)); /// /// // SPARQL query @@ -94,7 +94,7 @@ pub trait RepositoryConnection: Clone { /// let connection = repository.connection().unwrap(); /// /// // insertion - /// let ex = NamedNode::new("http://example.com"); + /// let ex = NamedNode::parse("http://example.com").unwrap(); /// let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), None); /// connection.insert(&quad); /// @@ -128,7 +128,7 @@ pub trait RepositoryConnection: Clone { /// /// // quad filter /// let results: Result> = connection.quads_for_pattern(None, None, None, None).collect(); - /// let ex = NamedNode::new("http://example.com"); + /// let ex = 
NamedNode::parse("http://example.com").unwrap(); /// assert_eq!(vec![Quad::new(ex.clone(), ex.clone(), ex.clone(), None)], results.unwrap()); /// ``` fn load_graph( @@ -155,7 +155,7 @@ pub trait RepositoryConnection: Clone { /// /// // quad filter /// let results: Result> = connection.quads_for_pattern(None, None, None, None).collect(); - /// let ex = NamedNode::new("http://example.com"); + /// let ex = NamedNode::parse("http://example.com").unwrap(); /// assert_eq!(vec![Quad::new(ex.clone(), ex.clone(), ex.clone(), Some(ex.into()))], results.unwrap()); /// ``` fn load_dataset( diff --git a/lib/src/sparql/parser.rs b/lib/src/sparql/parser.rs index 80979f1f..2bf60200 100644 --- a/lib/src/sparql/parser.rs +++ b/lib/src/sparql/parser.rs @@ -22,8 +22,6 @@ mod grammar { use std::io::BufReader; use std::io::Read; use std::str::Chars; - use url::ParseOptions; - use url::Url; struct FocusedTriplePattern { focus: F, @@ -296,15 +294,19 @@ mod grammar { } pub struct ParserState { - base_uri: Option, + base_iri: Option, namespaces: HashMap, bnodes_map: BTreeMap, aggregations: BTreeMap, } impl ParserState { - fn url_parser(&self) -> ParseOptions<'_> { - Url::options().base_url(self.base_uri.as_ref()) + fn parse_iri(&self, iri: &str) -> Result { + if let Some(base_iri) = &self.base_iri { + base_iri.resolve(iri) + } else { + Iri::parse(iri.to_owned()) + } } fn new_aggregation(&mut self, agg: Aggregation) -> Variable { @@ -533,11 +535,11 @@ mod grammar { pub fn read_sparql_query<'a, R: Read + 'a>( source: R, - base_uri: Option<&'a str>, + base_iri: Option<&'a str>, ) -> super::super::super::Result { let mut state = ParserState { - base_uri: if let Some(base_uri) = base_uri { - Some(Url::parse(base_uri)?) + base_iri: if let Some(base_iri) = base_iri { + Some(Iri::parse(base_iri.to_owned())?) 
} else { None }, diff --git a/lib/src/sparql/sparql_grammar.rustpeg b/lib/src/sparql/sparql_grammar.rustpeg index 0665bf6a..5289719f 100644 --- a/lib/src/sparql/sparql_grammar.rustpeg +++ b/lib/src/sparql/sparql_grammar.rustpeg @@ -19,19 +19,13 @@ Query -> QueryVariants = _ Prologue _ q:(SelectQuery / ConstructQuery / Describe Prologue -> () = (BaseDecl _ / PrefixDecl _)* //[5] -BaseDecl -> () = "BASE"i _ i:IRIREF {? - match state.url_parser().parse(&i) { - Ok(url) => { - state.base_uri = Some(url); - Ok(()) - }, - Err(error) => Err("IRI parsing failed") - } +BaseDecl -> () = "BASE"i _ i:IRIREF { + state.base_iri = Some(i) } //[6] PrefixDecl -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { - state.namespaces.insert(ns.into(), i); + state.namespaces.insert(ns.into(), i.into_string()); } //[7] @@ -905,16 +899,17 @@ BooleanLiteral -> Literal = String -> String = STRING_LITERAL_LONG1 / STRING_LITERAL_LONG2 / STRING_LITERAL1 / STRING_LITERAL2 //[136] -iri -> NamedNode = i:(IRIREF / PrefixedName) {? - match state.url_parser().parse(&i) { - Ok(url) => Ok(NamedNode::new(url.into_string())), - Err(error) => Err("IRI parsing failed") - } +iri -> NamedNode = i:(IRIREF / PrefixedName) { + NamedNode::new_from_iri(i) } //[137] -PrefixedName -> String = PNAME_LN / - ns:PNAME_NS {? state.namespaces.get(ns).cloned().ok_or("Prefix not found") } +PrefixedName -> Iri = PNAME_LN / + ns:PNAME_NS {? if let Some(iri) = state.namespaces.get(ns).cloned() { + Iri::parse(iri).map_err(|_| "IRI parsing failed") + } else { + Err("Prefix not found") + } } //[138] BlankNode -> BlankNode = @@ -922,8 +917,8 @@ BlankNode -> BlankNode = ANON { BlankNode::default() } //[139] -IRIREF -> String = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}])*) ">" { - i.to_owned() +IRIREF -> Iri = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}])*) ">" {? + state.parse_iri(i).map_err(|_| "IRI parsing failed") } //[140] @@ -932,8 +927,12 @@ PNAME_NS -> &'input str = ns:$(PN_PREFIX? 
":") { } //[141] -PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {? - state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)).ok_or("Prefix not found") +PNAME_LN -> Iri = ns:$(PNAME_NS) local:$(PN_LOCAL) {? + if let Some(iri) = state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)) { + Iri::parse(iri).map_err(|_| "IRI parsing failed") + } else { + Err("Prefix not found") + } } //[142] diff --git a/lib/src/sparql/xml_results.rs b/lib/src/sparql/xml_results.rs index 7a5bfa9a..84d4f777 100644 --- a/lib/src/sparql/xml_results.rs +++ b/lib/src/sparql/xml_results.rs @@ -339,9 +339,12 @@ impl Iterator for ResultsIterator { } else if attr.key == b"datatype" { match attr.unescaped_value() { Ok(val) => { - datatype = Some(NamedNode::new( + match NamedNode::parse( self.reader.decode(&val).to_string(), - )); + ) { + Ok(iri) => datatype = Some(iri), + Err(error) => return Some(Err(error)), + } } Err(error) => return Some(Err(error.into())), } @@ -360,7 +363,12 @@ impl Iterator for ResultsIterator { }, Event::Text(event) => match event.unescaped() { Ok(data) => match state { - State::Uri => term = Some(NamedNode::new(self.reader.decode(&data)).into()), + State::Uri => match NamedNode::parse(self.reader.decode(&data)) { + Ok(uri) => { + term = Some(uri.into()); + } + Err(error) => return Some(Err(error)), + }, State::BNode => { term = Some( self.bnodes_map diff --git a/lib/src/store/memory.rs b/lib/src/store/memory.rs index 4eb6aeb2..3ec9f60e 100644 --- a/lib/src/store/memory.rs +++ b/lib/src/store/memory.rs @@ -23,7 +23,7 @@ use std::sync::RwLockWriteGuard; /// let connection = repository.connection().unwrap(); /// /// // insertion -/// let ex = NamedNode::new("http://example.com"); +/// let ex = NamedNode::parse("http://example.com").unwrap(); /// let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), None); /// connection.insert(&quad); /// diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 
4c08f387..cea6ef61 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -714,7 +714,7 @@ impl Encoder { //TODO: optimize self.encode_literal(&Literal::new_typed_literal( value, - NamedNode::new(datatype.iri), + NamedNode::new_from_string(datatype.iri), )) } } @@ -781,7 +781,7 @@ impl Encoder { Err(format_err!("The default graph tag is not a valid term")) } EncodedTerm::NamedNode { iri_id } => { - Ok(NamedNode::new(self.string_store.get_str(iri_id)?).into()) + Ok(NamedNode::new_from_string(self.string_store.get_str(iri_id)?).into()) } EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()), EncodedTerm::StringLiteral { value_id } => { @@ -800,7 +800,7 @@ impl Encoder { datatype_id, } => Ok(Literal::new_typed_literal( self.string_store.get_str(value_id)?, - NamedNode::new(self.string_store.get_str(datatype_id)?), + NamedNode::new_from_string(self.string_store.get_str(datatype_id)?), ) .into()), EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(value).into()), @@ -884,9 +884,9 @@ impl From> for MutexPoisonError { fn test_encoding() { let encoder: Encoder = Encoder::default(); let terms: Vec = vec![ - NamedNode::new("http://foo.com").into(), - NamedNode::new("http://bar.com").into(), - NamedNode::new("http://foo.com").into(), + NamedNode::new_from_string("http://foo.com").into(), + NamedNode::new_from_string("http://bar.com").into(), + NamedNode::new_from_string("http://foo.com").into(), BlankNode::default().into(), Literal::new_simple_literal("foo").into(), Literal::from(true).into(), diff --git a/lib/src/store/rocksdb.rs b/lib/src/store/rocksdb.rs index bfab7ea8..601ec2b9 100644 --- a/lib/src/store/rocksdb.rs +++ b/lib/src/store/rocksdb.rs @@ -33,7 +33,7 @@ use std::sync::Mutex; /// let connection = repository.connection().unwrap(); /// /// // insertion -/// let ex = NamedNode::new("http://example.com"); +/// let ex = NamedNode::parse("http://example.com").unwrap(); /// let quad = Quad::new(ex.clone(), ex.clone(), 
ex.clone(), None); /// connection.insert(&quad); /// diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 14c94278..b3db0554 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -18,10 +18,10 @@ fn sparql_w3c_syntax_testsuite() -> Result<()> { let manifest_11_url = "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/syntax-query/manifest.ttl"; let test_blacklist = vec![ - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct02"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct04"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-function-04"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-qname-04"), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct02").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-form-construct04").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql2/manifest#syntax-function-04").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/syntax-sparql1/manifest#syntax-qname-04").unwrap(), ]; for test_result in TestManifest::new(manifest_10_url).chain(TestManifest::new(manifest_11_url)) @@ -85,29 +85,25 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { ]; let test_blacklist = vec![ //Multiple writing of the same xsd:integer. Our system does strong normalization. 
- NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-1"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-9"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-1"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-2"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-1"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-2"), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-1").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-9").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-1").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-str-2").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-1").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-equals/manifest#eq-graph-2").unwrap(), //Multiple writing of the same xsd:double. Our system does strong normalization. 
- NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-simple"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-eq"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-not-eq"), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-simple").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-eq").unwrap(), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#sameTerm-not-eq").unwrap(), //Simple literal vs xsd:string. We apply RDF 1.1 - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-2"), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/distinct/manifest#distinct-2").unwrap(), //URI normalization: we are not normalizing well - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-1"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-2"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-3"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#kanji-1"), - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#kanji-2"), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/i18n/manifest#normalization-2").unwrap(), //Test on curly brace scoping with OPTIONAL filter - 
NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified"), + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/optional-filter/manifest#dawg-optional-filter-005-not-simplified").unwrap(), //DATATYPE("foo"@en) returns rdf:langString in SPARQL 1.1 - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-datatype-2") + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-datatype-2").unwrap() ]; for test_result in manifest_10_urls @@ -127,7 +123,7 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { load_graph_to_repository( &graph_data, &repository.connection()?, - Some(&NamedNode::new(graph_data).into()), + Some(&NamedNode::parse(graph_data)?.into()), )?; } match repository @@ -260,21 +256,30 @@ mod rs { lazy_static! { pub static ref RESULT_SET: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#ResultSet"); - pub static ref RESULT_VARIABLE: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#resultVariable"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#ResultSet") + .unwrap(); + pub static ref RESULT_VARIABLE: NamedNode = NamedNode::parse( + "http://www.w3.org/2001/sw/DataAccess/tests/result-set#resultVariable" + ) + .unwrap(); pub static ref SOLUTION: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#solution"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#solution") + .unwrap(); pub static ref BINDING: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#binding"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#binding") + .unwrap(); pub static ref VALUE: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#value"); + 
NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#value") + .unwrap(); pub static ref VARIABLE: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#variable"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#variable") + .unwrap(); pub static ref INDEX: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#index"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#index") + .unwrap(); pub static ref BOOLEAN: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/result-set#boolean"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/result-set#boolean") + .unwrap(); } } @@ -409,15 +414,20 @@ pub mod mf { lazy_static! { pub static ref INCLUDE: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#include"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#include") + .unwrap(); pub static ref ENTRIES: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#entries"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#entries") + .unwrap(); pub static ref NAME: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#name"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#name") + .unwrap(); pub static ref ACTION: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action") + .unwrap(); pub static ref RESULT: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#result"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#result") + .unwrap(); } } @@ -427,11 +437,13 @@ pub mod qt { lazy_static! 
{ pub static ref QUERY: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-query#query"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-query#query") + .unwrap(); pub static ref DATA: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-query#data"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-query#data").unwrap(); pub static ref GRAPH_DATA: NamedNode = - NamedNode::new("http://www.w3.org/2001/sw/DataAccess/tests/test-query#graphData"); + NamedNode::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-query#graphData") + .unwrap(); } } @@ -523,7 +535,8 @@ impl Iterator for TestManifest { None => { match self.manifests_to_do.pop() { Some(url) => { - let manifest = NamedOrBlankNode::from(NamedNode::new(url.clone())); + let manifest = + NamedOrBlankNode::from(NamedNode::parse(url.clone()).unwrap()); match load_graph(&url) { Ok(g) => self.graph.extend(g.into_iter()), Err(e) => return Some(Err(e.into())),