diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a1972a8b..b6e55e86 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -27,9 +27,9 @@ rust_decimal = "1" chrono = "0.4" failure = "0.1" regex = "1" -rio_api = "0.2" -rio_turtle = "0.2" -rio_xml = "0.2" +rio_api = "0.3" +rio_turtle = "0.3" +rio_xml = "0.3" [build-dependencies] peg = "0.5" diff --git a/lib/src/model/blank_node.rs b/lib/src/model/blank_node.rs index a2607f28..be37a4c4 100644 --- a/lib/src/model/blank_node.rs +++ b/lib/src/model/blank_node.rs @@ -1,3 +1,4 @@ +use rio_api::model as rio; use std::fmt; use uuid::Uuid; @@ -13,31 +14,46 @@ use uuid::Uuid; /// #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct BlankNode { - id: Uuid, + uuid: Uuid, + id: String, } impl BlankNode { - /// Returns the underlying UUID of this blank node - pub fn as_uuid(&self) -> &Uuid { + /// Returns the underlying ID of this blank node + pub fn as_str(&self) -> &str { &self.id } + + /// Returns the underlying UUID of this blank node + pub fn uuid(&self) -> Uuid { + self.uuid + } } impl fmt::Display for BlankNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "_:{}", self.id.to_simple()) + rio::BlankNode::from(self).fmt(f) } } impl Default for BlankNode { /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id fn default() -> Self { - Self { id: Uuid::new_v4() } + Self::from(Uuid::new_v4()) } } impl From for BlankNode { fn from(id: Uuid) -> Self { - Self { id } + Self { + uuid: id, + id: id.to_simple().to_string(), + } + } +} + +impl<'a> From<&'a BlankNode> for rio::BlankNode<'a> { + fn from(node: &'a BlankNode) -> Self { + rio::BlankNode { id: node.as_str() } } } diff --git a/lib/src/model/iri.rs b/lib/src/model/iri.rs deleted file mode 100644 index b1a5a160..00000000 --- a/lib/src/model/iri.rs +++ /dev/null @@ -1,632 +0,0 @@ -use std::error::Error; -use std::fmt; - -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct Iri { - iri: String, - positions: IriElementsPositions, -} - -impl Iri { - pub fn parse(iri: String) -> Result { - let base_positions = - parse_iri(iri.as_bytes(), 0).map_err(|position| IriParseError { position })?; - Ok(Self { - iri, - positions: base_positions, - }) - } - - pub fn resolve(&self, iri: &str) -> Result { - let mut target_buffer = String::with_capacity(self.iri.len() + iri.len()); - let positions = resolve_relative_iri(iri, &self.iri, &self.positions, &mut target_buffer) - .map_err(|position| IriParseError { position })?; - Ok(Self { - iri: target_buffer, - positions, - }) - } - - pub fn into_string(self) -> String { - self.iri - } -} - -#[derive(Debug)] -pub struct IriParseError { - position: usize, -} - -impl fmt::Display for IriParseError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Invalid IRI at char {}", self.position) - } -} - -impl Error for IriParseError {} - -type IriState = Result; // usize = the end position - -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -struct IriElementsPositions { - scheme_end: usize, - authority_end: usize, - path_end: usize, - query_end: usize, - fragment_end: usize, -} - -// RFC 3986 5.2 Relative Resolution algorithm -fn resolve_relative_iri( - reference_iri: &str, - base_iri: &str, - base_positions: &IriElementsPositions, - target_buffer: &mut String, -) -> Result { - let base_scheme = &base_iri[0..base_positions.scheme_end]; - let base_authority = &base_iri[base_positions.scheme_end..base_positions.authority_end]; - let base_path = &base_iri[base_positions.authority_end..base_positions.path_end]; - let base_query = &base_iri[base_positions.path_end..base_positions.query_end]; - - let reference_positions = parse_iri_reference(reference_iri.as_bytes(), 0)?; - let r_scheme = &reference_iri[0..reference_positions.scheme_end]; - let r_authority = - &reference_iri[reference_positions.scheme_end..reference_positions.authority_end]; - let r_path = &reference_iri[reference_positions.authority_end..reference_positions.path_end]; - let r_query = &reference_iri[reference_positions.path_end..reference_positions.query_end]; - let r_fragment = &reference_iri[reference_positions.query_end..]; - - let scheme_end; - let authority_end; - let path_end; - let query_end; - let fragment_end; - - // if defined(R.scheme) then - if !r_scheme.is_empty() { - // T.scheme = R.scheme; - target_buffer.push_str(r_scheme); - scheme_end = target_buffer.len(); - - // T.authority = R.authority; - target_buffer.push_str(r_authority); - authority_end = target_buffer.len(); - - // T.path = remove_dot_segments(R.path); - append_and_remove_dot_segments(r_path, target_buffer, target_buffer.len()); - path_end = target_buffer.len(); - - // T.query = R.query; - target_buffer.push_str(r_query); - query_end = target_buffer.len(); - - // T.fragment = R.fragment; - target_buffer.push_str(r_fragment); - fragment_end = target_buffer.len(); - } else { - // T.scheme = Base.scheme; - target_buffer.push_str(base_scheme); - scheme_end = target_buffer.len(); - - // if defined(R.authority) then - if !r_authority.is_empty() { - // T.authority = R.authority; - target_buffer.push_str(r_authority); - authority_end = target_buffer.len(); - - // T.path = remove_dot_segments(R.path); - append_and_remove_dot_segments(r_path, target_buffer, target_buffer.len()); - path_end = target_buffer.len(); - - // T.query = R.query; - target_buffer.push_str(r_query); - query_end = target_buffer.len(); - - // T.fragment = R.fragment; - target_buffer.push_str(r_fragment); - fragment_end = target_buffer.len(); - } else { - // T.authority = Base.authority; - target_buffer.push_str(base_authority); - authority_end = target_buffer.len(); - - // if (R.path == "") then - if r_path == "" { - // T.path = Base.path; - target_buffer.push_str(base_path); - path_end = target_buffer.len(); - - // if defined(R.query) then - if !r_query.is_empty() { - // T.query = R.query; - target_buffer.push_str(r_query); - } else { - // T.query = Base.query; - target_buffer.push_str(base_query); - } - query_end = target_buffer.len(); - } else { - // if (R.path starts-with "/") then - if r_path.starts_with('/') { - // T.path = remove_dot_segments(R.path); - append_and_remove_dot_segments(r_path, target_buffer, target_buffer.len()); - } else { - let path_start_in_target = target_buffer.len(); - // T.path = merge(Base.path, R.path); - // T.path = remove_dot_segments(T.path); - if base_positions.authority_end > base_positions.scheme_end - && base_positions.path_end == base_positions.authority_end - { - append_and_remove_dot_segments_with_extra_slash( - r_path, - target_buffer, - path_start_in_target, - ); - } else { - let last_base_slash = base_path - .char_indices() - .rev() - .find(|(_, c)| *c == '/') - .map_or(0, |(i, _)| i) - + base_positions.authority_end; - append_and_remove_dot_segments( - &base_iri[base_positions.authority_end..=last_base_slash], - target_buffer, - path_start_in_target, - ); - if target_buffer.ends_with('/') { - target_buffer.pop(); - append_and_remove_dot_segments_with_extra_slash( - r_path, - target_buffer, - path_start_in_target, - ); - } else { - append_and_remove_dot_segments( - r_path, - target_buffer, - path_start_in_target, - ); - } - } - } - path_end = target_buffer.len(); - - // T.query = R.query; - target_buffer.push_str(r_query); - query_end = target_buffer.len(); - } - // T.fragment = R.fragment; - target_buffer.push_str(r_fragment); - fragment_end = target_buffer.len(); - } - } - Ok(IriElementsPositions { - scheme_end, - authority_end, - path_end, - query_end, - fragment_end, - }) -} - -// RFC 3986 5.2.4 Remove Dot Segments -fn append_and_remove_dot_segments( - mut input: &str, - output: &mut String, - path_start_in_output: usize, -) { - while !input.is_empty() { - if input.starts_with("../") { - input = &input[3..]; - } else if input.starts_with("./") || input.starts_with("/./") { - input = &input[2..]; - } else if input == "/." { - input = "/"; - } else if input.starts_with("/../") { - pop_last_segment(output, path_start_in_output); - input = &input[3..]; - } else if input == "/.." { - pop_last_segment(output, path_start_in_output); - input = "/"; - } else if input == "." || input == ".." { - input = ""; - } else { - if input.starts_with('/') { - output.push('/'); - input = &input[1..]; - } - if let Some(i) = input.find('/') { - output.push_str(&input[..i]); - input = &input[i..]; - } else { - output.push_str(input); - input = ""; - } - } - } -} - -fn pop_last_segment(buffer: &mut String, path_start_in_buffer: usize) { - if let Some((last_slash_position, _)) = buffer[path_start_in_buffer..] - .char_indices() - .rev() - .find(|(_, c)| *c == '/') - { - buffer.truncate(last_slash_position + path_start_in_buffer) - } -} - -fn append_and_remove_dot_segments_with_extra_slash( - input: &str, - output: &mut String, - path_start_in_output: usize, -) { - if input.is_empty() { - output.push('/'); - } else if input.starts_with("./") { - append_and_remove_dot_segments(&input[1..], output, path_start_in_output) - } else if input == "." { - append_and_remove_dot_segments("/", output, path_start_in_output) - } else if input.starts_with("../") { - pop_last_segment(output, path_start_in_output); - append_and_remove_dot_segments(&input[2..], output, path_start_in_output) - } else if input == ".." { - pop_last_segment(output, path_start_in_output); - append_and_remove_dot_segments("/", output, path_start_in_output) - } else { - output.push('/'); - if let Some(i) = input.find('/') { - output.push_str(&input[..i]); - append_and_remove_dot_segments(&input[i..], output, path_start_in_output) - } else { - output.push_str(input); - } - } -} - -fn parse_iri(value: &[u8], start: usize) -> Result { - // IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] - let scheme_end = parse_scheme(value, start)?; - if scheme_end >= value.len() || value[scheme_end] != b':' { - return Err(scheme_end); - } - - let (authority_end, path_end) = parse_ihier_part(value, scheme_end + 1)?; - - let query_end = if path_end < value.len() && value[path_end] == b'?' { - parse_iquery(value, path_end + 1)? - } else { - path_end - }; - - let fragment_end = if query_end < value.len() && value[query_end] == b'#' { - parse_ifragment(value, query_end + 1)? - } else { - query_end - }; - - Ok(IriElementsPositions { - scheme_end: scheme_end + 1, - authority_end, - path_end, - query_end, - fragment_end, - }) -} - -fn parse_ihier_part(value: &[u8], start: usize) -> Result<(usize, usize), usize> { - // (authority_end, path_end) - // ihier-part = "//" iauthority ipath-abempty / ipath-absolute / ipath-rootless / ipath-empty - if value[start..].starts_with(b"//") { - let authority_end = parse_iauthority(value, start + 2)?; - Ok((authority_end, parse_ipath_abempty(value, authority_end)?)) - } else if value[start..].starts_with(b"/") { - Ok((start, parse_ipath_absolute(value, start)?)) - } else { - match parse_ipath_rootless(value, start) { - Ok(i) => Ok((start, i)), - Err(i) => { - if i == start { - Ok((start, i)) // ipath empty - } else { - Err(i) - } - } - } - } -} - -fn parse_iri_reference(value: &[u8], start: usize) -> Result { - // IRI-reference = IRI / irelative-ref - match parse_iri(value, start) { - Ok(positions) => Ok(positions), - Err(_) => parse_irelative_ref(value, start), - } -} - -fn parse_irelative_ref(value: &[u8], start: usize) -> Result { - // irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] - let (authority_end, path_end) = parse_irelative_path(value, start)?; - - let query_end = if path_end < value.len() && value[path_end] == b'?' { - parse_iquery(value, path_end + 1)? - } else { - path_end - }; - let fragment_end = if query_end < value.len() && value[query_end] == b'#' { - parse_ifragment(&value, query_end + 1)? - } else { - query_end - }; - - Ok(IriElementsPositions { - scheme_end: start, - authority_end, - path_end, - query_end, - fragment_end, - }) -} - -fn parse_irelative_path(value: &[u8], start: usize) -> Result<(usize, usize), usize> { - // (authority_end, path_end) - // irelative-part = "//" iauthority ipath-abempty / ipath-absolute / ipath-noscheme / ipath-empty - if value[start..].starts_with(b"//") { - let authority_end = parse_iauthority(&value, start + 2)?; - Ok((authority_end, parse_ipath_abempty(value, authority_end)?)) - } else if value[start..].starts_with(b"/") { - Ok((start, parse_ipath_absolute(value, start)?)) - } else { - match parse_ipath_noscheme(value, start) { - Ok(i) => Ok((start, i)), - Err(i) => { - if i == start { - Ok((start, i)) // ipath empty - } else { - Err(i) - } - } - } - } -} - -fn parse_scheme(value: &[u8], start: usize) -> IriState { - // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - if value.len() <= start || !is_alpha(value[start]) { - return Err(start); - } - for (i, c) in value[start..].iter().enumerate() { - match *c { - c if is_alpha(c) || is_digit(c) || c == b'+' || c == b'-' || c == b'.' => (), - _ => return Ok(start + i), - } - } - Err(value.len()) -} - -fn parse_iauthority(value: &[u8], start: usize) -> IriState { - // iauthority = [ iuserinfo "@" ] ihost [ ":" port ] - //TODO: implement properly - for (i, c) in value[start..].iter().enumerate() { - match *c { - b'/' | b'?' | b'#' => return Ok(start + i), - _ => (), - } - } - Ok(value.len()) -} - -fn parse_ipath_abempty(value: &[u8], start: usize) -> IriState { - // ipath-abempty = *( "/" isegment ) - let mut i = start; - while i < value.len() { - match value[i] { - b'/' => { - i = parse_isegment(value, i + 1)?; - } - _ => return Ok(i), - } - } - Ok(value.len()) -} - -fn parse_ipath_absolute(value: &[u8], start: usize) -> IriState { - // ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ] = "/" [ isegment-nz ipath-abempty ] - if !value[start..].starts_with(b"/") { - return Err(start); - } - - match parse_isegment_nz(value, start + 1) { - Ok(i) => parse_ipath_abempty(value, i), - Err(i) => { - if i == start + 1 { - Ok(i) // optional - } else { - Err(i) - } - } - } -} - -fn parse_ipath_noscheme(value: &[u8], start: usize) -> IriState { - // ipath-noscheme = isegment-nz-nc *( "/" isegment ) = isegment-nz-nc ipath-abempty - let i = parse_isegment_nz_nc(value, start)?; - parse_ipath_abempty(&value, i) -} - -fn parse_ipath_rootless(value: &[u8], start: usize) -> IriState { - // ipath-rootless = isegment-nz *( "/" isegment ) = isegment-nz ipath-abempty - let i = parse_isegment_nz(value, start)?; - parse_ipath_abempty(value, i) -} - -fn parse_isegment(value: &[u8], start: usize) -> IriState { - // isegment = *ipchar - //TODO: implement properly - for (i, c) in value[start..].iter().enumerate() { - match *c { - b'/' | b'?' | b'#' => return Ok(start + i), - _ => (), - } - } - Ok(value.len()) -} - -fn parse_isegment_nz(value: &[u8], start: usize) -> IriState { - // isegment-nz = 1*ipchar - let i = parse_isegment(value, start)?; - if i == start { - Err(0) - } else { - Ok(i) - } -} - -fn parse_isegment_nz_nc(value: &[u8], start: usize) -> IriState { - // isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" ) - //TODO: implement properly - for (i, c) in value[start..].iter().enumerate() { - match *c { - b'/' | b'?' | b'#' | b':' => return if i == start { Err(i) } else { Ok(i) }, - _ => (), - } - } - Ok(value.len()) -} - -fn parse_iquery(value: &[u8], start: usize) -> IriState { - // iquery = *( ipchar / iprivate / "/" / "?" ) - //TODO: implement properly - for (i, c) in value[start..].iter().enumerate() { - if *c == b'#' { - return Ok(start + i); - } - } - Ok(value.len()) -} - -fn parse_ifragment(value: &[u8], _start: usize) -> IriState { - // ifragment = *( ipchar / "/" / "?" ) - //TODO: implement properly - Ok(value.len()) -} - -fn is_alpha(b: u8) -> bool { - match b { - b'a'..=b'z' | b'A'..=b'Z' => true, - _ => false, - } -} - -fn is_digit(b: u8) -> bool { - match b { - b'0'..=b'9' => true, - _ => false, - } -} - -#[test] -fn test_parsing() { - let examples = [ - "file://foo", - "ftp://ftp.is.co.za/rfc/rfc1808.txt", - "http://www.ietf.org/rfc/rfc2396.txt", - "ldap://[2001:db8::7]/c=GB?objectClass?one", - "mailto:John.Doe@example.com", - "news:comp.infosystems.www.servers.unix", - "tel:+1-816-555-1212", - "telnet://192.0.2.16:80/", - "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", - "http://example.com", - "http://example.com/", - "http://example.com/foo", - "http://example.com/foo/bar", - "http://example.com/foo/bar/", - "http://example.com/foo/bar?q=1&r=2", - "http://example.com/foo/bar/?q=1&r=2", - "http://example.com#toto", - "http://example.com/#toto", - "http://example.com/foo#toto", - "http://example.com/foo/bar#toto", - "http://example.com/foo/bar/#toto", - "http://example.com/foo/bar?q=1&r=2#toto", - "http://example.com/foo/bar/?q=1&r=2#toto", - ]; - - for e in &examples { - assert!( - Iri::parse(e.to_string()).is_ok(), - "{} is not recognized as an IRI", - e - ); - } -} - -#[test] -fn test_resolve_relative_iri() { - let base = "http://a/b/c/d;p?q"; - - let examples = [ - ("g:h", "g:h"), - ("g", "http://a/b/c/g"), - ("g/", "http://a/b/c/g/"), - ("/g", "http://a/g"), - ("//g", "http://g"), - ("?y", "http://a/b/c/d;p?y"), - ("g?y", "http://a/b/c/g?y"), - ("#s", "http://a/b/c/d;p?q#s"), - ("g#s", "http://a/b/c/g#s"), - ("g?y#s", "http://a/b/c/g?y#s"), - (";x", "http://a/b/c/;x"), - ("g;x", "http://a/b/c/g;x"), - ("g;x?y#s", "http://a/b/c/g;x?y#s"), - ("", "http://a/b/c/d;p?q"), - (".", "http://a/b/c/"), - ("./", "http://a/b/c/"), - ("./g", "http://a/b/c/g"), - ("..", "http://a/b/"), - ("../", "http://a/b/"), - ("../g", "http://a/b/g"), - ("../..", "http://a/"), - ("../../", "http://a/"), - ("../../g", "http://a/g"), - ("../../../g", "http://a/g"), - ("../../../../g", "http://a/g"), - ("/./g", "http://a/g"), - ("/../g", "http://a/g"), - ("g.", "http://a/b/c/g."), - (".g", "http://a/b/c/.g"), - ("g..", "http://a/b/c/g.."), - ("..g", "http://a/b/c/..g"), - ("./../g", "http://a/b/g"), - ("./g/.", "http://a/b/c/g/"), - ("g/./h", "http://a/b/c/g/h"), - ("g/../h", "http://a/b/c/h"), - ("g;x=1/./y", "http://a/b/c/g;x=1/y"), - ("g;x=1/../y", "http://a/b/c/y"), - ("g?y/./x", "http://a/b/c/g?y/./x"), - ("g?y/../x", "http://a/b/c/g?y/../x"), - ("g#s/./x", "http://a/b/c/g#s/./x"), - ("g#s/../x", "http://a/b/c/g#s/../x"), - ("http:g", "http:g"), - ("./g:h", "http://a/b/c/g:h"), - ]; - - let base = Iri::parse(base.to_owned()).unwrap(); - for (input, output) in examples.iter() { - let result = base.resolve(input); - assert!( - result.is_ok(), - "Resolving of {} failed with error: {}", - input, - result.unwrap_err() - ); - let result = result.unwrap().into_string(); - assert_eq!( - result, *output, - "Resolving of {} is wrong. Found {} and expecting {}", - input, result, output - ); - } -} diff --git a/lib/src/model/literal.rs b/lib/src/model/literal.rs index ac76cb80..1fd3b3ea 100644 --- a/lib/src/model/literal.rs +++ b/lib/src/model/literal.rs @@ -1,13 +1,7 @@ use crate::model::named_node::NamedNode; use crate::model::vocab::rdf; use crate::model::vocab::xsd; -use chrono::format::{parse, Parsed, StrftimeItems}; use chrono::prelude::*; -use num_traits::identities::Zero; -use num_traits::FromPrimitive; -use num_traits::One; -use num_traits::ToPrimitive; -use ordered_float::OrderedFloat; use rio_api::model as rio; use rust_decimal::Decimal; use std::borrow::Cow; @@ -22,8 +16,8 @@ use std::option::Option; /// use rudf::model::vocab::xsd; /// /// assert_eq!( -/// "\"foo\\tbar\"", -/// Literal::new_simple_literal("foo\tbar").to_string() +/// "\"foo\\nbar\"", +/// Literal::new_simple_literal("foo\nbar").to_string() /// ); /// /// assert_eq!( @@ -43,16 +37,6 @@ pub struct Literal(LiteralContent); enum LiteralContent { String(String), LanguageTaggedString { value: String, language: String }, - Boolean(bool), - Float(OrderedFloat), - Double(OrderedFloat), - Integer(i128), - Decimal(Decimal), - Date(Date), - NaiveDate(NaiveDate), - NaiveTime(NaiveTime), - DateTime(DateTime), - NaiveDateTime(NaiveDateTime), TypedLiteral { value: String, datatype: NamedNode }, } @@ -66,77 +50,8 @@ impl Literal { pub fn new_typed_literal(value: impl Into, datatype: impl Into) -> Self { let value = value.into(); let datatype = datatype.into(); - Literal(if datatype == *xsd::BOOLEAN { - match value.as_str() { - "true" | "1" => LiteralContent::Boolean(true), - "false" | "0" => LiteralContent::Boolean(false), - _ => LiteralContent::TypedLiteral { value, datatype }, - } - } else if datatype == *xsd::STRING { + Literal(if datatype == *xsd::STRING { LiteralContent::String(value) - } else if datatype == *xsd::FLOAT { - match value.parse() { - Ok(value) => LiteralContent::Float(OrderedFloat(value)), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - } - } else if datatype == *xsd::DOUBLE { - match value.parse() { - Ok(value) => LiteralContent::Double(OrderedFloat(value)), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - } - } else if datatype == *xsd::INTEGER - || datatype == *xsd::BYTE - || datatype == *xsd::SHORT - || datatype == *xsd::INT - || datatype == *xsd::LONG - || datatype == *xsd::UNSIGNED_BYTE - || datatype == *xsd::UNSIGNED_SHORT - || datatype == *xsd::UNSIGNED_INT - || datatype == *xsd::UNSIGNED_LONG - || datatype == *xsd::POSITIVE_INTEGER - || datatype == *xsd::NEGATIVE_INTEGER - || datatype == *xsd::NON_POSITIVE_INTEGER - || datatype == *xsd::NON_NEGATIVE_INTEGER - { - match value.parse() { - Ok(value) => LiteralContent::Integer(value), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - } - } else if datatype == *xsd::DECIMAL { - match value.parse() { - Ok(value) => LiteralContent::Decimal(value), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - } - } else if datatype == *xsd::DATE { - let mut parsed = Parsed::new(); - match parse(&mut parsed, &value, StrftimeItems::new("%Y-%m-%d%:z")).and_then(|_| { - Ok(Date::from_utc( - parsed.to_naive_date()?, - parsed.to_fixed_offset()?, - )) - }) { - Ok(value) => LiteralContent::Date(value), - Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%dZ") { - Ok(value) => LiteralContent::Date(Date::from_utc(value, FixedOffset::east(0))), - Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%d") { - Ok(value) => LiteralContent::NaiveDate(value), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - }, - }, - } - } else if datatype == *xsd::TIME { - match NaiveTime::parse_from_str(&value, "%H:%M:%S") { - Ok(value) => LiteralContent::NaiveTime(value), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - } - } else if datatype == *xsd::DATE_TIME || datatype == *xsd::DATE_TIME_STAMP { - match DateTime::parse_from_rfc3339(&value) { - Ok(value) => LiteralContent::DateTime(value), - Err(_) => match NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S") { - Ok(value) => LiteralContent::NaiveDateTime(value), - Err(_) => LiteralContent::TypedLiteral { value, datatype }, - }, - } } else { LiteralContent::TypedLiteral { value, datatype } }) @@ -156,21 +71,11 @@ impl Literal { } /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) - pub fn value(&self) -> Cow<'_, str> { + pub fn value(&self) -> &str { match self.0 { LiteralContent::String(ref value) | LiteralContent::LanguageTaggedString { ref value, .. } - | LiteralContent::TypedLiteral { ref value, .. } => Cow::Borrowed(value), - LiteralContent::Boolean(value) => Cow::Owned(value.to_string()), - LiteralContent::Float(value) => Cow::Owned(value.to_string()), - LiteralContent::Double(value) => Cow::Owned(value.to_string()), - LiteralContent::Integer(value) => Cow::Owned(value.to_string()), - LiteralContent::Decimal(value) => Cow::Owned(value.to_string()), - LiteralContent::Date(value) => Cow::Owned(value.to_string()), - LiteralContent::NaiveDate(value) => Cow::Owned(value.to_string()), - LiteralContent::NaiveTime(value) => Cow::Owned(value.to_string()), - LiteralContent::DateTime(value) => Cow::Owned(value.to_string()), - LiteralContent::NaiveDateTime(value) => Cow::Owned(value.to_string()), + | LiteralContent::TypedLiteral { ref value, .. } => value, } } @@ -193,14 +98,6 @@ impl Literal { match self.0 { LiteralContent::String(_) => &xsd::STRING, LiteralContent::LanguageTaggedString { .. } => &rdf::LANG_STRING, - LiteralContent::Boolean(_) => &xsd::BOOLEAN, - LiteralContent::Float(_) => &xsd::FLOAT, - LiteralContent::Double(_) => &xsd::DOUBLE, - LiteralContent::Integer(_) => &xsd::INTEGER, - LiteralContent::Decimal(_) => &xsd::DECIMAL, - LiteralContent::Date(_) | LiteralContent::NaiveDate(_) => &xsd::DATE, - LiteralContent::NaiveTime(_) => &xsd::TIME, - LiteralContent::DateTime(_) | LiteralContent::NaiveDateTime(_) => &xsd::DATE_TIME, LiteralContent::TypedLiteral { ref datatype, .. } => datatype, } } @@ -215,226 +112,11 @@ impl Literal { _ => false, } } - - /// Checks if the literal has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string) and is valid - pub fn is_string(&self) -> bool { - match self.0 { - LiteralContent::String(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) and is valid - pub fn is_boolean(&self) -> bool { - match self.0 { - LiteralContent::Boolean(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:float](http://www.w3.org/2001/XMLSchema#float) and is valid - pub fn is_float(&self) -> bool { - match self.0 { - LiteralContent::Float(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:double](http://www.w3.org/2001/XMLSchema#double) and is valid - pub fn is_double(&self) -> bool { - match self.0 { - LiteralContent::Double(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:integer](http://www.w3.org/2001/XMLSchema#integer) and is valid - pub fn is_integer(&self) -> bool { - match self.0 { - LiteralContent::Integer(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:decimal](http://www.w3.org/2001/XMLSchema#decimal) or one of its sub datatype and is valid - pub fn is_decimal(&self) -> bool { - match self.0 { - LiteralContent::Integer(_) | LiteralContent::Decimal(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:date](http://www.w3.org/2001/XMLSchema#date) and is valid - pub fn is_date(&self) -> bool { - match self.0 { - LiteralContent::Date(_) | LiteralContent::NaiveDate(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:date](http://www.w3.org/2001/XMLSchema#time) and is valid - pub fn is_time(&self) -> bool { - match self.0 { - LiteralContent::NaiveTime(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:dateTime](http://www.w3.org/2001/XMLSchema#dateTime) or one of its sub datatype and is valid - pub fn is_date_time(&self) -> bool { - match self.0 { - LiteralContent::DateTime(_) | LiteralContent::NaiveDateTime(_) => true, - _ => false, - } - } - - /// Checks if the literal has the datatype [xsd:dateTimeStamp](http://www.w3.org/2001/XMLSchema#dateTimeStamp) or [xsd:dateTime](http://www.w3.org/2001/XMLSchema#dateTime) with a fixed timezone and is valid - pub fn is_date_time_stamp(&self) -> bool { - match self.0 { - LiteralContent::DateTime(_) => true, - _ => false, - } - } - - /// Returns the [effective boolean value](https://www.w3.org/TR/sparql11-query/#ebv) of the literal if it exists - pub fn to_bool(&self) -> Option { - match self.0 { - LiteralContent::String(ref value) => Some(!value.is_empty()), - LiteralContent::Boolean(value) => Some(value), - LiteralContent::Float(value) => Some(!value.is_zero()), - LiteralContent::Double(value) => Some(!value.is_zero()), - LiteralContent::Integer(value) => Some(!value.is_zero()), - LiteralContent::Decimal(value) => Some(!value.is_zero()), - _ => None, - } - } - - /// Returns the value of this literal as an f32 if it exists following the rules of [XPath xsd:float casting](https://www.w3.org/TR/xpath-functions/#casting-to-float) - pub fn to_float(&self) -> Option { - match self.0 { - LiteralContent::Float(value) => value.to_f32(), - LiteralContent::Double(value) => value.to_f32(), - LiteralContent::Integer(value) => value.to_f32(), - LiteralContent::Decimal(value) => value.to_f32(), - LiteralContent::Boolean(value) => Some(if value { 1. } else { 0. }), - LiteralContent::String(ref value) => value.parse().ok(), - _ => None, - } - } - - /// Returns the value of this literal as an f64 if it exists following the rules of [XPath xsd:double casting](https://www.w3.org/TR/xpath-functions/#casting-to-double) - pub fn to_double(&self) -> Option { - match self.0 { - LiteralContent::Float(value) => value.to_f64(), - LiteralContent::Double(value) => value.to_f64(), - LiteralContent::Integer(value) => value.to_f64(), - LiteralContent::Decimal(value) => value.to_f64(), - LiteralContent::Boolean(value) => Some(if value { 1. } else { 0. }), - LiteralContent::String(ref value) => value.parse().ok(), - _ => None, - } - } - - /// Returns the value of this literal as an i128 if it exists following the rules of [XPath xsd:integer casting](https://www.w3.org/TR/xpath-functions/#casting-to-integer) - pub fn to_integer(&self) -> Option { - match self.0 { - LiteralContent::Float(value) => value.to_i128(), - LiteralContent::Double(value) => value.to_i128(), - LiteralContent::Integer(value) => value.to_i128(), - LiteralContent::Decimal(value) => value.to_i128(), - LiteralContent::Boolean(value) => Some(if value { 1 } else { 0 }), - LiteralContent::String(ref value) => value.parse().ok(), - _ => None, - } - } - - /// Returns the value of this literal as Decimal if it exists following the rules of [XPath xsd:decimal casting](https://www.w3.org/TR/xpath-functions/#casting-to-decimal) - pub(crate) fn to_decimal(&self) -> Option { - match self.0 { - LiteralContent::Float(value) => Decimal::from_f32(*value), - LiteralContent::Double(value) => Decimal::from_f64(*value), - LiteralContent::Integer(value) => Decimal::from_i128(value), - LiteralContent::Decimal(value) => Some(value), - LiteralContent::Boolean(value) => Some(if value { - Decimal::one() - } else { - Decimal::zero() - }), - LiteralContent::String(ref value) => value.parse().ok(), - _ => None, - } - } - - /// Returns the value of this literal as NaiveDate if possible - pub(crate) fn to_naive_date(&self) -> Option { - match self.0 { - LiteralContent::Date(value) => Some(value.naive_utc()), - LiteralContent::NaiveDate(value) => Some(value), - _ => None, - } - } - - /// Returns the value of this literal as Date if possible - pub(crate) fn to_date(&self) -> Option> { - match self.0 { - LiteralContent::Date(value) => Some(value), - _ => None, - } - } - - /// Returns the value of this literal as NaiveTime if possible - pub(crate) fn to_time(&self) -> Option { - match self.0 { - LiteralContent::NaiveTime(value) => Some(value), - _ => None, - } - } - - /// Returns the value of this literal as NaiveDateTime if possible - pub(crate) fn to_date_time(&self) -> Option { - match self.0 { - LiteralContent::DateTime(value) => Some(value.naive_utc()), - LiteralContent::NaiveDateTime(value) => Some(value), - _ => None, - } - } - - /// Returns the value of this literal as DateTime if possible - pub(crate) fn to_date_time_stamp(&self) -> Option> { - if let LiteralContent::DateTime(value) = self.0 { - Some(value) - } else { - None - } - } } impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_plain() { - self.language() - .map(|lang| { - rio::Literal::LanguageTaggedString { - value: &self.value(), - language: lang.as_str(), - } - .fmt(f) - }) - .unwrap_or_else(|| { - rio::Literal::Simple { - value: &self.value(), - } - .fmt(f) - }) - } else { - rio::Literal::Typed { - value: &self.value(), - datatype: rio::NamedNode { - iri: self.datatype().as_str(), - }, - } - .fmt(f) - } + rio::Literal::from(self).fmt(f) } } @@ -458,96 +140,165 @@ impl<'a> From> for Literal { impl From for Literal { fn from(value: bool) -> Self { - Literal(LiteralContent::Boolean(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::BOOLEAN.clone(), + }) } } impl From for Literal { fn from(value: i128) -> Self { - Literal(LiteralContent::Integer(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: i64) -> Self { - Literal(LiteralContent::Integer(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: i32) -> Self { - Literal(LiteralContent::Integer(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: i16) -> Self { - Literal(LiteralContent::Integer(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: u64) -> Self { - Literal(LiteralContent::Integer(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: u32) -> Self { - Literal(LiteralContent::Integer(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: u16) -> Self { - Literal(LiteralContent::Integer(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.clone(), + }) } } impl From for Literal { fn from(value: f32) -> Self { - Literal(LiteralContent::Float(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::FLOAT.clone(), + }) } } impl From for Literal { fn from(value: f64) -> Self { - Literal(LiteralContent::Double(value.into())) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::DOUBLE.clone(), + }) } } impl From for Literal { fn from(value: Decimal) -> Self { - Literal(LiteralContent::Decimal(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::DECIMAL.clone(), + }) } } impl From> for Literal { fn from(value: Date) -> Self { - Literal(LiteralContent::Date(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::DATE.clone(), + }) } } impl From for Literal { fn from(value: NaiveDate) -> Self { - Literal(LiteralContent::NaiveDate(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::DATE.clone(), + }) } } impl From for Literal { fn from(value: NaiveTime) -> Self { - Literal(LiteralContent::NaiveTime(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::TIME.clone(), + }) } } impl From> for Literal { fn from(value: DateTime) -> Self { - Literal(LiteralContent::DateTime(value)) + Literal(LiteralContent::TypedLiteral { + value: value.to_rfc3339(), + datatype: xsd::DATE_TIME.clone(), + }) } } impl From for Literal { fn from(value: NaiveDateTime) -> Self { - Literal(LiteralContent::NaiveDateTime(value)) + Literal(LiteralContent::TypedLiteral { + value: value.format("%Y-%m-%dT%H:%M:%S%.f").to_string(), + datatype: xsd::DATE_TIME.clone(), + }) + } +} + +impl<'a> From<&'a Literal> for rio::Literal<'a> { + fn from(literal: &'a Literal) -> Self { + if literal.is_plain() { + literal + .language() + .map(|lang| rio::Literal::LanguageTaggedString { + value: literal.value(), + language: &lang, + }) + .unwrap_or_else(|| rio::Literal::Simple { + value: literal.value(), + }) + } else { + rio::Literal::Typed { + value: literal.value(), + datatype: literal.datatype().into(), + } + } } } diff --git a/lib/src/model/mod.rs b/lib/src/model/mod.rs index c25becac..73c14182 100644 --- a/lib/src/model/mod.rs +++ b/lib/src/model/mod.rs @@ -4,7 +4,6 @@ mod blank_node; mod graph; -mod iri; mod isomorphism; mod literal; mod named_node; @@ -13,8 +12,6 @@ pub mod vocab; pub use crate::model::blank_node::BlankNode; pub use crate::model::graph::SimpleGraph; -pub(crate) use crate::model::iri::Iri; -pub(crate) use crate::model::iri::IriParseError; pub use crate::model::literal::Literal; pub use crate::model::named_node::NamedNode; pub use crate::model::triple::NamedOrBlankNode; diff --git a/lib/src/model/named_node.rs b/lib/src/model/named_node.rs index 2ea810d9..b280755a 100644 --- a/lib/src/model/named_node.rs +++ b/lib/src/model/named_node.rs @@ -1,5 +1,5 @@ -use crate::model::Iri; use crate::Result; +use rio_api::iri::Iri; use rio_api::model as rio; use std::fmt; @@ -20,23 +20,14 @@ pub struct NamedNode { iri: String, } -impl fmt::Display for NamedNode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - rio::NamedNode { - iri: self.iri.as_str(), - } - .fmt(f) - } -} - impl NamedNode { /// Builds and validate a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) pub fn parse(iri: impl Into) -> Result { Ok(Self::new_from_iri(Iri::parse(iri.into())?)) } - pub(crate) fn new_from_iri(iri: Iri) -> Self { - Self::new_from_string(iri.into_string()) + pub(crate) fn new_from_iri(iri: Iri) -> Self { + Self::new_from_string(iri.into_inner()) } pub(crate) fn new_from_string(iri: impl Into) -> Self { @@ -51,3 +42,15 @@ impl NamedNode { self.iri } } + +impl fmt::Display for NamedNode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + rio::NamedNode::from(self).fmt(f) + } +} + +impl<'a> From<&'a NamedNode> for rio::NamedNode<'a> { + fn from(node: &'a NamedNode) -> Self { + rio::NamedNode { iri: node.as_str() } + } +} diff --git a/lib/src/model/triple.rs b/lib/src/model/triple.rs index 567f8c39..ec334e80 100644 --- a/lib/src/model/triple.rs +++ b/lib/src/model/triple.rs @@ -1,6 +1,7 @@ use crate::model::blank_node::BlankNode; use crate::model::literal::Literal; use crate::model::named_node::NamedNode; +use rio_api::model as rio; use std::fmt; /// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). @@ -47,6 +48,15 @@ impl From for NamedOrBlankNode { } } +impl<'a> From<&'a NamedOrBlankNode> for rio::NamedOrBlankNode<'a> { + fn from(node: &'a NamedOrBlankNode) -> Self { + match node { + NamedOrBlankNode::NamedNode(node) => rio::NamedNode::from(node).into(), + NamedOrBlankNode::BlankNode(node) => rio::BlankNode::from(node).into(), + } + } +} + /// A RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] @@ -80,14 +90,6 @@ impl Term { Term::Literal(_) => true, } } - - /// Returns the [effective boolean value](https://www.w3.org/TR/sparql11-query/#ebv) of the term if it exists - pub fn to_bool(&self) -> Option { - match self { - Term::Literal(literal) => literal.to_bool(), - _ => None, - } - } } impl fmt::Display for Term { @@ -127,6 +129,16 @@ impl From for Term { } } +impl<'a> From<&'a Term> for rio::Term<'a> { + fn from(node: &'a Term) -> Self { + match node { + Term::NamedNode(node) => rio::NamedNode::from(node).into(), + Term::BlankNode(node) => rio::BlankNode::from(node).into(), + Term::Literal(node) => rio::Literal::from(node).into(), + } + } +} + /// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct Triple { @@ -196,6 +208,16 @@ impl fmt::Display for Triple { } } +impl<'a> From<&'a Triple> for rio::Triple<'a> { + fn from(node: &'a Triple) -> Self { + rio::Triple { + subject: node.subject().into(), + predicate: node.predicate().into(), + object: node.object().into(), + } + } +} + /// A [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct Quad { diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 294fd7d2..9946f029 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -32,7 +32,7 @@ type EncodedTuplesIterator<'a> = Box> + #[derive(Clone)] pub struct SimpleEvaluator { dataset: DatasetView, - bnodes_map: Arc>>, + bnodes_map: Arc>>, } impl<'a, S: StoreConnection + 'a> SimpleEvaluator { @@ -477,56 +477,55 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { }, PlanExpression::BNode(id) => match id { Some(id) => match self.eval_expression(id, tuple)? { - EncodedTerm::StringLiteral { value_id } => Some( - self.bnodes_map + EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::BlankNode( + *self + .bnodes_map .lock() .ok()? .entry(value_id) - .or_insert_with(BlankNode::default) - .clone() - .into(), - ), + .or_insert_with(Uuid::new_v4), + )), _ => None, }, - None => Some(BlankNode::default().into()), + None => Some(EncodedTerm::BlankNode(Uuid::new_v4())), }, PlanExpression::Year(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::Date(date) => Some(date.year().into()), - EncodedTerm::NaiveDate(date) => Some(date.year().into()), - EncodedTerm::DateTime(date_time) => Some(date_time.year().into()), - EncodedTerm::NaiveDateTime(date_time) => Some(date_time.year().into()), + EncodedTerm::DateLiteral(date) => Some(date.year().into()), + EncodedTerm::NaiveDateLiteral(date) => Some(date.year().into()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.year().into()), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.year().into()), _ => None, }, PlanExpression::Month(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::Date(date) => Some(date.year().into()), - EncodedTerm::NaiveDate(date) => Some(date.month().into()), - EncodedTerm::DateTime(date_time) => Some(date_time.month().into()), - EncodedTerm::NaiveDateTime(date_time) => Some(date_time.month().into()), + EncodedTerm::DateLiteral(date) => Some(date.year().into()), + EncodedTerm::NaiveDateLiteral(date) => Some(date.month().into()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.month().into()), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.month().into()), _ => None, }, PlanExpression::Day(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::Date(date) => Some(date.year().into()), - EncodedTerm::NaiveDate(date) => Some(date.day().into()), - EncodedTerm::DateTime(date_time) => Some(date_time.day().into()), - EncodedTerm::NaiveDateTime(date_time) => Some(date_time.day().into()), + EncodedTerm::DateLiteral(date) => Some(date.year().into()), + EncodedTerm::NaiveDateLiteral(date) => Some(date.day().into()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.day().into()), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.day().into()), _ => None, }, PlanExpression::Hours(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::NaiveTime(time) => Some(time.hour().into()), - EncodedTerm::DateTime(date_time) => Some(date_time.hour().into()), - EncodedTerm::NaiveDateTime(date_time) => Some(date_time.hour().into()), + EncodedTerm::NaiveTimeLiteral(time) => Some(time.hour().into()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.hour().into()), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.hour().into()), _ => None, }, PlanExpression::Minutes(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::NaiveTime(time) => Some(time.minute().into()), - EncodedTerm::DateTime(date_time) => Some(date_time.minute().into()), - EncodedTerm::NaiveDateTime(date_time) => Some(date_time.minute().into()), + EncodedTerm::NaiveTimeLiteral(time) => Some(time.minute().into()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.minute().into()), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.minute().into()), _ => None, }, PlanExpression::Seconds(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::NaiveTime(time) => Some(time.second().into()), - EncodedTerm::DateTime(date_time) => Some(date_time.second().into()), - EncodedTerm::NaiveDateTime(date_time) => Some(date_time.second().into()), + EncodedTerm::NaiveTimeLiteral(time) => Some(time.second().into()), + EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.second().into()), + EncodedTerm::NaiveDateTimeLiteral(date_time) => Some(date_time.second().into()), _ => None, }, PlanExpression::UUID() => Some(EncodedTerm::NamedNode { @@ -705,9 +704,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { _ => None, }, PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::NaiveDate(value) => Some(value.into()), - EncodedTerm::DateTime(value) => Some(value.date().naive_utc().into()), //TODO: use date with timezone - EncodedTerm::NaiveDateTime(value) => Some(value.date().into()), + EncodedTerm::NaiveDateLiteral(value) => Some(value.into()), + EncodedTerm::DateTimeLiteral(value) => Some(value.date().naive_utc().into()), //TODO: use date with timezone + EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.date().into()), EncodedTerm::StringLiteral { value_id } => { let value = self.dataset.get_str(value_id).ok()??; Some(NaiveDate::parse_from_str(&value, "%Y-%m-%d").ok()?.into()) @@ -715,9 +714,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { _ => None, }, PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::NaiveTime(value) => Some(value.into()), - EncodedTerm::DateTime(value) => Some(value.time().into()), - EncodedTerm::NaiveDateTime(value) => Some(value.time().into()), + EncodedTerm::NaiveTimeLiteral(value) => Some(value.into()), + EncodedTerm::DateTimeLiteral(value) => Some(value.time().into()), + EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.time().into()), EncodedTerm::StringLiteral { value_id } => { let value = self.dataset.get_str(value_id).ok()??; Some(NaiveTime::parse_from_str(&value, "%H:%M:%S").ok()?.into()) @@ -725,8 +724,8 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { _ => None, }, PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::DateTime(value) => Some(value.into()), - EncodedTerm::NaiveDateTime(value) => Some(value.into()), + EncodedTerm::DateTimeLiteral(value) => Some(value.into()), + EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.into()), EncodedTerm::StringLiteral { value_id } => { let value = self.dataset.get_str(value_id).ok()??; Some(match DateTime::parse_from_rfc3339(&value) { @@ -772,11 +771,17 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::DoubleLiteral(value) => self.dataset.insert_str(&value.to_string()).ok(), EncodedTerm::IntegerLiteral(value) => self.dataset.insert_str(&value.to_string()).ok(), EncodedTerm::DecimalLiteral(value) => self.dataset.insert_str(&value.to_string()).ok(), - EncodedTerm::Date(value) => self.dataset.insert_str(&value.to_string()).ok(), - EncodedTerm::NaiveDate(value) => self.dataset.insert_str(&value.to_string()).ok(), - EncodedTerm::NaiveTime(value) => self.dataset.insert_str(&value.to_string()).ok(), - EncodedTerm::DateTime(value) => self.dataset.insert_str(&value.to_string()).ok(), - EncodedTerm::NaiveDateTime(value) => self.dataset.insert_str(&value.to_string()).ok(), + EncodedTerm::DateLiteral(value) => self.dataset.insert_str(&value.to_string()).ok(), + EncodedTerm::NaiveDateLiteral(value) => { + self.dataset.insert_str(&value.to_string()).ok() + } + EncodedTerm::NaiveTimeLiteral(value) => { + self.dataset.insert_str(&value.to_string()).ok() + } + EncodedTerm::DateTimeLiteral(value) => self.dataset.insert_str(&value.to_string()).ok(), + EncodedTerm::NaiveDateTimeLiteral(value) => { + self.dataset.insert_str(&value.to_string()).ok() + } } } @@ -953,9 +958,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { | EncodedTerm::LangStringLiteral { .. } => Some(false), _ => None, }, - EncodedTerm::Date(a) => match b { - EncodedTerm::Date(b) => Some(a == b), - EncodedTerm::NaiveDate(b) => { + EncodedTerm::DateLiteral(a) => match b { + EncodedTerm::DateLiteral(b) => Some(a == b), + EncodedTerm::NaiveDateLiteral(b) => { if a.naive_utc() == b { None } else { @@ -965,9 +970,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::TypedLiteral { .. } => None, _ => Some(false), }, - EncodedTerm::NaiveDate(a) => match b { - EncodedTerm::NaiveDate(b) => Some(a == b), - EncodedTerm::Date(b) => { + EncodedTerm::NaiveDateLiteral(a) => match b { + EncodedTerm::NaiveDateLiteral(b) => Some(a == b), + EncodedTerm::DateLiteral(b) => { if a == b.naive_utc() { None } else { @@ -977,14 +982,14 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::TypedLiteral { .. } => None, _ => Some(false), }, - EncodedTerm::NaiveTime(a) => match b { - EncodedTerm::NaiveTime(b) => Some(a == b), + EncodedTerm::NaiveTimeLiteral(a) => match b { + EncodedTerm::NaiveTimeLiteral(b) => Some(a == b), EncodedTerm::TypedLiteral { .. } => None, _ => Some(false), }, - EncodedTerm::DateTime(a) => match b { - EncodedTerm::DateTime(b) => Some(a == b), - EncodedTerm::NaiveDateTime(b) => { + EncodedTerm::DateTimeLiteral(a) => match b { + EncodedTerm::DateTimeLiteral(b) => Some(a == b), + EncodedTerm::NaiveDateTimeLiteral(b) => { if a.naive_utc() == b { None } else { @@ -994,9 +999,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::TypedLiteral { .. } => None, _ => Some(false), }, - EncodedTerm::NaiveDateTime(a) => match b { - EncodedTerm::NaiveDateTime(b) => Some(a == b), - EncodedTerm::DateTime(b) => { + EncodedTerm::NaiveDateTimeLiteral(a) => match b { + EncodedTerm::NaiveDateTimeLiteral(b) => Some(a == b), + EncodedTerm::DateTimeLiteral(b) => { if a == b.naive_utc() { None } else { @@ -1082,31 +1087,31 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::DecimalLiteral(b) => a.partial_cmp(&b), _ => None, }, - EncodedTerm::Date(a) => match b { - EncodedTerm::Date(ref b) => a.partial_cmp(b), - EncodedTerm::NaiveDate(ref b) => a.naive_utc().partial_cmp(b), //TODO: check edges + EncodedTerm::DateLiteral(a) => match b { + EncodedTerm::DateLiteral(ref b) => a.partial_cmp(b), + EncodedTerm::NaiveDateLiteral(ref b) => a.naive_utc().partial_cmp(b), //TODO: check edges _ => None, }, - EncodedTerm::NaiveDate(a) => match b { - EncodedTerm::NaiveDate(ref b) => a.partial_cmp(b), - EncodedTerm::Date(ref b) => a.partial_cmp(&b.naive_utc()), //TODO: check edges + EncodedTerm::NaiveDateLiteral(a) => match b { + EncodedTerm::NaiveDateLiteral(ref b) => a.partial_cmp(b), + EncodedTerm::DateLiteral(ref b) => a.partial_cmp(&b.naive_utc()), //TODO: check edges _ => None, }, - EncodedTerm::NaiveTime(a) => { - if let EncodedTerm::NaiveTime(ref b) = b { + EncodedTerm::NaiveTimeLiteral(a) => { + if let EncodedTerm::NaiveTimeLiteral(ref b) = b { a.partial_cmp(b) } else { None } } - EncodedTerm::DateTime(a) => match b { - EncodedTerm::DateTime(ref b) => a.partial_cmp(b), - EncodedTerm::NaiveDateTime(ref b) => a.naive_utc().partial_cmp(b), //TODO: check edges + EncodedTerm::DateTimeLiteral(a) => match b { + EncodedTerm::DateTimeLiteral(ref b) => a.partial_cmp(b), + EncodedTerm::NaiveDateTimeLiteral(ref b) => a.naive_utc().partial_cmp(b), //TODO: check edges _ => None, }, - EncodedTerm::NaiveDateTime(a) => match b { - EncodedTerm::NaiveDateTime(ref b) => a.partial_cmp(b), - EncodedTerm::DateTime(ref b) => a.partial_cmp(&b.naive_utc()), //TODO: check edges + EncodedTerm::NaiveDateTimeLiteral(a) => match b { + EncodedTerm::NaiveDateTimeLiteral(ref b) => a.partial_cmp(b), + EncodedTerm::DateTimeLiteral(ref b) => a.partial_cmp(&b.naive_utc()), //TODO: check edges _ => None, }, _ => None, diff --git a/lib/src/sparql/json_results.rs b/lib/src/sparql/json_results.rs index 542ee455..7ee607d6 100644 --- a/lib/src/sparql/json_results.rs +++ b/lib/src/sparql/json_results.rs @@ -53,12 +53,12 @@ pub fn write_json_results(results: QueryResult<'_>, mut sink: W) -> Re } Term::BlankNode(bnode) => { sink.write_all(b":{\"type\":\"bnode\",\"value\":")?; - write!(sink, "{}", bnode.as_uuid().to_simple())?; + write!(sink, "{}", bnode.as_str())?; sink.write_all(b"}")?; } Term::Literal(literal) => { sink.write_all(b":{\"type\":\"literal\",\"value\":")?; - write_escaped_json_string(&literal.value(), &mut sink)?; + write_escaped_json_string(literal.value(), &mut sink)?; if let Some(language) = literal.language() { sink.write_all(b",\"xml:lang\":")?; write_escaped_json_string(language, &mut sink)?; diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 60ca73c3..3e27a0e7 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -3,8 +3,9 @@ use crate::sparql::json_results::write_json_results; use crate::sparql::xml_results::{read_xml_results, write_xml_results}; use crate::{FileSyntax, GraphSyntax, Result}; use failure::format_err; -use quick_xml::events::*; -use quick_xml::Writer; +use rio_api::formatter::TriplesFormatter; +use rio_turtle::{NTriplesFormatter, TurtleFormatter}; +use rio_xml::RdfXmlFormatter; use std::fmt; use std::io::{BufRead, Write}; use uuid::Uuid; @@ -34,90 +35,31 @@ impl<'a> QueryResult<'a> { } } - pub fn write_graph(self, mut writer: W, syntax: GraphSyntax) -> Result { + pub fn write_graph(self, write: W, syntax: GraphSyntax) -> Result { if let QueryResult::Graph(triples) = self { - match syntax { - GraphSyntax::NTriples | GraphSyntax::Turtle => { + Ok(match syntax { + GraphSyntax::NTriples => { + let mut formatter = NTriplesFormatter::new(write); for triple in triples { - writeln!(&mut writer, "{}", triple?)? + formatter.format(&(&triple?).into())?; } - Ok(writer) + formatter.finish() } - GraphSyntax::RdfXml => { - let mut writer = Writer::new(writer); - writer.write_event(Event::Decl(BytesDecl::new(b"1.0", None, None)))?; - let mut rdf_open = BytesStart::borrowed_name(b"rdf:RDF"); - rdf_open.push_attribute(( - "xmlns:rdf", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - )); - writer.write_event(Event::Start(rdf_open))?; - - let mut current_subject = None; + GraphSyntax::Turtle => { + let mut formatter = TurtleFormatter::new(write); for triple in triples { - let triple = triple?; - - // We open a new rdf:Description if useful - if current_subject.as_ref() != Some(triple.subject()) { - if current_subject.is_some() { - writer.write_event(Event::End(BytesEnd::borrowed( - b"rdf:Description", - )))?; - } - - let mut description_open = - BytesStart::borrowed_name(b"rdf:Description"); - match triple.subject() { - NamedOrBlankNode::NamedNode(n) => { - description_open.push_attribute(("rdf:about", n.as_str())) - } - NamedOrBlankNode::BlankNode(n) => { - let id = n.as_uuid().to_simple().to_string(); - description_open.push_attribute(("rdf:nodeID", id.as_str())) - } - } - writer.write_event(Event::Start(description_open))?; - } - - let mut property_open = BytesStart::borrowed_name(b"prop:"); - let mut content = None; - property_open.push_attribute(("xmlns:prop", triple.predicate().as_str())); - match triple.object() { - Term::NamedNode(n) => { - property_open.push_attribute(("rdf:resource", n.as_str())) - } - Term::BlankNode(n) => { - let id = n.as_uuid().to_simple().to_string(); - property_open.push_attribute(("rdf:nodeID", id.as_str())) - } - Term::Literal(l) => { - if let Some(language) = l.language() { - property_open.push_attribute(("xml:lang", language.as_str())) - } else if !l.is_plain() { - property_open - .push_attribute(("rdf:datatype", l.datatype().as_str())) - } - content = Some(l.value()); - } - } - if let Some(content) = content { - writer.write_event(Event::Start(property_open))?; - writer.write_event(Event::Text(BytesText::from_plain_str(&content)))?; - writer.write_event(Event::End(BytesEnd::borrowed(b"prop:")))?; - } else { - writer.write_event(Event::Empty(property_open))?; - } - - current_subject = Some(triple.subject_owned()); + formatter.format(&(&triple?).into())?; } - - if current_subject.is_some() { - writer.write_event(Event::End(BytesEnd::borrowed(b"rdf:Description")))?; + formatter.finish()? + } + GraphSyntax::RdfXml => { + let mut formatter = RdfXmlFormatter::new(write)?; + for triple in triples { + formatter.format(&(&triple?).into())?; } - writer.write_event(Event::End(BytesEnd::borrowed(b"rdf:RDF")))?; - Ok(writer.into_inner()) + formatter.finish()? } - } + }) } else { Err(format_err!( "Bindings or booleans could not be formatted as an RDF graph" @@ -246,7 +188,7 @@ impl Default for Variable { impl From for Variable { fn from(blank_node: BlankNode) -> Self { Variable::BlankNode { - id: *blank_node.as_uuid(), + id: blank_node.uuid(), } } } diff --git a/lib/src/sparql/parser.rs b/lib/src/sparql/parser.rs index 6eec3412..5d8f9f99 100644 --- a/lib/src/sparql/parser.rs +++ b/lib/src/sparql/parser.rs @@ -15,6 +15,7 @@ mod grammar { use crate::sparql::algebra::*; use crate::sparql::model::*; use lazy_static::lazy_static; + use rio_api::iri::{Iri, IriParseError}; use std::borrow::Cow; use std::char; use std::collections::HashMap; @@ -292,7 +293,7 @@ mod grammar { } pub struct ParserState { - base_iri: Option, + base_iri: Option>, namespaces: HashMap, bnodes_map: BTreeMap, used_bnodes: BTreeSet, @@ -300,7 +301,7 @@ mod grammar { } impl ParserState { - fn parse_iri(&self, iri: &str) -> Result { + fn parse_iri(&self, iri: &str) -> Result, IriParseError> { if let Some(base_iri) = &self.base_iri { base_iri.resolve(iri) } else { diff --git a/lib/src/sparql/sparql_grammar.rustpeg b/lib/src/sparql/sparql_grammar.rustpeg index 3f48f191..18c42051 100644 --- a/lib/src/sparql/sparql_grammar.rustpeg +++ b/lib/src/sparql/sparql_grammar.rustpeg @@ -25,7 +25,7 @@ BaseDecl -> () = "BASE"i _ i:IRIREF { //[6] PrefixDecl -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { - state.namespaces.insert(ns.into(), i.into_string()); + state.namespaces.insert(ns.into(), i.into_inner()); } //[7] @@ -922,7 +922,7 @@ iri -> NamedNode = i:(IRIREF / PrefixedName) { } //[137] -PrefixedName -> Iri = PNAME_LN / +PrefixedName -> Iri = PNAME_LN / ns:PNAME_NS {? if let Some(iri) = state.namespaces.get(ns).cloned() { Iri::parse(iri).map_err(|_| "IRI parsing failed") } else { @@ -941,7 +941,7 @@ BlankNode -> BlankNode = ANON { BlankNode::default() } //[139] -IRIREF -> Iri = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}])*) ">" {? +IRIREF -> Iri = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}])*) ">" {? state.parse_iri(i).map_err(|_| "IRI parsing failed") } @@ -951,7 +951,7 @@ PNAME_NS -> &'input str = ns:$(PN_PREFIX?) ':' { } //[141] -PNAME_LN -> Iri = ns:PNAME_NS local:$(PN_LOCAL) {? +PNAME_LN -> Iri = ns:PNAME_NS local:$(PN_LOCAL) {? if let Some(iri) = state.namespaces.get(ns).map(|v| v.clone() + &unescape_pn_local(local)) { Iri::parse(iri).map_err(|_| "IRI parsing failed") } else { diff --git a/lib/src/sparql/xml_results.rs b/lib/src/sparql/xml_results.rs index 939f44e9..3106cb18 100644 --- a/lib/src/sparql/xml_results.rs +++ b/lib/src/sparql/xml_results.rs @@ -71,7 +71,7 @@ pub fn write_xml_results(results: QueryResult<'_>, sink: W) -> Result< b"bnode", )))?; writer.write_event(Event::Text(BytesText::from_plain_str( - &bnode.as_uuid().to_simple().to_string(), + bnode.as_str(), )))?; writer.write_event(Event::End(BytesEnd::borrowed(b"bnode")))?; } diff --git a/lib/src/store/mod.rs b/lib/src/store/mod.rs index 9533a352..435eb72a 100644 --- a/lib/src/store/mod.rs +++ b/lib/src/store/mod.rs @@ -13,7 +13,7 @@ use crate::model::*; use crate::sparql::SimplePreparedQuery; use crate::store::numeric_encoder::*; use crate::{DatasetSyntax, GraphSyntax, RepositoryConnection, Result}; -use rio_api::parser::{QuadParser, TripleParser}; +use rio_api::parser::{QuadsParser, TriplesParser}; use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleParser}; use rio_xml::RdfXmlParser; use std::collections::HashMap; @@ -167,7 +167,7 @@ impl RepositoryConnection for StoreRepositoryConnection { } impl StoreRepositoryConnection { - fn load_from_triple_parser( + fn load_from_triple_parser( &self, mut parser: P, to_graph_name: Option<&NamedOrBlankNode>, @@ -185,17 +185,12 @@ impl StoreRepositoryConnection { }; parser.parse_all(&mut move |t| { self.inner - .insert( - &encoder - .encode_rio_triple_in_graph(t, graph_name, &mut bnode_map) - .unwrap(), - ) - .unwrap() + .insert(&encoder.encode_rio_triple_in_graph(t, graph_name, &mut bnode_map)?) })?; Ok(()) } - fn load_from_quad_parser(&self, mut parser: P) -> Result<()> + fn load_from_quad_parser(&self, mut parser: P) -> Result<()> where P::Error: Send + Sync + 'static, { @@ -204,8 +199,7 @@ impl StoreRepositoryConnection { let encoder = self.inner.encoder(); parser.parse_all(&mut move |q| { self.inner - .insert(&encoder.encode_rio_quad(q, &mut bnode_map).unwrap()) - .unwrap() + .insert(&encoder.encode_rio_quad(q, &mut bnode_map)?) })?; Ok(()) } diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 893aa751..d2886458 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -3,6 +3,7 @@ use crate::model::vocab::xsd; use crate::model::*; use crate::Result; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use chrono::format::{parse, Parsed, StrftimeItems}; use chrono::prelude::*; use failure::format_err; use failure::Backtrace; @@ -191,11 +192,11 @@ pub enum EncodedTerm { DoubleLiteral(OrderedFloat), IntegerLiteral(i128), DecimalLiteral(Decimal), - Date(Date), - NaiveDate(NaiveDate), - NaiveTime(NaiveTime), - DateTime(DateTime), - NaiveDateTime(NaiveDateTime), + DateLiteral(Date), + NaiveDateLiteral(NaiveDate), + NaiveTimeLiteral(NaiveTime), + DateTimeLiteral(DateTime), + NaiveDateTimeLiteral(NaiveDateTime), } impl EncodedTerm { @@ -223,11 +224,11 @@ impl EncodedTerm { | EncodedTerm::DoubleLiteral(_) | EncodedTerm::IntegerLiteral(_) | EncodedTerm::DecimalLiteral(_) - | EncodedTerm::Date(_) - | EncodedTerm::NaiveDate(_) - | EncodedTerm::NaiveTime(_) - | EncodedTerm::DateTime(_) - | EncodedTerm::NaiveDateTime(_) => true, + | EncodedTerm::DateLiteral(_) + | EncodedTerm::NaiveDateLiteral(_) + | EncodedTerm::NaiveTimeLiteral(_) + | EncodedTerm::DateTimeLiteral(_) + | EncodedTerm::NaiveDateTimeLiteral(_) => true, _ => false, } } @@ -244,10 +245,10 @@ impl EncodedTerm { EncodedTerm::DoubleLiteral(..) => Some(ENCODED_XSD_DOUBLE_NAMED_NODE), EncodedTerm::IntegerLiteral(..) => Some(ENCODED_XSD_INTEGER_NAMED_NODE), EncodedTerm::DecimalLiteral(..) => Some(ENCODED_XSD_DECIMAL_NAMED_NODE), - EncodedTerm::Date(..) => Some(ENCODED_XSD_DATE_NAMED_NODE), - EncodedTerm::NaiveDate(..) => Some(ENCODED_XSD_DATE_NAMED_NODE), - EncodedTerm::NaiveTime(..) => Some(ENCODED_XSD_TIME_NAMED_NODE), - EncodedTerm::DateTime(..) | EncodedTerm::NaiveDateTime(..) => { + EncodedTerm::DateLiteral(..) => Some(ENCODED_XSD_DATE_NAMED_NODE), + EncodedTerm::NaiveDateLiteral(..) => Some(ENCODED_XSD_DATE_NAMED_NODE), + EncodedTerm::NaiveTimeLiteral(..) => Some(ENCODED_XSD_TIME_NAMED_NODE), + EncodedTerm::DateTimeLiteral(..) | EncodedTerm::NaiveDateTimeLiteral(..) => { Some(ENCODED_XSD_DATE_TIME_NAMED_NODE) } _ => None, @@ -268,11 +269,11 @@ impl EncodedTerm { EncodedTerm::DoubleLiteral(_) => TYPE_DOUBLE_LITERAL, EncodedTerm::IntegerLiteral(_) => TYPE_INTEGER_LITERAL, EncodedTerm::DecimalLiteral(_) => TYPE_DECIMAL_LITERAL, - EncodedTerm::Date(_) => TYPE_DATE_LITERAL, - EncodedTerm::NaiveDate(_) => TYPE_NAIVE_DATE_LITERAL, - EncodedTerm::NaiveTime(_) => TYPE_NAIVE_TIME_LITERAL, - EncodedTerm::DateTime(_) => TYPE_DATE_TIME_LITERAL, - EncodedTerm::NaiveDateTime(_) => TYPE_NAIVE_DATE_TIME_LITERAL, + EncodedTerm::DateLiteral(_) => TYPE_DATE_LITERAL, + EncodedTerm::NaiveDateLiteral(_) => TYPE_NAIVE_DATE_LITERAL, + EncodedTerm::NaiveTimeLiteral(_) => TYPE_NAIVE_TIME_LITERAL, + EncodedTerm::DateTimeLiteral(_) => TYPE_DATE_TIME_LITERAL, + EncodedTerm::NaiveDateTimeLiteral(_) => TYPE_NAIVE_DATE_TIME_LITERAL, } } } @@ -321,37 +322,37 @@ impl From for EncodedTerm { impl From> for EncodedTerm { fn from(value: Date) -> Self { - EncodedTerm::Date(value) + EncodedTerm::DateLiteral(value) } } impl From for EncodedTerm { fn from(value: NaiveDate) -> Self { - EncodedTerm::NaiveDate(value) + EncodedTerm::NaiveDateLiteral(value) } } impl From for EncodedTerm { fn from(value: NaiveTime) -> Self { - EncodedTerm::NaiveTime(value) + EncodedTerm::NaiveTimeLiteral(value) } } impl From> for EncodedTerm { fn from(value: DateTime) -> Self { - EncodedTerm::DateTime(value) + EncodedTerm::DateTimeLiteral(value) } } impl From for EncodedTerm { fn from(value: NaiveDateTime) -> Self { - EncodedTerm::NaiveDateTime(value) + EncodedTerm::NaiveDateTimeLiteral(value) } } impl From for EncodedTerm { fn from(node: BlankNode) -> Self { - EncodedTerm::BlankNode(*node.as_uuid()) + EncodedTerm::BlankNode(node.uuid()) } } @@ -425,24 +426,24 @@ impl TermReader for R { self.read_exact(&mut buffer)?; Ok(EncodedTerm::DecimalLiteral(Decimal::deserialize(buffer))) } - TYPE_DATE_LITERAL => Ok(EncodedTerm::Date(Date::from_utc( + TYPE_DATE_LITERAL => Ok(EncodedTerm::DateLiteral(Date::from_utc( NaiveDate::from_num_days_from_ce_opt(self.read_i32::()?) .ok_or_else(|| format_err!("Invalid date serialization"))?, FixedOffset::east_opt(self.read_i32::()?) .ok_or_else(|| format_err!("Invalid timezone offset"))?, ))), - TYPE_NAIVE_DATE_LITERAL => Ok(EncodedTerm::NaiveDate( + TYPE_NAIVE_DATE_LITERAL => Ok(EncodedTerm::NaiveDateLiteral( NaiveDate::from_num_days_from_ce_opt(self.read_i32::()?) .ok_or_else(|| format_err!("Invalid date serialization"))?, )), - TYPE_NAIVE_TIME_LITERAL => Ok(EncodedTerm::NaiveTime( + TYPE_NAIVE_TIME_LITERAL => Ok(EncodedTerm::NaiveTimeLiteral( NaiveTime::from_num_seconds_from_midnight_opt( self.read_u32::()?, self.read_u32::()?, ) .ok_or_else(|| format_err!("Invalid time serialization"))?, )), - TYPE_DATE_TIME_LITERAL => Ok(EncodedTerm::DateTime(DateTime::from_utc( + TYPE_DATE_TIME_LITERAL => Ok(EncodedTerm::DateTimeLiteral(DateTime::from_utc( NaiveDateTime::from_timestamp_opt( self.read_i64::()?, self.read_u32::()?, @@ -451,7 +452,7 @@ impl TermReader for R { FixedOffset::east_opt(self.read_i32::()?) .ok_or_else(|| format_err!("Invalid timezone offset"))?, ))), - TYPE_NAIVE_DATE_TIME_LITERAL => Ok(EncodedTerm::NaiveDateTime( + TYPE_NAIVE_DATE_TIME_LITERAL => Ok(EncodedTerm::NaiveDateTimeLiteral( NaiveDateTime::from_timestamp_opt( self.read_i64::()?, self.read_u32::()?, @@ -538,23 +539,23 @@ impl TermWriter for R { EncodedTerm::DoubleLiteral(value) => self.write_f64::(*value)?, EncodedTerm::IntegerLiteral(value) => self.write_i128::(value)?, EncodedTerm::DecimalLiteral(value) => self.write_all(&value.serialize())?, - EncodedTerm::Date(value) => { + EncodedTerm::DateLiteral(value) => { self.write_i32::(value.num_days_from_ce())?; self.write_i32::(value.timezone().local_minus_utc())?; } - EncodedTerm::NaiveDate(value) => { + EncodedTerm::NaiveDateLiteral(value) => { self.write_i32::(value.num_days_from_ce())?; } - EncodedTerm::NaiveTime(value) => { + EncodedTerm::NaiveTimeLiteral(value) => { self.write_u32::(value.num_seconds_from_midnight())?; self.write_u32::(value.nanosecond())?; } - EncodedTerm::DateTime(value) => { + EncodedTerm::DateTimeLiteral(value) => { self.write_i64::(value.timestamp())?; self.write_u32::(value.timestamp_subsec_nanos())?; self.write_i32::(value.timezone().local_minus_utc())?; } - EncodedTerm::NaiveDateTime(value) => { + EncodedTerm::NaiveDateTimeLiteral(value) => { self.write_i64::(value.timestamp())?; self.write_u32::(value.timestamp_subsec_nanos())?; } @@ -597,80 +598,15 @@ impl Encoder { } pub fn encode_named_node(&self, named_node: &NamedNode) -> Result { - self.encode_rio_named_node(rio::NamedNode { - iri: named_node.as_str(), - }) + self.encode_rio_named_node(named_node.into()) } pub fn encode_blank_node(&self, blank_node: &BlankNode) -> Result { - Ok(EncodedTerm::BlankNode(*blank_node.as_uuid())) + Ok(EncodedTerm::BlankNode(blank_node.uuid())) } pub fn encode_literal(&self, literal: &Literal) -> Result { - Ok(if let Some(language) = literal.language() { - EncodedTerm::LangStringLiteral { - value_id: self.string_store.insert_str(&literal.value())?, - language_id: self.string_store.insert_str(language.as_str())?, - } - } else if literal.is_string() { - EncodedTerm::StringLiteral { - value_id: self.string_store.insert_str(&literal.value())?, - } - } else if literal.is_boolean() { - literal - .to_bool() - .ok_or_else(|| format_err!("boolean literal without boolean value"))? - .into() - } else if literal.is_float() { - literal - .to_float() - .ok_or_else(|| format_err!("float literal without float value"))? - .into() - } else if literal.is_double() { - literal - .to_double() - .ok_or_else(|| format_err!("double literal without double value"))? - .into() - } else if literal.is_integer() { - literal - .to_integer() - .ok_or_else(|| format_err!("integer literal without integer value"))? - .into() - } else if literal.is_decimal() { - literal - .to_decimal() - .ok_or_else(|| format_err!("decimal literal without decimal value"))? - .into() - } else if literal.is_date() { - if let Some(date) = literal.to_date() { - date.into() - } else { - literal - .to_naive_date() - .ok_or_else(|| format_err!("date literal without date value"))? - .into() - } - } else if literal.is_time() { - literal - .to_time() - .ok_or_else(|| format_err!("time literal without time value"))? - .into() - } else if literal.is_date_time_stamp() { - literal - .to_date_time_stamp() - .ok_or_else(|| format_err!("dateTimeStamp literal without dateTimeStamp value"))? - .into() - } else if literal.is_decimal() { - literal - .to_date_time() - .ok_or_else(|| format_err!("dateTime literal without dateTime value"))? - .into() - } else { - EncodedTerm::TypedLiteral { - value_id: self.string_store.insert_str(&literal.value())?, - datatype_id: self.string_store.insert_str(literal.datatype().as_str())?, - } - }) + self.encode_rio_literal(literal.into()) } pub fn encode_named_or_blank_node(&self, term: &NamedOrBlankNode) -> Result { @@ -734,28 +670,128 @@ impl Encoder { } pub fn encode_rio_literal(&self, literal: rio::Literal) -> Result { - match literal { - rio::Literal::Simple { value } => Ok(EncodedTerm::StringLiteral { + Ok(match literal { + rio::Literal::Simple { value } => EncodedTerm::StringLiteral { value_id: self.string_store.insert_str(value)?, - }), + }, rio::Literal::LanguageTaggedString { value, language } => { - Ok(EncodedTerm::LangStringLiteral { + EncodedTerm::LangStringLiteral { value_id: self.string_store.insert_str(value)?, language_id: if language.bytes().all(|b| b.is_ascii_lowercase()) { self.string_store.insert_str(language) } else { self.string_store.insert_str(&language.to_ascii_lowercase()) }?, - }) + } } - rio::Literal::Typed { value, datatype } => { - //TODO: optimize - self.encode_literal(&Literal::new_typed_literal( - value, - NamedNode::new_from_string(datatype.iri), - )) - } - } + rio::Literal::Typed { value, datatype } => match datatype.iri { + "http://www.w3.org/2001/XMLSchema#boolean" => match value { + "true" | "1" => EncodedTerm::BooleanLiteral(true), + "false" | "0" => EncodedTerm::BooleanLiteral(false), + _ => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_BOOLEAN_ID, + }, + }, + "http://www.w3.org/2001/XMLSchema#string" => EncodedTerm::StringLiteral { + value_id: self.string_store.insert_str(value)?, + }, + "http://www.w3.org/2001/XMLSchema#float" => match value.parse() { + Ok(value) => EncodedTerm::FloatLiteral(OrderedFloat(value)), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_FLOAT_ID, + }, + }, + "http://www.w3.org/2001/XMLSchema#double" => match value.parse() { + Ok(value) => EncodedTerm::DoubleLiteral(OrderedFloat(value)), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_DOUBLE_ID, + }, + }, + "http://www.w3.org/2001/XMLSchema#integer" + | "http://www.w3.org/2001/XMLSchema#byte" + | "http://www.w3.org/2001/XMLSchema#short" + | "http://www.w3.org/2001/XMLSchema#int" + | "http://www.w3.org/2001/XMLSchema#long" + | "http://www.w3.org/2001/XMLSchema#unsignedByte" + | "http://www.w3.org/2001/XMLSchema#unsignedShort" + | "http://www.w3.org/2001/XMLSchema#unsignedInt" + | "http://www.w3.org/2001/XMLSchema#unsignedLong" + | "http://www.w3.org/2001/XMLSchema#positiveInteger" + | "http://www.w3.org/2001/XMLSchema#negativeInteger" + | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" + | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => match value.parse() { + Ok(value) => EncodedTerm::IntegerLiteral(value), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: self.string_store.insert_str(datatype.iri)?, + }, + }, + "http://www.w3.org/2001/XMLSchema#decimal" => match value.parse() { + Ok(value) => EncodedTerm::DecimalLiteral(value), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_DECIMAL_ID, + }, + }, + "http://www.w3.org/2001/XMLSchema#date" => { + let mut parsed = Parsed::new(); + match parse(&mut parsed, &value, StrftimeItems::new("%Y-%m-%d%:z")).and_then( + |_| { + Ok(Date::from_utc( + parsed.to_naive_date()?, + parsed.to_fixed_offset()?, + )) + }, + ) { + Ok(value) => EncodedTerm::DateLiteral(value), + Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%dZ") { + Ok(value) => EncodedTerm::DateLiteral(Date::from_utc( + value, + FixedOffset::east(0), + )), + Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%d") { + Ok(value) => EncodedTerm::NaiveDateLiteral(value), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_DATE_ID, + }, + }, + }, + } + } + "http://www.w3.org/2001/XMLSchema#time" => { + match NaiveTime::parse_from_str(&value, "%H:%M:%S") { + Ok(value) => EncodedTerm::NaiveTimeLiteral(value), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_TIME_ID, + }, + } + } + "http://www.w3.org/2001/XMLSchema#dateTime" + | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => { + match DateTime::parse_from_rfc3339(&value) { + Ok(value) => EncodedTerm::DateTimeLiteral(value), + Err(_) => { + match NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S") { + Ok(value) => EncodedTerm::NaiveDateTimeLiteral(value), + Err(_) => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: XSD_DATE_TIME_ID, + }, + } + } + } + } + _ => EncodedTerm::TypedLiteral { + value_id: self.string_store.insert_str(value)?, + datatype_id: self.string_store.insert_str(datatype.iri)?, + }, + }, + }) } pub fn encode_rio_named_or_blank_node( @@ -846,11 +882,11 @@ impl Encoder { EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()), EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(value).into()), EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(value).into()), - EncodedTerm::Date(value) => Ok(Literal::from(value).into()), - EncodedTerm::NaiveDate(value) => Ok(Literal::from(value).into()), - EncodedTerm::NaiveTime(value) => Ok(Literal::from(value).into()), - EncodedTerm::DateTime(value) => Ok(Literal::from(value).into()), - EncodedTerm::NaiveDateTime(value) => Ok(Literal::from(value).into()), + EncodedTerm::DateLiteral(value) => Ok(Literal::from(value).into()), + EncodedTerm::NaiveDateLiteral(value) => Ok(Literal::from(value).into()), + EncodedTerm::NaiveTimeLiteral(value) => Ok(Literal::from(value).into()), + EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(value).into()), + EncodedTerm::NaiveDateTimeLiteral(value) => Ok(Literal::from(value).into()), } } diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 44bacff3..ca163aca 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -203,14 +203,22 @@ fn load_graph_to_repository( } fn load_sparql_query_result_graph(url: &str) -> Result { + let repository = MemoryRepository::default(); + let connection = repository.connection()?; if url.ends_with(".srx") { - to_graph( + for t in to_graph( QueryResult::read(read_file(url)?, QueryResultSyntax::Xml)?, false, - ) + )? { + connection.insert(&t.in_graph(None))?; + } } else { - load_graph(url) + load_graph_to_repository(url, &connection, None)?; } + Ok(connection + .quads_for_pattern(None, None, None, Some(None)) + .map(|q| q.unwrap().into_triple()) + .collect()) } fn to_relative_path(url: &str) -> Result {