use rand::random;
use rio_api::model as rio;
use std::error::Error;
use std::fmt;
use std::io::Write;
use std::str;

/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
/// The common way to create a new blank node is to use the [`BlankNode::default`](#impl-Default) function.
///
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new`](#method.new) function.
/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
///
/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// use oxigraph::model::BlankNode;
///
/// assert_eq!(
///     "_:a122",
///     BlankNode::new("a122")?.to_string()
/// );
/// # Result::<_,oxigraph::model::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct BlankNode(BlankNodeContent);

/// Internal storage of a [`BlankNode`](struct.BlankNode.html).
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
enum BlankNodeContent {
    /// An arbitrary textual identifier.
    Named(String),
    /// A numerical identifier, kept together with its textual rendering.
    Anonymous { id: u128, str: IdStr },
}

impl BlankNode {
    /// Creates a blank node from a unique identifier.
    ///
    /// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
    ///
    /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`](#impl-Default)
    /// that creates a random ID that could be easily inlined by Oxigraph stores.
    ///
    /// # Errors
    ///
    /// Returns a [`BlankNodeIdParseError`](struct.BlankNodeIdParseError.html) if `id` does not pass validation.
    pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
        let id = id.into();
        validate_blank_node_identifier(&id)?;
        Ok(Self::new_unchecked(id))
    }

    /// Creates a blank node from a unique identifier without validation.
    ///
    /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
    /// according to N-Triples, Turtle and SPARQL grammars.
    ///
    /// [`new`](#method.new) is a safe version of this constructor and should be used for untrusted data.
    pub fn new_unchecked(id: impl Into<String>) -> Self {
        let id = id.into();
        // Identifiers that `to_integer_id` recognises are stored in their numerical form,
        // every other identifier is kept as text.
        if let Some(numerical_id) = to_integer_id(&id) {
            Self::new_from_unique_id(numerical_id)
        } else {
            Self(BlankNodeContent::Named(id))
        }
    }

    /// Creates a blank node from a unique numerical id
    ///
    /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`](#impl-Default).
    pub fn new_from_unique_id(id: impl Into<u128>) -> Self {
        let id = id.into();
        Self(BlankNodeContent::Anonymous {
            id,
            str: IdStr::new(id),
        })
    }

    /// Returns the underlying ID of this blank node
    #[inline]
    pub fn as_str(&self) -> &str {
        match &self.0 {
            BlankNodeContent::Named(id) => id,
            BlankNodeContent::Anonymous { str, .. } => str.as_str(),
        }
    }

    /// Returns the underlying ID of this blank node
    #[inline]
    pub fn into_string(self) -> String {
        match self.0 {
            BlankNodeContent::Named(id) => id,
            BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(),
        }
    }

    /// Returns a [`BlankNodeRef`](struct.BlankNodeRef.html) borrowing the identifier of this blank node.
    #[inline]
    pub fn as_ref(&self) -> BlankNodeRef<'_> {
        BlankNodeRef(match &self.0 {
            BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()),
            BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous {
                id: *id,
                str: str.as_str(),
            },
        })
    }
}

impl fmt::Display for BlankNode {
    /// Formats the blank node by delegating to the borrowed [`BlankNodeRef`](struct.BlankNodeRef.html) form.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.as_ref().fmt(f)
    }
}

impl Default for BlankNode {
    /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id
    ///
    /// The id is a randomly drawn `u128`.
    #[inline]
    fn default() -> Self {
        Self::new_from_unique_id(random::<u128>())
    }
}

/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
/// The common way to create a new blank node is to use the [`BlankNode::default`](#impl-Default) trait method.
///
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new`](#method.new) function.
/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
/// /// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation: /// ``` /// use oxigraph::model::BlankNodeRef; /// /// assert_eq!( /// "_:a122", /// BlankNodeRef::new("a122")?.to_string() /// ); /// # Result::<_,oxigraph::model::BlankNodeIdParseError>::Ok(()) /// ``` #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>); #[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] enum BlankNodeRefContent<'a> { Named(&'a str), Anonymous { id: u128, str: &'a str }, } impl<'a> BlankNodeRef<'a> { /// Creates a blank node from a unique identifier. /// /// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars. /// /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`](#impl-Default) /// that creates a random ID that could be easily inlined by Oxigraph stores. pub fn new(id: &'a str) -> Result { validate_blank_node_identifier(id)?; Ok(Self::new_unchecked(id)) } /// Creates a blank node from a unique identifier without validation. /// /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier /// according to N-Triples, Turtle and SPARQL grammars. /// /// [`new`](#method.new) is a safe version of this constructor and should be used for untrusted data. pub fn new_unchecked(id: &'a str) -> Self { if let Some(numerical_id) = to_integer_id(id) { Self(BlankNodeRefContent::Anonymous { id: numerical_id, str: id, }) } else { Self(BlankNodeRefContent::Named(id)) } } /// Returns the underlying ID of this blank node #[inline] pub fn as_str(self) -> &'a str { match self.0 { BlankNodeRefContent::Named(id) => id, BlankNodeRefContent::Anonymous { str, .. } => str, } } /// Returns the internal numerical ID of this blank node, if it exists #[inline] pub(crate) fn id(&self) -> Option { match self.0 { BlankNodeRefContent::Named(_) => None, BlankNodeRefContent::Anonymous { id, .. 
} => Some(id), } } #[inline] pub fn into_owned(self) -> BlankNode { BlankNode(match self.0 { BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()), BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous { id, str: IdStr::new(id), }, }) } } impl fmt::Display for BlankNodeRef<'_> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { rio::BlankNode::from(*self).fmt(f) } } impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> { #[inline] fn from(node: &'a BlankNode) -> Self { node.as_ref() } } impl<'a> From> for BlankNode { #[inline] fn from(node: BlankNodeRef<'a>) -> Self { node.into_owned() } } impl<'a> From> for rio::BlankNode<'a> { #[inline] fn from(node: BlankNodeRef<'a>) -> Self { rio::BlankNode { id: node.as_str() } } } impl PartialEq for BlankNodeRef<'_> { #[inline] fn eq(&self, other: &BlankNode) -> bool { *self == other.as_ref() } } impl PartialEq> for BlankNode { #[inline] fn eq(&self, other: &BlankNodeRef<'_>) -> bool { self.as_ref() == *other } } #[derive(PartialEq, Eq, Debug, Clone, Hash)] struct IdStr([u8; 32]); impl IdStr { fn new(id: u128) -> Self { let mut str = [0; 32]; write!(&mut str[..], "{:x}", id).unwrap(); Self(str) } fn as_str(&self) -> &str { let len = self.0.iter().position(|x| x == &0).unwrap_or(32); str::from_utf8(&self.0[..len]).unwrap() } } fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> { let mut chars = id.chars(); let front = chars.next().ok_or(BlankNodeIdParseError {})?; match front { '0'..='9' | '_' | ':' | 'A'..='Z' | 'a'..='z' | '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}' => (), _ => return Err(BlankNodeIdParseError {}), } for c in chars { match c { '.' 
// validated later | '-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' | '_' | ':' | 'A'..='Z' | 'a'..='z' | '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}' => (), _ => return Err(BlankNodeIdParseError {}), } } // Could not end with a dot if id.ends_with('.') { Err(BlankNodeIdParseError {}) } else { Ok(()) } } fn to_integer_id(id: &str) -> Option { let digits = id.as_bytes(); let mut value: u128 = 0; if let None | Some(b'0') = digits.first() { return None; // No empty string or leading zeros } for digit in digits { value = value.checked_mul(16)?.checked_add( match *digit { b'0'..=b'9' => digit - b'0', b'a'..=b'f' => digit - b'a' + 10, _ => return None, } .into(), )?; } Some(value) } /// An error raised during [`BlankNode`](struct.BlankNode.html) IDs validation. 
// The `missing_copy_implementations` lint is silenced: this type does not derive `Copy`.
#[allow(missing_copy_implementations)]
#[derive(Debug)]
pub struct BlankNodeIdParseError {}

impl fmt::Display for BlankNodeIdParseError {
    /// Writes a fixed, human readable description of the error.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("The blank node identifier is invalid")
    }
}

impl Error for BlankNodeIdParseError {}

#[cfg(test)]
mod test {
    use super::*;

    /// A short numerical id is rendered without padding.
    #[test]
    fn as_str_partial() {
        let node = BlankNode::new_from_unique_id(0x42_u128);
        assert_eq!(node.as_str(), "42");
    }

    /// A numerical id using all 32 hexadecimal digits is rendered in full.
    #[test]
    fn as_str_full() {
        let node = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000_u128);
        assert_eq!(node.as_str(), "77776666555544443333222211110000");
    }

    /// Identifiers are accepted or rejected by `BlankNode::new` as expected.
    #[test]
    fn new_validation() {
        let rejected = ["", "-", ".", "a."];
        for id in &rejected {
            assert!(BlankNode::new(*id).is_err());
        }

        let accepted = ["a", "a-", "a.a"];
        for id in &accepted {
            assert!(BlankNode::new(*id).is_ok());
        }
    }

    /// Lowercase hexadecimal identifiers map to numerical ids, uppercase ones do not.
    #[test]
    fn new_numerical() {
        let numerical = BlankNode::new_from_unique_id(0x100a_u128);
        assert_eq!(BlankNode::new("100a").unwrap(), numerical);
        assert_ne!(BlankNode::new("100A").unwrap(), numerical);
    }

    /// Owned and borrowed blank nodes built from the same identifier compare equal.
    #[test]
    fn test_equals() {
        let ids = ["100a", "zzz"];
        for id in &ids {
            assert_eq!(BlankNode::new(*id).unwrap(), BlankNodeRef::new(*id).unwrap());
        }
    }
}