Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
oxigraph/lib/oxrdf/src/blank_node.rs

409 lines
12 KiB

use rand::random;
use std::error::Error;
use std::fmt;
use std::io::Write;
use std::str;
/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
/// The common way to create a new blank node is to use the [`BlankNode::default()`] function.
///
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new()`] function.
/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
///
4 years ago
/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!(
/// "_:a122",
/// BlankNode::new("a122")?.to_string()
/// );
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct BlankNode(BlankNodeContent);
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
enum BlankNodeContent {
Named(String),
Anonymous { id: u128, str: IdStr },
}
impl BlankNode {
/// Creates a blank node from a unique identifier.
///
/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
///
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
///that creates a random ID that could be easily inlined by Oxigraph stores.
pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
let id = id.into();
validate_blank_node_identifier(&id)?;
Ok(Self::new_unchecked(id))
}
/// Creates a blank node from a unique identifier without validation.
///
/// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
/// according to N-Triples, Turtle and SPARQL grammars.
///
/// [`BlankNode::new()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_unchecked(id: impl Into<String>) -> Self {
let id = id.into();
if let Some(numerical_id) = to_integer_id(&id) {
Self::new_from_unique_id(numerical_id)
} else {
Self(BlankNodeContent::Named(id))
}
}
/// Creates a blank node from a unique numerical id
///
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`].
#[inline]
pub fn new_from_unique_id(id: u128) -> Self {
Self(BlankNodeContent::Anonymous {
id,
str: IdStr::new(id),
})
}
/// Returns the underlying ID of this blank node
#[inline]
pub fn as_str(&self) -> &str {
match &self.0 {
BlankNodeContent::Named(id) => id,
BlankNodeContent::Anonymous { str, .. } => str.as_str(),
}
}
/// Returns the underlying ID of this blank node
#[inline]
pub fn into_string(self) -> String {
match self.0 {
BlankNodeContent::Named(id) => id,
BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(),
}
}
#[inline]
pub fn as_ref(&self) -> BlankNodeRef<'_> {
BlankNodeRef(match &self.0 {
BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()),
BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous {
id: *id,
str: str.as_str(),
},
})
}
}
impl fmt::Display for BlankNode {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_ref().fmt(f)
}
}
impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id
#[inline]
fn default() -> Self {
Self::new_from_unique_id(random::<u128>())
}
}
/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
/// The common way to create a new blank node is to use the [`BlankNode::default`] trait method.
///
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new()`] function.
/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
///
4 years ago
/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// use oxrdf::BlankNodeRef;
///
/// assert_eq!(
/// "_:a122",
/// BlankNodeRef::new("a122")?.to_string()
/// );
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>);
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
enum BlankNodeRefContent<'a> {
Named(&'a str),
Anonymous { id: u128, str: &'a str },
}
impl<'a> BlankNodeRef<'a> {
/// Creates a blank node from a unique identifier.
///
/// The blank node identifier must be valid according to N-Triples, Turtle and SPARQL grammars.
///
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`].
/// that creates a random ID that could be easily inlined by Oxigraph stores.
pub fn new(id: &'a str) -> Result<Self, BlankNodeIdParseError> {
validate_blank_node_identifier(id)?;
Ok(Self::new_unchecked(id))
}
/// Creates a blank node from a unique identifier without validation.
///
/// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
/// according to N-Triples, Turtle and SPARQL grammars.
///
/// [`BlankNodeRef::new()`) is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_unchecked(id: &'a str) -> Self {
if let Some(numerical_id) = to_integer_id(id) {
Self(BlankNodeRefContent::Anonymous {
id: numerical_id,
str: id,
})
} else {
Self(BlankNodeRefContent::Named(id))
}
}
/// Returns the underlying ID of this blank node
#[inline]
pub fn as_str(self) -> &'a str {
match self.0 {
BlankNodeRefContent::Named(id) => id,
BlankNodeRefContent::Anonymous { str, .. } => str,
}
}
/// Returns the internal numerical ID of this blank node if it has been created using [`BlankNode::new_from_unique_id`]
///
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!(BlankNode::new_from_unique_id(128).as_ref().unique_id(), Some(128));
/// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[inline]
pub fn unique_id(&self) -> Option<u128> {
match self.0 {
BlankNodeRefContent::Named(_) => None,
BlankNodeRefContent::Anonymous { id, .. } => Some(id),
}
}
#[inline]
pub fn into_owned(self) -> BlankNode {
BlankNode(match self.0 {
BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()),
BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous {
id,
str: IdStr::new(id),
},
})
}
}
impl fmt::Display for BlankNodeRef<'_> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "_:{}", self.as_str())
}
}
impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> {
#[inline]
fn from(node: &'a BlankNode) -> Self {
node.as_ref()
}
}
impl<'a> From<BlankNodeRef<'a>> for BlankNode {
#[inline]
fn from(node: BlankNodeRef<'a>) -> Self {
node.into_owned()
}
}
impl PartialEq<BlankNode> for BlankNodeRef<'_> {
#[inline]
fn eq(&self, other: &BlankNode) -> bool {
*self == other.as_ref()
}
}
impl PartialEq<BlankNodeRef<'_>> for BlankNode {
#[inline]
fn eq(&self, other: &BlankNodeRef<'_>) -> bool {
self.as_ref() == *other
}
}
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
struct IdStr([u8; 32]);
impl IdStr {
#[inline]
fn new(id: u128) -> Self {
let mut str = [0; 32];
write!(&mut str[..], "{:x}", id).unwrap();
Self(str)
}
#[inline]
fn as_str(&self) -> &str {
let len = self.0.iter().position(|x| x == &0).unwrap_or(32);
str::from_utf8(&self.0[..len]).unwrap()
}
}
fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
let mut chars = id.chars();
let front = chars.next().ok_or(BlankNodeIdParseError {})?;
match front {
'0'..='9'
| '_'
| ':'
| 'A'..='Z'
| 'a'..='z'
| '\u{00C0}'..='\u{00D6}'
| '\u{00D8}'..='\u{00F6}'
| '\u{00F8}'..='\u{02FF}'
| '\u{0370}'..='\u{037D}'
| '\u{037F}'..='\u{1FFF}'
| '\u{200C}'..='\u{200D}'
| '\u{2070}'..='\u{218F}'
| '\u{2C00}'..='\u{2FEF}'
| '\u{3001}'..='\u{D7FF}'
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError {}),
}
for c in chars {
match c {
'.' // validated later
| '-'
| '0'..='9'
| '\u{00B7}'
| '\u{0300}'..='\u{036F}'
| '\u{203F}'..='\u{2040}'
| '_'
| ':'
| 'A'..='Z'
| 'a'..='z'
| '\u{00C0}'..='\u{00D6}'
| '\u{00D8}'..='\u{00F6}'
| '\u{00F8}'..='\u{02FF}'
| '\u{0370}'..='\u{037D}'
| '\u{037F}'..='\u{1FFF}'
| '\u{200C}'..='\u{200D}'
| '\u{2070}'..='\u{218F}'
| '\u{2C00}'..='\u{2FEF}'
| '\u{3001}'..='\u{D7FF}'
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}' => (),
_ => return Err(BlankNodeIdParseError {}),
}
}
// Could not end with a dot
if id.ends_with('.') {
Err(BlankNodeIdParseError {})
} else {
Ok(())
}
}
#[inline]
fn to_integer_id(id: &str) -> Option<u128> {
let digits = id.as_bytes();
let mut value: u128 = 0;
if let None | Some(b'0') = digits.first() {
return None; // No empty string or leading zeros
}
for digit in digits {
value = value.checked_mul(16)?.checked_add(
match *digit {
b'0'..=b'9' => digit - b'0',
b'a'..=b'f' => digit - b'a' + 10,
_ => return None,
}
.into(),
)?;
}
Some(value)
}
/// An error raised during [`BlankNode`] IDs validation.
#[allow(missing_copy_implementations)]
#[derive(Debug)]
pub struct BlankNodeIdParseError {}
impl fmt::Display for BlankNodeIdParseError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "The blank node identifier is invalid")
}
}
impl Error for BlankNodeIdParseError {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn as_str_partial() {
let b = BlankNode::new_from_unique_id(0x42);
assert_eq!(b.as_str(), "42");
}
#[test]
fn as_str_full() {
let b = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000);
assert_eq!(b.as_str(), "77776666555544443333222211110000");
}
#[test]
fn new_validation() {
assert!(BlankNode::new("").is_err());
assert!(BlankNode::new("a").is_ok());
assert!(BlankNode::new("-").is_err());
assert!(BlankNode::new("a-").is_ok());
assert!(BlankNode::new(".").is_err());
assert!(BlankNode::new("a.").is_err());
assert!(BlankNode::new("a.a").is_ok());
}
#[test]
fn new_numerical() {
assert_eq!(
BlankNode::new("100a").unwrap(),
BlankNode::new_from_unique_id(0x100a),
);
assert_ne!(
BlankNode::new("100A").unwrap(),
BlankNode::new_from_unique_id(0x100a)
);
}
#[test]
fn test_equals() {
assert_eq!(
BlankNode::new("100a").unwrap(),
BlankNodeRef::new("100a").unwrap()
);
assert_eq!(
BlankNode::new("zzz").unwrap(),
BlankNodeRef::new("zzz").unwrap()
);
}
}