Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
oxigraph/lib/oxrdf/src/literal.rs

537 lines
17 KiB

use crate::named_node::NamedNode;
use crate::vocab::rdf;
use crate::vocab::xsd;
use crate::NamedNodeRef;
use oxilangtag::{LanguageTag, LanguageTagParseError};
use std::borrow::Cow;
use std::fmt;
use std::fmt::Write;
use std::option::Option;
/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
4 years ago
/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// # use oxilangtag::LanguageTagParseError;
/// use oxrdf::Literal;
/// use oxrdf::vocab::xsd;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
/// Literal::new_simple_literal("foo\nbar").to_string()
/// );
///
/// assert_eq!(
/// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
///
/// assert_eq!(
/// "\"foo\"@en",
/// Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// );
/// # Result::<(), LanguageTagParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Literal(LiteralContent);
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
enum LiteralContent {
String(String),
LanguageTaggedString { value: String, language: String },
TypedLiteral { value: String, datatype: NamedNode },
}
impl Literal {
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
#[inline]
pub fn new_simple_literal(value: impl Into<String>) -> Self {
Self(LiteralContent::String(value.into()))
}
/// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
#[inline]
pub fn new_typed_literal(value: impl Into<String>, datatype: impl Into<NamedNode>) -> Self {
let value = value.into();
let datatype = datatype.into();
Self(if datatype == xsd::STRING {
LiteralContent::String(value)
} else {
LiteralContent::TypedLiteral { value, datatype }
})
}
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
#[inline]
pub fn new_language_tagged_literal(
value: impl Into<String>,
language: impl Into<String>,
) -> Result<Self, LanguageTagParseError> {
let mut language = language.into();
language.make_ascii_lowercase();
Ok(Self::new_language_tagged_literal_unchecked(
value,
LanguageTag::parse(language)?.into_inner(),
))
}
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// It is the responsibility of the caller to check that `language`
/// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
/// and is lowercase.
///
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_language_tagged_literal_unchecked(
value: impl Into<String>,
language: impl Into<String>,
) -> Self {
Self(LiteralContent::LanguageTaggedString {
value: value.into(),
language: language.into(),
})
}
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form).
#[inline]
pub fn value(&self) -> &str {
self.as_ref().value()
}
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
/// They are normalized to lowercase by this implementation.
#[inline]
pub fn language(&self) -> Option<&str> {
self.as_ref().language()
}
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
///
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](http://www.w3.org/1999/02/22-rdf-syntax-ns#langString).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline]
pub fn datatype(&self) -> NamedNodeRef<'_> {
self.as_ref().datatype()
}
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal).
///
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline]
pub fn is_plain(&self) -> bool {
self.as_ref().is_plain()
}
#[inline]
pub fn as_ref(&self) -> LiteralRef<'_> {
LiteralRef(match &self.0 {
LiteralContent::String(value) => LiteralRefContent::String(value),
LiteralContent::LanguageTaggedString { value, language } => {
LiteralRefContent::LanguageTaggedString { value, language }
}
LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral {
value,
datatype: datatype.as_ref(),
},
})
}
/// Extract components from this literal
#[inline]
pub fn destruct(self) -> (String, Option<NamedNode>, Option<String>) {
match self.0 {
LiteralContent::String(s) => (s, None, None),
LiteralContent::LanguageTaggedString { value, language } => {
(value, None, Some(language))
}
LiteralContent::TypedLiteral { value, datatype } => (value, Some(datatype), None),
}
}
}
impl fmt::Display for Literal {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_ref().fmt(f)
}
}
impl<'a> From<&'a str> for Literal {
#[inline]
fn from(value: &'a str) -> Self {
Self(LiteralContent::String(value.into()))
}
}
impl From<String> for Literal {
#[inline]
fn from(value: String) -> Self {
Self(LiteralContent::String(value))
}
}
impl<'a> From<Cow<'a, str>> for Literal {
#[inline]
fn from(value: Cow<'a, str>) -> Self {
Self(LiteralContent::String(value.into()))
}
}
impl From<bool> for Literal {
#[inline]
fn from(value: bool) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::BOOLEAN.into(),
})
}
}
impl From<i128> for Literal {
#[inline]
fn from(value: i128) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<i64> for Literal {
#[inline]
fn from(value: i64) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<i32> for Literal {
#[inline]
fn from(value: i32) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<i16> for Literal {
#[inline]
fn from(value: i16) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<u64> for Literal {
#[inline]
fn from(value: u64) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<u32> for Literal {
#[inline]
fn from(value: u32) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<u16> for Literal {
#[inline]
fn from(value: u16) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<f32> for Literal {
#[inline]
fn from(value: f32) -> Self {
Self(LiteralContent::TypedLiteral {
value: if value == f32::INFINITY {
"INF".to_string()
} else if value == f32::NEG_INFINITY {
"-INF".to_string()
} else {
value.to_string()
},
datatype: xsd::FLOAT.into(),
})
}
}
impl From<f64> for Literal {
#[inline]
fn from(value: f64) -> Self {
Self(LiteralContent::TypedLiteral {
value: if value == f64::INFINITY {
"INF".to_string()
} else if value == f64::NEG_INFINITY {
"-INF".to_string()
} else {
value.to_string()
},
datatype: xsd::DOUBLE.into(),
})
}
}
/// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
4 years ago
/// The default string formatter is returning an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// use oxrdf::LiteralRef;
/// use oxrdf::vocab::xsd;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
/// LiteralRef::new_simple_literal("foo\nbar").to_string()
/// );
///
/// assert_eq!(
/// "\"1999-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>",
/// LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct LiteralRef<'a>(LiteralRefContent<'a>);
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
enum LiteralRefContent<'a> {
String(&'a str),
LanguageTaggedString {
value: &'a str,
language: &'a str,
},
TypedLiteral {
value: &'a str,
datatype: NamedNodeRef<'a>,
},
}
impl<'a> LiteralRef<'a> {
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
#[inline]
pub fn new_simple_literal(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value))
}
/// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
#[inline]
pub fn new_typed_literal(value: &'a str, datatype: impl Into<NamedNodeRef<'a>>) -> Self {
let datatype = datatype.into();
LiteralRef(if datatype == xsd::STRING {
LiteralRefContent::String(value)
} else {
LiteralRefContent::TypedLiteral { value, datatype }
})
}
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// It is the responsibility of the caller to check that `language`
/// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
/// and is lowercase.
///
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
}
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
#[inline]
pub fn value(self) -> &'a str {
match self.0 {
LiteralRefContent::String(value)
| LiteralRefContent::LanguageTaggedString { value, .. }
| LiteralRefContent::TypedLiteral { value, .. } => value,
}
}
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
/// They are normalized to lowercase by this implementation.
#[inline]
pub fn language(self) -> Option<&'a str> {
match self.0 {
LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
_ => None,
}
}
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
///
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](http://www.w3.org/1999/02/22-rdf-syntax-ns#langString).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline]
pub fn datatype(self) -> NamedNodeRef<'a> {
match self.0 {
LiteralRefContent::String(_) => xsd::STRING,
LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING,
LiteralRefContent::TypedLiteral { datatype, .. } => datatype,
}
}
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/rdf-concepts/#dfn-plain-literal).
///
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](http://www.w3.org/2001/XMLSchema#string).
#[inline]
pub fn is_plain(self) -> bool {
matches!(
self.0,
LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
)
}
#[inline]
pub fn into_owned(self) -> Literal {
Literal(match self.0 {
LiteralRefContent::String(value) => LiteralContent::String(value.to_owned()),
LiteralRefContent::LanguageTaggedString { value, language } => {
LiteralContent::LanguageTaggedString {
value: value.to_owned(),
language: language.to_owned(),
}
}
LiteralRefContent::TypedLiteral { value, datatype } => LiteralContent::TypedLiteral {
value: value.to_owned(),
datatype: datatype.into_owned(),
},
})
}
/// Extract components from this literal
#[inline]
pub fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
match self.0 {
LiteralRefContent::String(s) => (s, None, None),
LiteralRefContent::LanguageTaggedString { value, language } => {
(value, None, Some(language))
}
LiteralRefContent::TypedLiteral { value, datatype } => (value, Some(datatype), None),
}
}
}
impl fmt::Display for LiteralRef<'_> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.0 {
LiteralRefContent::String(value) => print_quoted_str(value, f),
LiteralRefContent::LanguageTaggedString { value, language } => {
print_quoted_str(value, f)?;
write!(f, "@{}", language)
}
LiteralRefContent::TypedLiteral { value, datatype } => {
print_quoted_str(value, f)?;
write!(f, "^^{}", datatype)
}
}
}
}
impl<'a> From<&'a Literal> for LiteralRef<'a> {
#[inline]
fn from(node: &'a Literal) -> Self {
node.as_ref()
}
}
impl<'a> From<LiteralRef<'a>> for Literal {
#[inline]
fn from(node: LiteralRef<'a>) -> Self {
node.into_owned()
}
}
impl<'a> From<&'a str> for LiteralRef<'a> {
#[inline]
fn from(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value))
}
}
impl PartialEq<Literal> for LiteralRef<'_> {
#[inline]
fn eq(&self, other: &Literal) -> bool {
*self == other.as_ref()
}
}
impl PartialEq<LiteralRef<'_>> for Literal {
#[inline]
fn eq(&self, other: &LiteralRef<'_>) -> bool {
self.as_ref() == *other
}
}
#[inline]
pub(crate) fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
f.write_char('"')?;
for c in string.chars() {
match c {
'\n' => f.write_str("\\n"),
'\r' => f.write_str("\\r"),
'"' => f.write_str("\\\""),
'\\' => f.write_str("\\\\"),
c => f.write_char(c),
}?;
}
f.write_char('"')
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simple_literal_equality() {
assert_eq!(
Literal::new_simple_literal("foo"),
Literal::new_typed_literal("foo", xsd::STRING)
);
assert_eq!(
Literal::new_simple_literal("foo"),
LiteralRef::new_typed_literal("foo", xsd::STRING)
);
assert_eq!(
LiteralRef::new_simple_literal("foo"),
Literal::new_typed_literal("foo", xsd::STRING)
);
assert_eq!(
LiteralRef::new_simple_literal("foo"),
LiteralRef::new_typed_literal("foo", xsd::STRING)
);
}
#[test]
fn test_float_format() {
assert_eq!("INF", Literal::from(f32::INFINITY).value());
assert_eq!("INF", Literal::from(f64::INFINITY).value());
assert_eq!("-INF", Literal::from(f32::NEG_INFINITY).value());
assert_eq!("-INF", Literal::from(f64::NEG_INFINITY).value());
assert_eq!("NaN", Literal::from(f32::NAN).value());
assert_eq!("NaN", Literal::from(f64::NAN).value());
}
}