Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
oxigraph/lib/oxttl/src/n3.rs

1030 lines
40 KiB

//! A [N3](https://w3c.github.io/N3/spec/) streaming parser implemented by [`N3Parser`].
use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
use crate::toolkit::{
FromReadIterator, Lexer, ParseError, Parser, RuleRecognizer, RuleRecognizerError,
};
use crate::{ParseOrIoError, MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
use oxiri::{Iri, IriParseError};
use oxrdf::vocab::{rdf, xsd};
#[cfg(feature = "rdf-star")]
use oxrdf::Triple;
use oxrdf::{
BlankNode, GraphName, Literal, NamedNode, NamedNodeRef, NamedOrBlankNode, Quad, Subject, Term,
Variable,
};
use std::collections::HashMap;
use std::fmt;
use std::io::Read;
/// A N3 term i.e. a RDF `Term` or a `Variable`.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum N3Term {
    /// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
    NamedNode(NamedNode),
    /// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
    BlankNode(BlankNode),
    /// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
    Literal(Literal),
    /// An RDF-star quoted triple (boxed to keep the enum small).
    #[cfg(feature = "rdf-star")]
    Triple(Box<Triple>),
    /// An N3 quick variable (grammar production [30], e.g. `?x`).
    Variable(Variable),
}
impl fmt::Display for N3Term {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::NamedNode(term) => term.fmt(f),
Self::BlankNode(term) => term.fmt(f),
Self::Literal(term) => term.fmt(f),
#[cfg(feature = "rdf-star")]
Self::Triple(term) => term.fmt(f),
Self::Variable(term) => term.fmt(f),
}
}
}
impl From<NamedNode> for N3Term {
#[inline]
fn from(node: NamedNode) -> Self {
Self::NamedNode(node)
}
}
impl From<NamedNodeRef<'_>> for N3Term {
#[inline]
fn from(node: NamedNodeRef<'_>) -> Self {
Self::NamedNode(node.into_owned())
}
}
impl From<BlankNode> for N3Term {
#[inline]
fn from(node: BlankNode) -> Self {
Self::BlankNode(node)
}
}
impl From<Literal> for N3Term {
#[inline]
fn from(literal: Literal) -> Self {
Self::Literal(literal)
}
}
/// Wraps a quoted triple as an N3 term, boxing it on the way in.
#[cfg(feature = "rdf-star")]
impl From<Triple> for N3Term {
    #[inline]
    fn from(value: Triple) -> Self {
        N3Term::Triple(Box::new(value))
    }
}
/// Wraps an already-boxed quoted triple as an N3 term without re-boxing.
#[cfg(feature = "rdf-star")]
impl From<Box<Triple>> for N3Term {
    #[inline]
    fn from(value: Box<Triple>) -> Self {
        N3Term::Triple(value)
    }
}
impl From<NamedOrBlankNode> for N3Term {
#[inline]
fn from(node: NamedOrBlankNode) -> Self {
match node {
NamedOrBlankNode::NamedNode(node) => node.into(),
NamedOrBlankNode::BlankNode(node) => node.into(),
}
}
}
impl From<Subject> for N3Term {
#[inline]
fn from(node: Subject) -> Self {
match node {
Subject::NamedNode(node) => node.into(),
Subject::BlankNode(node) => node.into(),
#[cfg(feature = "rdf-star")]
Subject::Triple(triple) => Self::Triple(triple),
}
}
}
impl From<Term> for N3Term {
#[inline]
fn from(node: Term) -> Self {
match node {
Term::NamedNode(node) => node.into(),
Term::BlankNode(node) => node.into(),
Term::Literal(node) => node.into(),
#[cfg(feature = "rdf-star")]
Term::Triple(triple) => Self::Triple(triple),
}
}
}
impl From<Variable> for N3Term {
#[inline]
fn from(variable: Variable) -> Self {
Self::Variable(variable)
}
}
/// A N3 quad i.e. a quad composed of [`N3Term`].
///
/// The `graph_name` is used to encode the formula where the triple is in.
/// In this case the formula is encoded by a blank node.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct N3Quad {
    /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple.
    pub subject: N3Term,
    /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple.
    pub predicate: N3Term,
    /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple.
    pub object: N3Term,
    /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is.
    /// For triples stated inside an N3 formula (`{ ... }`) this is the blank node that stands for the formula;
    /// top-level triples use [`GraphName::DefaultGraph`].
    pub graph_name: GraphName,
}
/// Formats the quad as three space-separated terms, appending the graph
/// name only when the quad is not in the default graph.
impl fmt::Display for N3Quad {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{} {} {}", self.subject, self.predicate, self.object)?;
        if self.graph_name != GraphName::DefaultGraph {
            write!(f, " {}", self.graph_name)?;
        }
        Ok(())
    }
}
impl From<Quad> for N3Quad {
fn from(quad: Quad) -> Self {
Self {
subject: quad.subject.into(),
predicate: quad.predicate.into(),
object: quad.object.into(),
graph_name: quad.graph_name,
}
}
}
/// A [N3](https://w3c.github.io/N3/spec/) streaming parser.
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNode, vocab::rdf};
/// use oxttl::n3::{N3Parser, N3Term};
///
/// let file = b"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name \"Foo\" .
/// <bar> a schema:Person ;
/// schema:name \"Bar\" .";
///
/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
/// let mut count = 0;
/// for triple in N3Parser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct N3Parser {
    // Default base IRI used to resolve relative IRIs; a `@base`/`BASE` directive
    // in the document overrides it.
    base: Option<Iri<String>>,
    // Prefixes pre-declared by the caller, merged with the document's own
    // `@prefix`/`PREFIX` declarations during parsing.
    prefixes: HashMap<String, Iri<String>>,
}
impl N3Parser {
    /// Builds a new [`N3Parser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the default base IRI used to resolve relative IRIs in the document.
    ///
    /// A `@base`/`BASE` directive inside the document overrides this value.
    ///
    /// Errors if `base_iri` is not a valid IRI.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.base = Some(Iri::parse(base_iri.into())?);
        Ok(self)
    }

    /// Pre-declares a prefix so documents may use it without declaring it themselves.
    ///
    /// A `@prefix`/`PREFIX` declaration inside the document may redeclare it.
    ///
    /// Errors if `prefix_iri` is not a valid IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.prefixes
            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
        Ok(self)
    }

    /// Parses a N3 file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::NamedNode;
    /// use oxttl::n3::{N3Parser, N3Term};
    ///
    /// let file = b"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name \"Foo\" .
    /// <bar> a schema:Person ;
    /// schema:name \"Bar\" .";
    ///
    /// let rdf_type = N3Term::NamedNode(NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?);
    /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
    /// let mut count = 0;
    /// for triple in N3Parser::new().parse_from_read(file.as_ref()) {
    /// let triple = triple?;
    /// if triple.predicate == rdf_type && triple.object == schema_person {
    /// count += 1;
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_from_read<R: Read>(&self, read: R) -> FromReadN3Reader<R> {
        FromReadN3Reader {
            inner: self.parse().parser.parse_from_read(read),
        }
    }

    /// Allows to parse a N3 file by using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::{NamedNode, vocab::rdf};
    /// use oxttl::n3::{N3Parser, N3Term};
    ///
    /// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
    /// b". @prefix schema: <http://schema.org/> .",
    /// b"<foo> a schema:Person",
    /// b" ; schema:name \"Foo\" . <bar>",
    /// b" a schema:Person ; schema:name \"Bar\" ."
    /// ];
    ///
    /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
    /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
    /// let mut count = 0;
    /// let mut parser = N3Parser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    /// // We feed more data to the parser
    /// if let Some(chunk) = file_chunks.next() {
    /// parser.extend_from_slice(chunk);
    /// } else {
    /// parser.end(); // It's finished
    /// }
    /// // We read as many triples from the parser as possible
    /// while let Some(triple) = parser.read_next() {
    /// let triple = triple?;
    /// if triple.predicate == rdf_type && triple.object == schema_person {
    /// count += 1;
    /// }
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse(&self) -> LowLevelN3Reader {
        LowLevelN3Reader {
            parser: N3Recognizer::new_parser(self.base.clone(), self.prefixes.clone()),
        }
    }
}
/// Parses a N3 file from a [`Read`] implementation. Can be built using [`N3Parser::parse_from_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNode, vocab::rdf};
/// use oxttl::n3::{N3Parser, N3Term};
///
/// let file = b"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name \"Foo\" .
/// <bar> a schema:Person ;
/// schema:name \"Bar\" .";
///
/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
/// let mut count = 0;
/// for triple in N3Parser::new().parse_from_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct FromReadN3Reader<R: Read> {
    // Pull-based iterator driving the N3 recognizer over bytes read from `R`.
    inner: FromReadIterator<R, N3Recognizer>,
}
/// Yields each parsed quad (or error) by delegating to the inner iterator.
impl<R: Read> Iterator for FromReadN3Reader<R> {
    type Item = Result<N3Quad, ParseOrIoError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }
}
/// Parses a N3 file by using a low-level API. Can be built using [`N3Parser::parse`].
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNode, vocab::rdf};
/// use oxttl::n3::{N3Parser, N3Term};
///
/// let file: [&[u8]; 5] = [b"@base <http://example.com/>",
/// b". @prefix schema: <http://schema.org/> .",
/// b"<foo> a schema:Person",
/// b" ; schema:name \"Foo\" . <bar>",
/// b" a schema:Person ; schema:name \"Bar\" ."
/// ];
///
/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
/// let mut count = 0;
/// let mut parser = N3Parser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many triples from the parser as possible
/// while let Some(triple) = parser.read_next() {
/// let triple = triple?;
/// if triple.predicate == rdf_type && triple.object == schema_person {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelN3Reader {
    // Push-based parser: bytes go in via `extend_from_slice`, quads come out
    // via `read_next`.
    parser: Parser<N3Recognizer>,
}
impl LowLevelN3Reader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, data: &[u8]) {
        self.parser.extend_from_slice(data)
    }

    /// Tells the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns whether parsing is finished, i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempts to parse a new quad from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<N3Quad, ParseError>> {
        self.parser.read_next()
    }
}
// The direction of a parsed verb: `Inverted` swaps subject and object when the
// quad is emitted (used for `is ... of`, `<=` and `<-` verbs).
#[derive(Clone)]
enum Predicate {
    /// Emit `subject predicate object`.
    Regular(N3Term),
    /// Emit `object predicate subject`.
    Inverted(N3Term),
}
// Pushdown automaton recognizing the N3 grammar one token at a time.
struct N3Recognizer {
    // Stack of pending grammar states; the top state consumes the next token.
    stack: Vec<N3State>,
    // Lexer configuration; holds the current base IRI (updated by `@base`).
    lexer_options: N3LexerOptions,
    // Known prefix declarations (caller-provided plus in-document `@prefix`).
    prefixes: HashMap<String, Iri<String>>,
    // Stack of partially-built terms (subjects, collection heads, ...).
    terms: Vec<N3Term>,
    // Stack of the verbs currently in scope, innermost last.
    predicates: Vec<Predicate>,
    // Blank nodes standing for the enclosing formulas (`{ ... }`), innermost last.
    contexts: Vec<BlankNode>,
}
impl RuleRecognizer for N3Recognizer {
type TokenRecognizer = N3Lexer;
type Output = N3Quad;
fn error_recovery_state(mut self) -> Self {
self.stack.clear();
self.terms.clear();
self.predicates.clear();
self.contexts.clear();
self
}
fn recognize_next(
mut self,
token: N3Token,
results: &mut Vec<N3Quad>,
errors: &mut Vec<RuleRecognizerError>,
) -> Self {
if let Some(rule) = self.stack.pop() {
match rule {
// [1] n3Doc ::= ( ( n3Statement ".") | sparqlDirective) *
// [2] n3Statement ::= n3Directive | triples
// [3] n3Directive ::= prefixID | base
// [4] sparqlDirective ::= sparqlBase | sparqlPrefix
// [5] sparqlBase ::= BASE IRIREF
// [6] sparqlPrefix ::= PREFIX PNAME_NS IRIREF
// [7] prefixID ::= "@prefix" PNAME_NS IRIREF
// [8] base ::= "@base" IRIREF
N3State::N3Doc => {
self.stack.push(N3State::N3Doc);
match token {
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
self.stack.push(N3State::BaseExpectIri);
self
}
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
self.stack.push(N3State::PrefixExpectPrefix);
self
}
N3Token::LangTag("prefix") => {
self.stack.push(N3State::N3DocExpectDot);
self.stack.push(N3State::PrefixExpectPrefix);
self
}
N3Token::LangTag("base") => {
self.stack.push(N3State::N3DocExpectDot);
self.stack.push(N3State::BaseExpectIri);
self
}
token => {
self.stack.push(N3State::N3DocExpectDot);
self.stack.push(N3State::Triples);
self.recognize_next(token, results, errors)
}
}
},
N3State::N3DocExpectDot => {
if token == N3Token::Punctuation(".") {
self
} else {
errors.push("A dot is expected at the end of N3 statements".into());
self.recognize_next(token, results, errors)
}
},
N3State::BaseExpectIri => match token {
N3Token::IriRef(iri) => {
self.lexer_options.base_iri = Some(iri);
self
}
_ => self.error(errors, "The BASE keyword should be followed by an IRI"),
},
N3State::PrefixExpectPrefix => match token {
N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => {
self.stack.push(N3State::PrefixExpectIri { name: prefix.to_owned() });
self
}
_ => {
self.error(errors, "The PREFIX keyword should be followed by a prefix like 'ex:'")
}
},
N3State::PrefixExpectIri { name } => match token {
N3Token::IriRef(iri) => {
self.prefixes.insert(name, iri);
self
}
_ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"),
},
// [9] triples ::= subject predicateObjectList?
N3State::Triples => {
self.stack.push(N3State::TriplesMiddle);
self.stack.push(N3State::Path);
self.recognize_next(token, results, errors)
},
N3State::TriplesMiddle => if matches!(token, N3Token::Punctuation("." | "]" | "}" | ")")) {
self.recognize_next(token, results, errors)
} else {
self.stack.push(N3State::TriplesEnd);
self.stack.push(N3State::PredicateObjectList);
self.recognize_next(token, results, errors)
},
N3State::TriplesEnd => {
self.terms.pop();
self.recognize_next(token, results, errors)
},
// [10] predicateObjectList ::= verb objectList ( ";" ( verb objectList) ? ) *
N3State::PredicateObjectList => {
self.stack.push(N3State::PredicateObjectListEnd);
self.stack.push(N3State::ObjectsList);
self.stack.push(N3State::Verb);
self.recognize_next(token, results, errors)
},
N3State::PredicateObjectListEnd => {
self.predicates.pop();
if token == N3Token::Punctuation(";") {
self.stack.push(N3State::PredicateObjectListPossibleContinuation);
self
} else {
self.recognize_next(token, results, errors)
}
},
N3State::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") {
self.stack.push(N3State::PredicateObjectListPossibleContinuation);
self
} else if matches!(token, N3Token::Punctuation(";" | "." | "}" | "]" | ")")) {
self.recognize_next(token, results, errors)
} else {
self.stack.push(N3State::PredicateObjectListEnd);
self.stack.push(N3State::ObjectsList);
self.stack.push(N3State::Verb);
self.recognize_next(token, results, errors)
},
// [11] objectList ::= object ( "," object) *
N3State::ObjectsList => {
self.stack.push(N3State::ObjectsListEnd);
self.stack.push(N3State::Path);
self.recognize_next(token, results, errors)
}
N3State::ObjectsListEnd => {
let object = self.terms.pop().unwrap();
let subject = self.terms.last().unwrap().clone();
results.push(match self.predicates.last().unwrap().clone() {
Predicate::Regular(predicate) => self.quad(
subject,
predicate,
object,
),
Predicate::Inverted(predicate) => self.quad(
object,
predicate,
subject,
)
});
if token == N3Token::Punctuation(",") {
self.stack.push(N3State::ObjectsListEnd);
self.stack.push(N3State::Path);
self
} else {
self.recognize_next(token, results, errors)
}
},
// [12] verb ::= predicate | "a" | ( "has" expression) | ( "is" expression "of") | "=" | "<=" | "=>"
// [14] predicate ::= expression | ( "<-" expression)
N3State::Verb => match token {
N3Token::PlainKeyword("a") => {
self.predicates.push(Predicate::Regular(rdf::TYPE.into()));
self
}
N3Token::PlainKeyword("has") => {
self.stack.push(N3State::AfterRegularVerb);
self.stack.push(N3State::Path);
self
}
N3Token::PlainKeyword("is") => {
self.stack.push(N3State::AfterVerbIs);
self.stack.push(N3State::Path);
self
}
N3Token::Punctuation("=") => {
self.predicates.push(Predicate::Regular(NamedNode::new_unchecked("http://www.w3.org/2002/07/owl#sameAs").into()));
self
}
N3Token::Punctuation("=>") => {
self.predicates.push(Predicate::Regular(NamedNode::new_unchecked("http://www.w3.org/2000/10/swap/log#implies").into()));
self
}
N3Token::Punctuation("<=") => {
self.predicates.push(Predicate::Inverted(NamedNode::new_unchecked("http://www.w3.org/2000/10/swap/log#implies").into()));
self
}
N3Token::Punctuation("<-") => {
self.stack.push(N3State::AfterInvertedVerb);
self.stack.push(N3State::Path);
self
}
token => {
self.stack.push(N3State::AfterRegularVerb);
self.stack.push(N3State::Path);
self.recognize_next(token, results, errors)
}
}
N3State::AfterRegularVerb => {
self.predicates.push(Predicate::Regular(self.terms.pop().unwrap()));
self.recognize_next(token, results, errors)
}
N3State::AfterInvertedVerb => {
self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap()));
self.recognize_next(token, results, errors)
}
N3State::AfterVerbIs => match token {
N3Token::PlainKeyword("of") => {
self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap()));
self
},
_ => {
self.error(errors, "The keyword 'is' should be followed by a predicate then by the keyword 'of'")
}
}
// [13] subject ::= expression
// [15] object ::= expression
// [16] expression ::= path
// [17] path ::= pathItem ( ( "!" path) | ( "^" path) ) ?
N3State::Path => {
self.stack.push(N3State::PathFollowUp);
self.stack.push(N3State::PathItem);
self.recognize_next(token, results, errors)
}
N3State::PathFollowUp => match token {
N3Token::Punctuation("!") => {
self.stack.push(N3State::PathAfterIndicator { is_inverse: false });
self.stack.push(N3State::PathItem);
self
}
N3Token::Punctuation("^") => {
self.stack.push(N3State::PathAfterIndicator { is_inverse: true });
self.stack.push(N3State::PathItem);
self
}
token => self.recognize_next(token, results, errors)
},
N3State::PathAfterIndicator { is_inverse } => {
let predicate = self.terms.pop().unwrap();
let previous = self.terms.pop().unwrap();
let current = BlankNode::default();
results.push(if is_inverse { self.quad(current.clone(), predicate, previous) } else { self.quad(previous, predicate, current.clone())});
self.terms.push(current.into());
self.stack.push(N3State::PathFollowUp);
self.recognize_next(token, results, errors)
},
// [18] pathItem ::= iri | blankNode | quickVar | collection | blankNodePropertyList | iriPropertyList | literal | formula
// [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL
// [20] blankNodePropertyList ::= "[" predicateObjectList "]"
// [21] iriPropertyList ::= IPLSTART iri predicateObjectList "]"
// [22] collection ::= "(" object* ")"
// [23] formula ::= "{" formulaContent? "}"
// [25] numericLiteral ::= DOUBLE | DECIMAL | INTEGER
// [26] rdfLiteral ::= STRING ( LANGTAG | ( "^^" iri) ) ?
// [27] iri ::= IRIREF | prefixedName
// [28] prefixedName ::= PNAME_LN | PNAME_NS
// [29] blankNode ::= BLANK_NODE_LABEL | ANON
// [30] quickVar ::= QUICK_VAR_NAME
N3State::PathItem => {
match token {
N3Token::IriRef(iri) => {
self.terms.push(NamedNode::new_unchecked(iri.into_inner()).into());
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.terms.push(t.into());
self
},
Err(e) => self.error(errors, e)
}
N3Token::BlankNodeLabel(bnode) => {
self.terms.push(BlankNode::new_unchecked(bnode).into());
self
}
N3Token::Variable(name) => {
self.terms.push(Variable::new_unchecked(name).into());
self
}
N3Token::Punctuation("[") => {
self.stack.push(N3State::PropertyListMiddle);
self
}
N3Token::Punctuation("(") => {
self.stack.push(N3State::CollectionBeginning);
self
}
N3Token::String(value) => {
self.stack.push(N3State::LiteralPossibleSuffix { value });
self
}
N3Token::Integer(v) => {
self.terms.push(Literal::new_typed_literal(v, xsd::INTEGER).into());
self
}
N3Token::Decimal(v) => {
self.terms.push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
self
}
N3Token::Double(v) => {
self.terms.push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
self
}
N3Token::PlainKeyword("true") => {
self.terms.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
self
}
N3Token::PlainKeyword("false") => {
self.terms.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
self
}
N3Token::Punctuation("{") => {
self.contexts.push(BlankNode::default());
self.stack.push(N3State::FormulaContent);
self
}
token => self.error(errors, format!("This is not a valid RDF value: {token:?}"))
}
}
N3State::PropertyListMiddle => match token {
N3Token::Punctuation("]") => {
self.terms.push(BlankNode::default().into());
self
},
N3Token::PlainKeyword("id") => {
self.stack.push(N3State::IriPropertyList);
self
},
token => {
self.terms.push(BlankNode::default().into());
self.stack.push(N3State::PropertyListEnd);
self.stack.push(N3State::PredicateObjectList);
self.recognize_next(token, results, errors)
}
}
N3State::PropertyListEnd => if token == N3Token::Punctuation("]") {
self
} else {
errors.push("blank node property lists should end with a ']'".into());
self.recognize_next(token, results, errors)
}
N3State::IriPropertyList => match token {
N3Token::IriRef(id) => {
self.terms.push(NamedNode::new_unchecked(id.into_inner()).into());
self.stack.push(N3State::PropertyListEnd);
self.stack.push(N3State::PredicateObjectList);
self
}
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(t) => {
self.terms.push(t.into());
self.stack.push(N3State::PropertyListEnd);
self.stack.push(N3State::PredicateObjectList);
self
},
Err(e) => self.error(errors, e)
}
_ => {
self.error(errors, "The '[ id' construction should be followed by an IRI")
}
}
N3State::CollectionBeginning => match token {
N3Token::Punctuation(")") => {
self.terms.push(rdf::NIL.into());
self
}
token => {
let root = BlankNode::default();
self.terms.push(root.clone().into());
self.terms.push(root.into());
self.stack.push(N3State::CollectionPossibleEnd);
self.stack.push(N3State::Path);
self.recognize_next(token, results, errors)
}
},
N3State::CollectionPossibleEnd => {
let value = self.terms.pop().unwrap();
let old = self.terms.pop().unwrap();
results.push(self.quad(
old.clone(),
rdf::FIRST,
value,
));
match token {
N3Token::Punctuation(")") => {
results.push(self.quad(old,
rdf::REST,
rdf::NIL
));
self
}
token => {
let new = BlankNode::default();
results.push(self.quad( old,
rdf::REST,
new.clone()
));
self.terms.push(new.into());
self.stack.push(N3State::CollectionPossibleEnd);
self.stack.push(N3State::Path);
self.recognize_next(token, results, errors)
}
}
}
N3State::LiteralPossibleSuffix { value } => {
match token {
N3Token::LangTag(lang) => {
self.terms.push(Literal::new_language_tagged_literal_unchecked(value, lang.to_ascii_lowercase()).into());
self
},
N3Token::Punctuation("^^") => {
self.stack.push(N3State::LiteralExpectDatatype { value });
self
}
token => {
self.terms.push(Literal::new_simple_literal(value).into());
self.recognize_next(token, results, errors)
}
}
}
N3State::LiteralExpectDatatype { value } => {
match token {
N3Token::IriRef(datatype) => {
self.terms.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype.into_inner())).into());
self
},
N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) {
Ok(datatype) =>{
self.terms.push(Literal::new_typed_literal(value, datatype).into());
self
},
Err(e) => self.error(errors, e)
}
token => {
self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors)
}
}
}
// [24] formulaContent ::= ( n3Statement ( "." formulaContent? ) ? ) | ( sparqlDirective formulaContent? )
N3State::FormulaContent => {
match token {
N3Token::Punctuation("}") => {
self.terms.push(self.contexts.pop().unwrap().into());
self
}
N3Token::PlainKeyword(k)if k.eq_ignore_ascii_case("base") => {
self.stack.push(N3State::FormulaContent);
self.stack.push(N3State::BaseExpectIri);
self
}
N3Token::PlainKeyword(k)if k.eq_ignore_ascii_case("prefix") => {
self.stack.push(N3State::FormulaContent);
self.stack.push(N3State::PrefixExpectPrefix);
self
}
N3Token::LangTag("prefix") => {
self.stack.push(N3State::FormulaContentExpectDot);
self.stack.push(N3State::PrefixExpectPrefix);
self
}
N3Token::LangTag("base") => {
self.stack.push(N3State::FormulaContentExpectDot);
self.stack.push(N3State::BaseExpectIri);
self
}
token => {
self.stack.push(N3State::FormulaContentExpectDot);
self.stack.push(N3State::Triples);
self.recognize_next(token, results, errors)
}
}
}
N3State::FormulaContentExpectDot => {
match token {
N3Token::Punctuation("}") => {
self.terms.push(self.contexts.pop().unwrap().into());
self
}
N3Token::Punctuation(".") => {
self.stack.push(N3State::FormulaContent);
self
}
token => {
errors.push("A dot is expected at the end of N3 statements".into());
self.stack.push(N3State::FormulaContent);
self.recognize_next(token, results, errors)
}
}
}
}
} else if token == N3Token::Punctuation(".") {
self.stack.push(N3State::N3Doc);
self
} else {
self
}
}
fn recognize_end(
self,
_results: &mut Vec<Self::Output>,
errors: &mut Vec<RuleRecognizerError>,
) {
match &*self.stack {
[] | [N3State::N3Doc] => (),
_ => errors.push("Unexpected end".into()), //TODO
}
}
fn lexer_options(&self) -> &N3LexerOptions {
&self.lexer_options
}
}
impl N3Recognizer {
    /// Builds a [`Parser`] wired with an N3-mode lexer and a recognizer whose
    /// stack starts in the document state.
    pub fn new_parser(
        base_iri: Option<Iri<String>>,
        prefixes: HashMap<String, Iri<String>>,
    ) -> Parser<Self> {
        let lexer = Lexer::new(
            N3Lexer::new(N3LexerMode::N3),
            MIN_BUFFER_SIZE,
            MAX_BUFFER_SIZE,
            true,
            Some(b"#"),
        );
        let recognizer = N3Recognizer {
            stack: vec![N3State::N3Doc],
            lexer_options: N3LexerOptions { base_iri },
            prefixes,
            terms: Vec::new(),
            predicates: Vec::new(),
            contexts: Vec::new(),
        };
        Parser::new(lexer, recognizer)
    }

    /// Records `msg` and clears the state stack, aborting the current statement.
    #[must_use]
    fn error(
        mut self,
        errors: &mut Vec<RuleRecognizerError>,
        msg: impl Into<RuleRecognizerError>,
    ) -> Self {
        self.stack.clear();
        errors.push(msg.into());
        self
    }

    /// Builds a quad placed in the innermost open formula, or in the default
    /// graph when no formula is open.
    fn quad(
        &self,
        subject: impl Into<N3Term>,
        predicate: impl Into<N3Term>,
        object: impl Into<N3Term>,
    ) -> N3Quad {
        let graph_name = match self.contexts.last() {
            Some(context) => context.clone().into(),
            None => GraphName::DefaultGraph,
        };
        N3Quad {
            subject: subject.into(),
            predicate: predicate.into(),
            object: object.into(),
            graph_name,
        }
    }
}
// States of the N3 recognizer's pushdown automaton. Each variant corresponds
// to a position inside one of the grammar productions listed as comments in
// `recognize_next`.
#[derive(Debug)]
enum N3State {
    // Document level: statements and directives ([1]-[8]).
    N3Doc,
    N3DocExpectDot,
    BaseExpectIri,
    PrefixExpectPrefix,
    PrefixExpectIri { name: String },
    // Triples and predicate/object lists ([9]-[11]).
    Triples,
    TriplesMiddle,
    TriplesEnd,
    PredicateObjectList,
    PredicateObjectListEnd,
    PredicateObjectListPossibleContinuation,
    ObjectsList,
    ObjectsListEnd,
    // Verbs ([12], [14]).
    Verb,
    AfterRegularVerb,
    AfterInvertedVerb,
    AfterVerbIs,
    // Path expressions ([16]-[17]).
    Path,
    PathFollowUp,
    PathAfterIndicator { is_inverse: bool },
    // Path items: IRIs, literals, property lists, collections ([18]-[22]).
    PathItem,
    PropertyListMiddle,
    PropertyListEnd,
    IriPropertyList,
    CollectionBeginning,
    CollectionPossibleEnd,
    // Literal suffixes: language tag or `^^` datatype ([26]).
    LiteralPossibleSuffix { value: String },
    LiteralExpectDatatype { value: String },
    // Formula bodies (`{ ... }`, [23]-[24]).
    FormulaContent,
    FormulaContentExpectDot,
}