Fork of https://github.com/oxigraph/oxigraph.git for the purposes of the NextGraph project
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1070 lines
47 KiB
1070 lines
47 KiB
//! Shared parser implementation for Turtle and TriG.
|
|
|
|
use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
|
|
use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError};
|
|
use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
|
|
use oxiri::Iri;
|
|
use oxrdf::vocab::{rdf, xsd};
|
|
#[cfg(feature = "rdf-star")]
|
|
use oxrdf::Triple;
|
|
use oxrdf::{BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Subject, Term};
|
|
use std::collections::hash_map::Iter;
|
|
use std::collections::HashMap;
|
|
|
|
/// Rule recognizer (push-down state machine) shared by the Turtle and TriG parsers.
///
/// `recognize_next` pops one state from `stack` per token; the `cur_*` vectors hold the
/// terms of the statements being built and `emit_quad` combines their last elements with
/// `cur_graph` to produce a quad.
pub struct TriGRecognizer {
|
|
// Grammar states still to be handled; the last element is handled first.
stack: Vec<TriGState>,
|
|
// Subjects of the statements being parsed (nested constructs push additional entries).
cur_subject: Vec<Subject>,
|
|
// Predicates of the statements being parsed.
cur_predicate: Vec<NamedNode>,
|
|
// Objects of the statements being parsed.
cur_object: Vec<Term>,
|
|
// Graph name given to emitted quads; reset to the default graph at each top-level statement.
cur_graph: GraphName,
|
|
}
|
|
|
|
/// Mutable parsing context handed to every `TriGRecognizer::recognize_next` call.
// The `prefixes` field is private while the others are public, hence the lint exemption.
#[allow(clippy::partial_pub_fields)]
|
|
pub struct TriGRecognizerContext {
|
|
// Options exposed to the lexer through `lexer_options()`; `base_iri` is replaced when a
// base directive is recognized.
pub lexer_options: N3LexerOptions,
|
|
// When `true`, the `GRAPH` keyword and `{ ... }` graph blocks are recognized.
pub with_graph_name: bool,
|
|
#[cfg(feature = "rdf-star")]
|
|
// When `true`, `<<` opens a quoted triple in subject and object position.
pub with_quoted_triples: bool,
|
|
// Prefix name -> IRI map; extended by prefix directives and used to resolve prefixed names.
prefixes: HashMap<String, Iri<String>>,
|
|
}
|
|
|
|
impl TriGRecognizerContext {
|
|
pub fn prefixes(&self) -> Iter<'_, String, Iri<String>> {
|
|
self.prefixes.iter()
|
|
}
|
|
}
|
|
|
|
impl RuleRecognizer for TriGRecognizer {
|
|
type TokenRecognizer = N3Lexer;
|
|
type Output = Quad;
|
|
type Context = TriGRecognizerContext;
|
|
|
|
fn error_recovery_state(mut self) -> Self {
|
|
self.stack.clear();
|
|
self.cur_subject.clear();
|
|
self.cur_predicate.clear();
|
|
self.cur_object.clear();
|
|
self.cur_graph = GraphName::DefaultGraph;
|
|
self
|
|
}
|
|
|
|
/// Feeds one token to the state machine and returns the updated recognizer.
///
/// The state on top of `stack` is popped and `token` is handled according to it. A state
/// either consumes the token (the arm evaluates to `self`) or pushes follow-up states and
/// re-dispatches the same token through a recursive `recognize_next` call. States are
/// pushed in the reverse order of their execution (the last one pushed is handled first).
///
/// Completed quads are appended to `results` and problems to `errors`. Arms calling
/// `self.error(...)` also drop all in-flight state, which leaves the stack empty.
///
/// When the stack is empty, tokens are ignored until a `.` or a `}` is seen, at which
/// point `TriGDoc` is pushed again.
fn recognize_next(
|
|
mut self,
|
|
token: N3Token<'_>,
|
|
context: &mut TriGRecognizerContext,
|
|
results: &mut Vec<Quad>,
|
|
errors: &mut Vec<RuleRecognizerError>,
|
|
) -> Self {
|
|
if let Some(rule) = self.stack.pop() {
|
|
match rule {
|
|
// [1g] trigDoc ::= (directive | block)*
|
|
// [2g] block ::= triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph
|
|
// [3] directive ::= prefixID | base | sparqlPrefix | sparqlBase
|
|
// [4] prefixID ::= '@prefix' PNAME_NS IRIREF '.'
|
|
// [5] base ::= '@base' IRIREF '.'
|
|
// [5s] sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF
|
|
// [6s] sparqlBase ::= "BASE" IRIREF
|
|
TriGState::TriGDoc => {
|
|
// Each top-level statement starts in the default graph; TriGDoc is pushed back so
// that it is handled again once the states of this statement are exhausted.
self.cur_graph = GraphName::DefaultGraph;
|
|
self.stack.push(TriGState::TriGDoc);
|
|
match token {
|
|
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
|
|
self.stack.push(TriGState::BaseExpectIri);
|
|
self
|
|
}
|
|
N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
|
|
self.stack.push(TriGState::PrefixExpectPrefix);
|
|
self
|
|
}
|
|
// '@prefix' and '@base' are matched as LangTag tokens; unlike the keyword forms
// above they must be closed by a '.'.
N3Token::LangTag("prefix") => {
|
|
self.stack.push(TriGState::ExpectDot);
|
|
self.stack.push(TriGState::PrefixExpectPrefix);
|
|
self
|
|
}
|
|
N3Token::LangTag("base") => {
|
|
self.stack.push(TriGState::ExpectDot);
|
|
self.stack.push(TriGState::BaseExpectIri);
|
|
self
|
|
}
|
|
N3Token::PlainKeyword(k)
|
|
if k.eq_ignore_ascii_case("graph") && context.with_graph_name =>
|
|
{
|
|
self.stack.push(TriGState::WrappedGraph);
|
|
self.stack.push(TriGState::GraphName);
|
|
self
|
|
}
|
|
N3Token::Punctuation("{") if context.with_graph_name => {
|
|
self.stack.push(TriGState::WrappedGraph);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
// Anything else is re-dispatched as the start of a triplesOrGraph.
_ => {
|
|
self.stack.push(TriGState::TriplesOrGraph);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
}
|
|
TriGState::ExpectDot => {
|
|
// End of the statement: its subject (if any) is dropped.
self.cur_subject.pop();
|
|
if token == N3Token::Punctuation(".") {
|
|
self
|
|
} else {
|
|
// Non-fatal: the error is recorded and the token is re-dispatched to the next state.
errors.push("A dot is expected at the end of statements".into());
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::BaseExpectIri => {
|
|
if let N3Token::IriRef(iri) = token {
|
|
// Record the new base IRI in the lexer options.
context.lexer_options.base_iri = Some(Iri::parse_unchecked(iri));
|
|
self
|
|
} else {
|
|
self.error(errors, "The BASE keyword should be followed by an IRI")
|
|
}
|
|
}
|
|
TriGState::PrefixExpectPrefix => match token {
|
|
// Only a bare prefix (empty local part, like 'ex:') is accepted here.
N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => {
|
|
self.stack.push(TriGState::PrefixExpectIri {
|
|
name: prefix.to_owned(),
|
|
});
|
|
self
|
|
}
|
|
_ => self.error(
|
|
errors,
|
|
"The PREFIX keyword should be followed by a prefix like 'ex:'",
|
|
),
|
|
},
|
|
TriGState::PrefixExpectIri { name } => {
|
|
if let N3Token::IriRef(iri) = token {
|
|
context.prefixes.insert(name, Iri::parse_unchecked(iri));
|
|
self
|
|
} else {
|
|
self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI")
|
|
}
|
|
}
|
|
// [3g] triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | quotedTriple predicateObjectList '.'
|
|
// [4g] triples2 ::= blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.'
|
|
TriGState::TriplesOrGraph => match token {
|
|
N3Token::IriRef(iri) => {
|
|
self.stack
|
|
.push(TriGState::WrappedGraphOrPredicateObjectList {
|
|
term: NamedNode::new_unchecked(iri).into(),
|
|
});
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.stack
|
|
.push(TriGState::WrappedGraphOrPredicateObjectList {
|
|
term: t.into(),
|
|
});
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
N3Token::BlankNodeLabel(label) => {
|
|
self.stack
|
|
.push(TriGState::WrappedGraphOrPredicateObjectList {
|
|
term: BlankNode::new_unchecked(label).into(),
|
|
});
|
|
self
|
|
}
|
|
N3Token::Punctuation("[") => {
|
|
self.stack
|
|
.push(TriGState::WrappedGraphBlankNodePropertyListCurrent);
|
|
self
|
|
}
|
|
N3Token::Punctuation("(") => {
|
|
// Handled in reverse push order: the collection, then the predicateObjectList, then the '.'.
self.stack.push(TriGState::ExpectDot);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.stack.push(TriGState::SubjectCollectionBeginning);
|
|
self
|
|
}
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Token::Punctuation("<<") if context.with_quoted_triples => {
|
|
self.stack.push(TriGState::ExpectDot);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.stack.push(TriGState::SubjectQuotedTripleEnd);
|
|
self.stack.push(TriGState::QuotedObject);
|
|
self.stack.push(TriGState::Verb);
|
|
self.stack.push(TriGState::QuotedSubject);
|
|
self
|
|
}
|
|
_ => self.error(errors, "TOKEN is not a valid subject or graph name"),
|
|
},
|
|
// The term read by TriplesOrGraph is a graph label when it is followed by '{' (and graph
// names are enabled) and a subject otherwise; the token is re-dispatched in both cases.
TriGState::WrappedGraphOrPredicateObjectList { term } => {
|
|
if token == N3Token::Punctuation("{") && context.with_graph_name {
|
|
self.cur_graph = term.into();
|
|
self.stack.push(TriGState::WrappedGraph);
|
|
} else {
|
|
self.cur_subject.push(term.into());
|
|
self.stack.push(TriGState::ExpectDot);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
}
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
// After a top-level '[': '[]' may still be a graph label or a subject, whereas
// '[ ... ]' is treated as a subject only.
TriGState::WrappedGraphBlankNodePropertyListCurrent => {
|
|
if token == N3Token::Punctuation("]") {
|
|
self.stack
|
|
.push(TriGState::WrappedGraphOrPredicateObjectList {
|
|
term: BlankNode::default().into(),
|
|
});
|
|
self
|
|
} else {
|
|
self.cur_subject.push(BlankNode::default().into());
|
|
self.stack.push(TriGState::ExpectDot);
|
|
self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::SubjectBlankNodePropertyListEnd => {
|
|
if token == N3Token::Punctuation("]") {
|
|
self.stack
|
|
.push(TriGState::SubjectBlankNodePropertyListAfter);
|
|
self
|
|
} else {
|
|
// Non-fatal: parsing continues as if the ']' had been present.
errors.push("blank node property lists should end with a ']'".into());
|
|
self.stack
|
|
.push(TriGState::SubjectBlankNodePropertyListAfter);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::SubjectBlankNodePropertyListAfter => {
|
|
// The predicateObjectList following '[ ... ]' is optional: on '.' or '}' none is parsed.
if matches!(token, N3Token::Punctuation("." | "}")) {
|
|
self.recognize_next(token, context, results, errors)
|
|
} else {
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::SubjectCollectionBeginning => {
|
|
if let N3Token::Punctuation(")") = token {
|
|
self.cur_subject.push(rdf::NIL.into());
|
|
self
|
|
} else {
|
|
// The root is pushed twice: SubjectCollectionPossibleEnd pops one copy while the
// other one stays as the subject of the predicateObjectList that follows.
let root = BlankNode::default();
|
|
self.cur_subject.push(root.clone().into());
|
|
self.cur_subject.push(root.into());
|
|
self.cur_predicate.push(rdf::FIRST.into());
|
|
self.stack.push(TriGState::SubjectCollectionPossibleEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::SubjectCollectionPossibleEnd => {
|
|
// Close the current list cell: its rdf:rest points to rdf:nil on ')' and to a
// fresh cell otherwise.
let old = self.cur_subject.pop().unwrap();
|
|
self.cur_object.pop();
|
|
if let N3Token::Punctuation(")") = token {
|
|
self.cur_predicate.pop();
|
|
results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone()));
|
|
self
|
|
} else {
|
|
let new = BlankNode::default();
|
|
results.push(Quad::new(
|
|
old,
|
|
rdf::REST,
|
|
new.clone(),
|
|
self.cur_graph.clone(),
|
|
));
|
|
self.cur_subject.push(new.into());
|
|
self.stack.push(TriGState::ObjectCollectionPossibleEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
// [5g] wrappedGraph ::= '{' triplesBlock? '}'
|
|
// [6g] triplesBlock ::= triples ('.' triplesBlock?)?
|
|
TriGState::WrappedGraph => {
|
|
if token == N3Token::Punctuation("{") {
|
|
self.stack.push(TriGState::WrappedGraphPossibleEnd);
|
|
self.stack.push(TriGState::Triples);
|
|
self
|
|
} else {
|
|
self.error(errors, "The GRAPH keyword should be followed by a graph name and a value in '{'")
|
|
}
|
|
}
|
|
TriGState::WrappedGraphPossibleEnd => {
|
|
// The triples that just ended are over: their subject (if any) is dropped.
self.cur_subject.pop();
|
|
match token {
|
|
N3Token::Punctuation("}") => self,
|
|
N3Token::Punctuation(".") => {
|
|
self.stack.push(TriGState::WrappedGraphPossibleEnd);
|
|
self.stack.push(TriGState::Triples);
|
|
self
|
|
}
|
|
_ => {
|
|
errors.push(
|
|
"A '}' or a '.' is expected at the end of a graph block".into(),
|
|
);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
}
|
|
// [6] triples ::= subject predicateObjectList | blankNodePropertyList predicateObjectList?
|
|
// [10] subject ::= iri | BlankNode | collection | quotedTriple
|
|
TriGState::Triples => match token {
|
|
N3Token::Punctuation("}") => {
|
|
self.recognize_next(token, context, results, errors) // Early end
|
|
}
|
|
N3Token::Punctuation("[") => {
|
|
self.cur_subject.push(BlankNode::default().into());
|
|
self.stack
|
|
.push(TriGState::TriplesBlankNodePropertyListCurrent);
|
|
self
|
|
}
|
|
N3Token::IriRef(iri) => {
|
|
self.cur_subject.push(NamedNode::new_unchecked(iri).into());
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_subject.push(t.into());
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
N3Token::BlankNodeLabel(label) => {
|
|
self.cur_subject
|
|
.push(BlankNode::new_unchecked(label).into());
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self
|
|
}
|
|
N3Token::Punctuation("(") => {
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.stack.push(TriGState::SubjectCollectionBeginning);
|
|
self
|
|
}
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Token::Punctuation("<<") if context.with_quoted_triples => {
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.stack.push(TriGState::SubjectQuotedTripleEnd);
|
|
self.stack.push(TriGState::QuotedObject);
|
|
self.stack.push(TriGState::Verb);
|
|
self.stack.push(TriGState::QuotedSubject);
|
|
self
|
|
}
|
|
_ => self.error(errors, "TOKEN is not a valid RDF subject"),
|
|
},
|
|
TriGState::TriplesBlankNodePropertyListCurrent => {
|
|
if token == N3Token::Punctuation("]") {
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self
|
|
} else {
|
|
self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
// [7g] labelOrSubject ::= iri | BlankNode
|
|
TriGState::GraphName => match token {
|
|
N3Token::IriRef(iri) => {
|
|
self.cur_graph = NamedNode::new_unchecked(iri).into();
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_graph = t.into();
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
N3Token::BlankNodeLabel(label) => {
|
|
self.cur_graph = BlankNode::new_unchecked(label).into();
|
|
self
|
|
}
|
|
N3Token::Punctuation("[") => {
|
|
self.stack.push(TriGState::GraphNameAnonEnd);
|
|
self
|
|
}
|
|
_ => self.error(errors, "TOKEN is not a valid graph name"),
|
|
},
|
|
TriGState::GraphNameAnonEnd => {
|
|
if token == N3Token::Punctuation("]") {
|
|
self.cur_graph = BlankNode::default().into();
|
|
self
|
|
} else {
|
|
self.error(errors, "Anonymous blank node with a property list are not allowed as graph name")
|
|
}
|
|
}
|
|
// [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)*
|
|
TriGState::PredicateObjectList => {
|
|
// Handled in reverse push order: Verb, then ObjectsList, then PredicateObjectListEnd.
self.stack.push(TriGState::PredicateObjectListEnd);
|
|
self.stack.push(TriGState::ObjectsList);
|
|
self.stack.push(TriGState::Verb);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
TriGState::PredicateObjectListEnd => {
|
|
self.cur_predicate.pop();
|
|
if token == N3Token::Punctuation(";") {
|
|
self.stack
|
|
.push(TriGState::PredicateObjectListPossibleContinuation);
|
|
self
|
|
} else {
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::PredicateObjectListPossibleContinuation => {
|
|
// Repeated ';' are skipped, and a ';' directly followed by '.', '}' or ']' ends the list.
if token == N3Token::Punctuation(";") {
|
|
self.stack
|
|
.push(TriGState::PredicateObjectListPossibleContinuation);
|
|
self
|
|
} else if matches!(token, N3Token::Punctuation("." | "}" | "]")) {
|
|
self.recognize_next(token, context, results, errors)
|
|
} else {
|
|
self.stack.push(TriGState::PredicateObjectListEnd);
|
|
self.stack.push(TriGState::ObjectsList);
|
|
self.stack.push(TriGState::Verb);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
// [8] objectList ::= object annotation? ( ',' object annotation? )*
|
|
// [30t] annotation ::= '{|' predicateObjectList '|}'
|
|
TriGState::ObjectsList => {
|
|
self.stack.push(TriGState::ObjectsListEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
TriGState::ObjectsListEnd => match token {
|
|
N3Token::Punctuation(",") => {
|
|
self.cur_object.pop();
|
|
self.stack.push(TriGState::ObjectsListEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self
|
|
}
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Token::Punctuation("{|") => {
|
|
// The current subject/predicate/object triple becomes the subject of the
// annotation's predicateObjectList.
let triple = Triple::new(
|
|
self.cur_subject.last().unwrap().clone(),
|
|
self.cur_predicate.last().unwrap().clone(),
|
|
self.cur_object.pop().unwrap(),
|
|
);
|
|
self.cur_subject.push(triple.into());
|
|
self.stack.push(TriGState::AnnotationEnd);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self
|
|
}
|
|
_ => {
|
|
self.cur_object.pop();
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
},
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::AnnotationEnd => {
|
|
// Drop the annotated triple that was used as subject of the annotation.
self.cur_subject.pop();
|
|
self.stack.push(TriGState::ObjectsListAfterAnnotation);
|
|
if token == N3Token::Punctuation("|}") {
|
|
self
|
|
} else {
|
|
self.error(errors, "Annotations should end with '|}'")
|
|
}
|
|
}
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::ObjectsListAfterAnnotation => {
|
|
// Unlike ObjectsListEnd, no object is popped here: the '{|' arm already removed it.
if token == N3Token::Punctuation(",") {
|
|
self.stack.push(TriGState::ObjectsListEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self
|
|
} else {
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
// [9] verb ::= predicate | 'a'
|
|
// [11] predicate ::= iri
|
|
TriGState::Verb => match token {
|
|
N3Token::PlainKeyword("a") => {
|
|
self.cur_predicate.push(rdf::TYPE.into());
|
|
self
|
|
}
|
|
N3Token::IriRef(iri) => {
|
|
self.cur_predicate.push(NamedNode::new_unchecked(iri));
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_predicate.push(t);
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
_ => self.error(errors, "TOKEN is not a valid predicate"),
|
|
},
|
|
// [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple
|
|
// [13] literal ::= RDFLiteral | NumericLiteral | BooleanLiteral
|
|
// [14] blank ::= BlankNode | collection
|
|
// [15] blankNodePropertyList ::= '[' predicateObjectList ']'
|
|
// [16] collection ::= '(' object* ')'
|
|
// [17] NumericLiteral ::= INTEGER | DECIMAL | DOUBLE
|
|
// [128s] RDFLiteral ::= String (LANGTAG | '^^' iri)?
|
|
// [133s] BooleanLiteral ::= 'true' | 'false'
|
|
// [18] String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE
|
|
// [135s] iri ::= IRIREF | PrefixedName
|
|
// [136s] PrefixedName ::= PNAME_LN | PNAME_NS
|
|
// [137s] BlankNode ::= BLANK_NODE_LABEL | ANON
|
|
TriGState::Object => match token {
|
|
N3Token::IriRef(iri) => {
|
|
self.cur_object.push(NamedNode::new_unchecked(iri).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_object.push(t.into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
N3Token::BlankNodeLabel(label) => {
|
|
self.cur_object.push(BlankNode::new_unchecked(label).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
N3Token::Punctuation("[") => {
|
|
self.stack
|
|
.push(TriGState::ObjectBlankNodePropertyListCurrent);
|
|
self
|
|
}
|
|
N3Token::Punctuation("(") => {
|
|
self.stack.push(TriGState::ObjectCollectionBeginning);
|
|
self
|
|
}
|
|
N3Token::String(value) => {
|
|
// The literal is only built once the next token has shown whether a language tag
// or a datatype follows.
self.stack
|
|
.push(TriGState::LiteralPossibleSuffix { value, emit: true });
|
|
self
|
|
}
|
|
N3Token::Integer(v) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(v, xsd::INTEGER).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
N3Token::Decimal(v) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
N3Token::Double(v) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
N3Token::PlainKeyword("true") => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
N3Token::PlainKeyword("false") => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
|
|
self.emit_quad(results);
|
|
self
|
|
}
|
|
#[cfg(feature = "rdf-star")]
|
|
N3Token::Punctuation("<<") if context.with_quoted_triples => {
|
|
self.stack
|
|
.push(TriGState::ObjectQuotedTripleEnd { emit: true });
|
|
self.stack.push(TriGState::QuotedObject);
|
|
self.stack.push(TriGState::Verb);
|
|
self.stack.push(TriGState::QuotedSubject);
|
|
self
|
|
}
|
|
_ => self.error(errors, "TOKEN is not a valid RDF object"),
|
|
},
|
|
TriGState::ObjectBlankNodePropertyListCurrent => {
|
|
if token == N3Token::Punctuation("]") {
|
|
self.cur_object.push(BlankNode::default().into());
|
|
self.emit_quad(results);
|
|
self
|
|
} else {
|
|
// The fresh blank node is the subject of the nested predicateObjectList.
self.cur_subject.push(BlankNode::default().into());
|
|
self.stack.push(TriGState::ObjectBlankNodePropertyListEnd);
|
|
self.stack.push(TriGState::PredicateObjectList);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::ObjectBlankNodePropertyListEnd => {
|
|
if token == N3Token::Punctuation("]") {
|
|
// The subject of the nested property list becomes the object of the enclosing triple.
self.cur_object.push(self.cur_subject.pop().unwrap().into());
|
|
self.emit_quad(results);
|
|
self
|
|
} else {
|
|
self.error(errors, "blank node property lists should end with a ']'")
|
|
}
|
|
}
|
|
TriGState::ObjectCollectionBeginning => {
|
|
if let N3Token::Punctuation(")") = token {
|
|
self.cur_object.push(rdf::NIL.into());
|
|
self.emit_quad(results);
|
|
self
|
|
} else {
|
|
// The list root is emitted as object of the enclosing triple and then used as
// subject of the first rdf:first triple.
let root = BlankNode::default();
|
|
self.cur_object.push(root.clone().into());
|
|
self.emit_quad(results);
|
|
self.cur_subject.push(root.into());
|
|
self.cur_predicate.push(rdf::FIRST.into());
|
|
self.stack.push(TriGState::ObjectCollectionPossibleEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
TriGState::ObjectCollectionPossibleEnd => {
|
|
let old = self.cur_subject.pop().unwrap();
|
|
self.cur_object.pop();
|
|
if let N3Token::Punctuation(")") = token {
|
|
self.cur_predicate.pop();
|
|
results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone()));
|
|
self
|
|
} else {
|
|
let new = BlankNode::default();
|
|
results.push(Quad::new(
|
|
old,
|
|
rdf::REST,
|
|
new.clone(),
|
|
self.cur_graph.clone(),
|
|
));
|
|
self.cur_subject.push(new.into());
|
|
self.stack.push(TriGState::ObjectCollectionPossibleEnd);
|
|
self.stack.push(TriGState::Object);
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
}
|
|
// `emit` is false when the literal is the object of a quoted triple: the literal is then
// only pushed on `cur_object` and no quad is produced here.
TriGState::LiteralPossibleSuffix { value, emit } => match token {
|
|
N3Token::LangTag(lang) => {
|
|
self.cur_object.push(
|
|
Literal::new_language_tagged_literal_unchecked(
|
|
value,
|
|
lang.to_ascii_lowercase(),
|
|
)
|
|
.into(),
|
|
);
|
|
if emit {
|
|
self.emit_quad(results);
|
|
}
|
|
self
|
|
}
|
|
N3Token::Punctuation("^^") => {
|
|
self.stack
|
|
.push(TriGState::LiteralExpectDatatype { value, emit });
|
|
self
|
|
}
|
|
// No suffix: the literal is a simple literal and the token belongs to the next state.
_ => {
|
|
self.cur_object
|
|
.push(Literal::new_simple_literal(value).into());
|
|
if emit {
|
|
self.emit_quad(results);
|
|
}
|
|
self.recognize_next(token, context, results, errors)
|
|
}
|
|
},
|
|
TriGState::LiteralExpectDatatype { value, emit } => match token {
|
|
N3Token::IriRef(datatype) => {
|
|
self.cur_object.push(
|
|
Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype))
|
|
.into(),
|
|
);
|
|
if emit {
|
|
self.emit_quad(results);
|
|
}
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(value, t).into());
|
|
if emit {
|
|
self.emit_quad(results);
|
|
}
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
// The error empties the stack, so the re-dispatched token goes through the
// empty-stack branch at the end of this function.
_ => self
|
|
.error(errors, "Expecting a datatype IRI after ^^, found TOKEN")
|
|
.recognize_next(token, context, results, errors),
|
|
},
|
|
// [27t] quotedTriple ::= '<<' qtSubject verb qtObject '>>'
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::SubjectQuotedTripleEnd => {
|
|
// Assemble the quoted triple from the tops of the three stacks and use it as a subject.
let triple = Triple::new(
|
|
self.cur_subject.pop().unwrap(),
|
|
self.cur_predicate.pop().unwrap(),
|
|
self.cur_object.pop().unwrap(),
|
|
);
|
|
self.cur_subject.push(triple.into());
|
|
if token == N3Token::Punctuation(">>") {
|
|
self
|
|
} else {
|
|
self.error(
|
|
errors,
|
|
"Expecting '>>' to close a quoted triple, found TOKEN",
|
|
)
|
|
}
|
|
}
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::ObjectQuotedTripleEnd { emit } => {
|
|
// Same assembly as above, but the quoted triple is used as an object.
let triple = Triple::new(
|
|
self.cur_subject.pop().unwrap(),
|
|
self.cur_predicate.pop().unwrap(),
|
|
self.cur_object.pop().unwrap(),
|
|
);
|
|
self.cur_object.push(triple.into());
|
|
if emit {
|
|
self.emit_quad(results);
|
|
}
|
|
if token == N3Token::Punctuation(">>") {
|
|
self
|
|
} else {
|
|
self.error(
|
|
errors,
|
|
"Expecting '>>' to close a quoted triple, found TOKEN",
|
|
)
|
|
}
|
|
}
|
|
// [28t] qtSubject ::= iri | BlankNode | quotedTriple
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::QuotedSubject => match token {
|
|
N3Token::Punctuation("[") => {
|
|
self.cur_subject.push(BlankNode::default().into());
|
|
self.stack.push(TriGState::QuotedAnonEnd);
|
|
self
|
|
}
|
|
N3Token::IriRef(iri) => {
|
|
self.cur_subject.push(NamedNode::new_unchecked(iri).into());
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_subject.push(t.into());
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
N3Token::BlankNodeLabel(label) => {
|
|
self.cur_subject
|
|
.push(BlankNode::new_unchecked(label).into());
|
|
self
|
|
}
|
|
N3Token::Punctuation("<<") => {
|
|
self.stack.push(TriGState::SubjectQuotedTripleEnd);
|
|
self.stack.push(TriGState::QuotedObject);
|
|
self.stack.push(TriGState::Verb);
|
|
self.stack.push(TriGState::QuotedSubject);
|
|
self
|
|
}
|
|
_ => self.error(
|
|
errors,
|
|
"TOKEN is not a valid RDF quoted triple subject: TOKEN",
|
|
),
|
|
},
|
|
// [29t] qtObject ::= iri | BlankNode | literal | quotedTriple
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::QuotedObject => match token {
|
|
// Same shapes as in `Object`, but nothing is emitted: the term is only pushed on `cur_object`.
N3Token::Punctuation("[") => {
|
|
self.cur_object.push(BlankNode::default().into());
|
|
self.stack.push(TriGState::QuotedAnonEnd);
|
|
self
|
|
}
|
|
N3Token::IriRef(iri) => {
|
|
self.cur_object.push(NamedNode::new_unchecked(iri).into());
|
|
self
|
|
}
|
|
N3Token::PrefixedName {
|
|
prefix,
|
|
local,
|
|
might_be_invalid_iri,
|
|
} => match resolve_local_name(
|
|
prefix,
|
|
&local,
|
|
might_be_invalid_iri,
|
|
&context.prefixes,
|
|
) {
|
|
Ok(t) => {
|
|
self.cur_object.push(t.into());
|
|
self
|
|
}
|
|
Err(e) => self.error(errors, e),
|
|
},
|
|
N3Token::BlankNodeLabel(label) => {
|
|
self.cur_object.push(BlankNode::new_unchecked(label).into());
|
|
self
|
|
}
|
|
N3Token::String(value) => {
|
|
self.stack
|
|
.push(TriGState::LiteralPossibleSuffix { value, emit: false });
|
|
self
|
|
}
|
|
N3Token::Integer(v) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(v, xsd::INTEGER).into());
|
|
self
|
|
}
|
|
N3Token::Decimal(v) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
|
|
self
|
|
}
|
|
N3Token::Double(v) => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
|
|
self
|
|
}
|
|
N3Token::PlainKeyword("true") => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
|
|
self
|
|
}
|
|
N3Token::PlainKeyword("false") => {
|
|
self.cur_object
|
|
.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
|
|
self
|
|
}
|
|
N3Token::Punctuation("<<") => {
|
|
self.stack
|
|
.push(TriGState::ObjectQuotedTripleEnd { emit: false });
|
|
self.stack.push(TriGState::QuotedObject);
|
|
self.stack.push(TriGState::Verb);
|
|
self.stack.push(TriGState::QuotedSubject);
|
|
self
|
|
}
|
|
_ => self.error(errors, "TOKEN is not a valid RDF quoted triple object"),
|
|
},
|
|
#[cfg(feature = "rdf-star")]
|
|
TriGState::QuotedAnonEnd => {
|
|
if token == N3Token::Punctuation("]") {
|
|
self
|
|
} else {
|
|
self.error(errors, "Anonymous blank node with a property list are not allowed in quoted triples")
|
|
}
|
|
}
|
|
}
|
|
// Empty stack (the state left behind by `error`): tokens are skipped until the end of a
// statement or block, after which parsing restarts from TriGDoc.
} else if token == N3Token::Punctuation(".") || token == N3Token::Punctuation("}") {
|
|
// TODO: be smarter depending if we are in '{' or not
|
|
self.stack.push(TriGState::TriGDoc);
|
|
self
|
|
} else {
|
|
self
|
|
}
|
|
}
|
|
|
|
fn recognize_end(
|
|
mut self,
|
|
_context: &mut TriGRecognizerContext,
|
|
results: &mut Vec<Self::Output>,
|
|
errors: &mut Vec<RuleRecognizerError>,
|
|
) {
|
|
match &*self.stack {
|
|
[] | [TriGState::TriGDoc] => {
|
|
debug_assert!(
|
|
self.cur_subject.is_empty(),
|
|
"The cur_subject stack must be empty if the state stack is empty"
|
|
);
|
|
debug_assert!(
|
|
self.cur_predicate.is_empty(),
|
|
"The cur_predicate stack must be empty if the state stack is empty"
|
|
);
|
|
debug_assert!(
|
|
self.cur_object.is_empty(),
|
|
"The cur_object stack must be empty if the state stack is empty"
|
|
);
|
|
}
|
|
[.., TriGState::LiteralPossibleSuffix { value, emit: true }] => {
|
|
self.cur_object
|
|
.push(Literal::new_simple_literal(value).into());
|
|
self.emit_quad(results);
|
|
errors.push("Triples should be followed by a dot".into())
|
|
}
|
|
_ => errors.push("Unexpected end".into()), // TODO
|
|
}
|
|
}
|
|
|
|
fn lexer_options(context: &TriGRecognizerContext) -> &N3LexerOptions {
|
|
&context.lexer_options
|
|
}
|
|
}
|
|
|
|
impl TriGRecognizer {
|
|
pub fn new_parser(
|
|
with_graph_name: bool,
|
|
#[cfg(feature = "rdf-star")] with_quoted_triples: bool,
|
|
unchecked: bool,
|
|
base_iri: Option<Iri<String>>,
|
|
prefixes: HashMap<String, Iri<String>>,
|
|
) -> Parser<Self> {
|
|
Parser::new(
|
|
Lexer::new(
|
|
N3Lexer::new(N3LexerMode::Turtle, unchecked),
|
|
MIN_BUFFER_SIZE,
|
|
MAX_BUFFER_SIZE,
|
|
true,
|
|
Some(b"#"),
|
|
),
|
|
Self {
|
|
stack: vec![TriGState::TriGDoc],
|
|
cur_subject: Vec::new(),
|
|
cur_predicate: Vec::new(),
|
|
cur_object: Vec::new(),
|
|
cur_graph: GraphName::DefaultGraph,
|
|
},
|
|
TriGRecognizerContext {
|
|
with_graph_name,
|
|
#[cfg(feature = "rdf-star")]
|
|
with_quoted_triples,
|
|
prefixes,
|
|
lexer_options: N3LexerOptions { base_iri },
|
|
},
|
|
)
|
|
}
|
|
|
|
#[must_use]
|
|
fn error(
|
|
mut self,
|
|
errors: &mut Vec<RuleRecognizerError>,
|
|
msg: impl Into<RuleRecognizerError>,
|
|
) -> Self {
|
|
errors.push(msg.into());
|
|
self.stack.clear();
|
|
self.cur_subject.clear();
|
|
self.cur_predicate.clear();
|
|
self.cur_object.clear();
|
|
self.cur_graph = GraphName::DefaultGraph;
|
|
self
|
|
}
|
|
|
|
fn emit_quad(&mut self, results: &mut Vec<Quad>) {
|
|
results.push(Quad::new(
|
|
self.cur_subject.last().unwrap().clone(),
|
|
self.cur_predicate.last().unwrap().clone(),
|
|
self.cur_object.last().unwrap().clone(),
|
|
self.cur_graph.clone(),
|
|
));
|
|
}
|
|
}
|
|
|
|
/// States of the `TriGRecognizer` stack.
///
/// Each variant is one step of the Turtle/TriG grammar still to be recognized; the
/// handling of every state lives in the matching arm of `recognize_next`.
#[derive(Debug)]
|
|
enum TriGState {
|
|
// Document level: start of a directive or of a block.
TriGDoc,
|
|
// A '.' closing a statement is expected.
ExpectDot,
|
|
BaseExpectIri,
|
|
PrefixExpectPrefix,
|
|
// The prefix `name` has been read, its IRI is expected.
PrefixExpectIri {
|
|
name: String,
|
|
},
|
|
TriplesOrGraph,
|
|
WrappedGraphBlankNodePropertyListCurrent,
|
|
SubjectBlankNodePropertyListEnd,
|
|
SubjectBlankNodePropertyListAfter,
|
|
SubjectCollectionBeginning,
|
|
SubjectCollectionPossibleEnd,
|
|
// `term` has been read at the top level: it is a graph label if a '{' follows and
// a subject otherwise.
WrappedGraphOrPredicateObjectList {
|
|
term: NamedOrBlankNode,
|
|
},
|
|
WrappedGraph,
|
|
WrappedGraphPossibleEnd,
|
|
GraphName,
|
|
GraphNameAnonEnd,
|
|
Triples,
|
|
TriplesBlankNodePropertyListCurrent,
|
|
PredicateObjectList,
|
|
PredicateObjectListEnd,
|
|
PredicateObjectListPossibleContinuation,
|
|
ObjectsList,
|
|
ObjectsListEnd,
|
|
#[cfg(feature = "rdf-star")]
|
|
AnnotationEnd,
|
|
#[cfg(feature = "rdf-star")]
|
|
ObjectsListAfterAnnotation,
|
|
Verb,
|
|
Object,
|
|
ObjectBlankNodePropertyListCurrent,
|
|
ObjectBlankNodePropertyListEnd,
|
|
ObjectCollectionBeginning,
|
|
ObjectCollectionPossibleEnd,
|
|
// A string with lexical form `value` has been read; a language tag or '^^' may follow.
// `emit` tells whether a quad must be emitted once the literal is complete.
LiteralPossibleSuffix {
|
|
value: String,
|
|
emit: bool,
|
|
},
|
|
// '^^' has been read after the string `value`; the datatype IRI is expected.
LiteralExpectDatatype {
|
|
value: String,
|
|
emit: bool,
|
|
},
|
|
#[cfg(feature = "rdf-star")]
|
|
SubjectQuotedTripleEnd,
|
|
#[cfg(feature = "rdf-star")]
|
|
// `emit` tells whether a quad must be emitted once the quoted triple object is complete.
ObjectQuotedTripleEnd {
|
|
emit: bool,
|
|
},
|
|
#[cfg(feature = "rdf-star")]
|
|
QuotedSubject,
|
|
#[cfg(feature = "rdf-star")]
|
|
QuotedObject,
|
|
#[cfg(feature = "rdf-star")]
|
|
QuotedAnonEnd,
|
|
}
|
|
|