diff --git a/lib/oxrdfio/src/parser.rs b/lib/oxrdfio/src/parser.rs index 25f86598..bc88cfc4 100644 --- a/lib/oxrdfio/src/parser.rs +++ b/lib/oxrdfio/src/parser.rs @@ -272,19 +272,19 @@ impl RdfParser { /// Reads are buffered. /// /// ``` - /// use oxrdfio::{RdfFormat, RdfParser, ParseError}; + /// use oxrdfio::{RdfFormat, RdfParser}; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<(), ParseError> { - /// let file = " ."; + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxrdfio::ParseError> { + /// let file = " ."; /// - /// let parser = RdfParser::from_format(RdfFormat::NTriples); - /// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); - /// if let Some(quad) = reader.next().await { - /// assert_eq!(quad?.subject.to_string(), ""); - /// } - /// Ok(()) + /// let parser = RdfParser::from_format(RdfFormat::NTriples); + /// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); + /// if let Some(quad) = reader.next().await { + /// assert_eq!(quad?.subject.to_string(), ""); /// } + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( @@ -390,19 +390,19 @@ impl Iterator for FromReadQuadReader { /// Reads are buffered. /// /// ``` -/// use oxrdfio::{RdfFormat, RdfParser, ParseError}; +/// use oxrdfio::{RdfFormat, RdfParser}; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<(), ParseError> { -/// let file = " ."; +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxrdfio::ParseError> { +/// let file = " ."; /// -/// let parser = RdfParser::from_format(RdfFormat::NTriples); -/// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); -/// if let Some(quad) = reader.next().await { -/// assert_eq!(quad?.subject.to_string(), ""); -/// } -/// Ok(()) +/// let parser = RdfParser::from_format(RdfFormat::NTriples); +/// let mut reader = parser.parse_tokio_async_read(file.as_bytes()); +/// if let Some(quad) = reader.next().await { +/// assert_eq!(quad?.subject.to_string(), ""); /// } +/// # Ok(()) +/// # } /// ``` #[must_use] #[cfg(feature = "async-tokio")] diff --git a/lib/oxrdfio/src/serializer.rs b/lib/oxrdfio/src/serializer.rs index 0d931d81..9bfaceec 100644 --- a/lib/oxrdfio/src/serializer.rs +++ b/lib/oxrdfio/src/serializer.rs @@ -114,23 +114,22 @@ impl RdfSerializer { /// ``` /// use oxrdfio::{RdfFormat, RdfSerializer}; /// use oxrdf::{Quad, NamedNode}; - /// use std::io; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> io::Result<()> { - /// let mut buffer = Vec::new(); - /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); - /// writer.write_quad(&Quad { - /// subject: NamedNode::new_unchecked("http://example.com/s").into(), - /// predicate: NamedNode::new_unchecked("http://example.com/p"), - /// object: NamedNode::new_unchecked("http://example.com/o").into(), - /// graph_name: NamedNode::new_unchecked("http://example.com/g").into() - /// }).await?; - /// writer.finish().await?; + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let mut buffer = Vec::new(); + /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); + /// writer.write_quad(&Quad { + /// subject: NamedNode::new_unchecked("http://example.com/s").into(), + /// predicate: NamedNode::new_unchecked("http://example.com/p"), + /// 
object: NamedNode::new_unchecked("http://example.com/o").into(), + /// graph_name: NamedNode::new_unchecked("http://example.com/g").into() + /// }).await?; + /// writer.finish().await?; /// - /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); - /// Ok(()) - /// } + /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn serialize_to_tokio_async_write( @@ -239,23 +238,22 @@ impl ToWriteQuadWriter { /// ``` /// use oxrdfio::{RdfFormat, RdfSerializer}; /// use oxrdf::{Quad, NamedNode}; -/// use std::io; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> io::Result<()> { -/// let mut buffer = Vec::new(); -/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); -/// writer.write_quad(&Quad { -/// subject: NamedNode::new_unchecked("http://example.com/s").into(), -/// predicate: NamedNode::new_unchecked("http://example.com/p"), -/// object: NamedNode::new_unchecked("http://example.com/o").into(), -/// graph_name: NamedNode::new_unchecked("http://example.com/g").into() -/// }).await?; -/// writer.finish().await?; +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { +/// let mut buffer = Vec::new(); +/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(&mut buffer); +/// writer.write_quad(&Quad { +/// subject: NamedNode::new_unchecked("http://example.com/s").into(), +/// predicate: NamedNode::new_unchecked("http://example.com/p"), +/// object: NamedNode::new_unchecked("http://example.com/o").into(), +/// graph_name: NamedNode::new_unchecked("http://example.com/g").into() +/// }).await?; +/// writer.finish().await?; /// -/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); -/// Ok(()) -/// } +/// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); +/// # Ok(()) +/// # } /// ``` #[must_use] #[cfg(feature = "async-tokio")] diff --git a/lib/oxrdfxml/src/parser.rs b/lib/oxrdfxml/src/parser.rs index dcd216a4..086d094a 100644 --- a/lib/oxrdfxml/src/parser.rs +++ b/lib/oxrdfxml/src/parser.rs @@ -108,31 +108,31 @@ impl RdfXmlParser { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; - /// use oxrdfxml::{RdfXmlParser, ParseError}; + /// use oxrdfxml::RdfXmlParser; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<(), ParseError> { + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxrdfxml::ParseError> { /// let file = b" - /// - /// - /// - /// Foo - /// - /// - /// "; + /// + /// + /// + /// Foo + /// + /// + /// "; /// - /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); - /// let mut count = 0; - /// let mut parser = RdfXmlParser::new().parse_tokio_async_read(file.as_ref()); - /// while let Some(triple) = parser.next().await { - /// let triple = triple?; - /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { - /// count += 1; - /// } + /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); + /// let mut count = 0; + /// let mut parser = RdfXmlParser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; /// } - /// assert_eq!(2, count); - /// Ok(()) /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } /// ``` 
#[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( @@ -234,31 +234,31 @@ impl FromReadRdfXmlReader { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; -/// use oxrdfxml::{RdfXmlParser, ParseError}; +/// use oxrdfxml::RdfXmlParser; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<(), ParseError> { +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxrdfxml::ParseError> { /// let file = b" -/// -/// -/// -/// Foo -/// -/// -/// "; +/// +/// +/// +/// Foo +/// +/// +/// "; /// -/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); -/// let mut count = 0; -/// let mut parser = RdfXmlParser::new().parse_tokio_async_read(file.as_ref()); -/// while let Some(triple) = parser.next().await { -/// let triple = triple?; -/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { -/// count += 1; -/// } +/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); +/// let mut count = 0; +/// let mut parser = RdfXmlParser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; /// } -/// assert_eq!(2, count); -/// Ok(()) /// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] diff --git a/lib/oxrdfxml/src/serializer.rs b/lib/oxrdfxml/src/serializer.rs index d3e9949f..01e5b4b4 100644 --- a/lib/oxrdfxml/src/serializer.rs +++ b/lib/oxrdfxml/src/serializer.rs @@ -74,22 +74,21 @@ impl RdfXmlSerializer { /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxrdfxml::RdfXmlSerializer; - /// use std::io::Result; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<()> { - /// let mut writer = RdfXmlSerializer::new().serialize_to_tokio_async_write(Vec::new()); - /// writer.write_triple(TripleRef::new( - /// NamedNodeRef::new_unchecked("http://example.com#me"), - /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), - /// NamedNodeRef::new_unchecked("http://schema.org/Person"), - /// )).await?; - /// assert_eq!( - /// b"\n\n\t\n\t\t\n\t\n", - /// writer.finish().await?.as_slice() - /// ); - /// Ok(()) - /// } + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let mut writer = RdfXmlSerializer::new().serialize_to_tokio_async_write(Vec::new()); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// )).await?; + /// assert_eq!( + /// b"\n\n\t\n\t\t\n\t\n", + /// writer.finish().await?.as_slice() + /// ); + /// # Ok(()) + /// # } /// ``` #[allow(clippy::unused_self)] #[cfg(feature = "async-tokio")] @@ -160,22 +159,21 @@ impl ToWriteRdfXmlWriter { /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxrdfxml::RdfXmlSerializer; -/// use std::io::Result; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<()> { -/// let mut writer = RdfXmlSerializer::new().serialize_to_tokio_async_write(Vec::new()); -/// writer.write_triple(TripleRef::new( -/// NamedNodeRef::new_unchecked("http://example.com#me"), -/// 
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), -/// NamedNodeRef::new_unchecked("http://schema.org/Person"), -/// )).await?; -/// assert_eq!( -/// b"\n\n\t\n\t\t\n\t\n", -/// writer.finish().await?.as_slice() -/// ); -/// Ok(()) -/// } +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { +/// let mut writer = RdfXmlSerializer::new().serialize_to_tokio_async_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person"), +/// )).await?; +/// assert_eq!( +/// b"\n\n\t\n\t\t\n\t\n", +/// writer.finish().await?.as_slice() +/// ); +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] diff --git a/lib/oxttl/src/line_formats.rs b/lib/oxttl/src/line_formats.rs index 1b4c31e6..4d6919a5 100644 --- a/lib/oxttl/src/line_formats.rs +++ b/lib/oxttl/src/line_formats.rs @@ -9,13 +9,15 @@ use oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term}; pub struct NQuadsRecognizer { stack: Vec, + subjects: Vec, + predicates: Vec, + objects: Vec, +} +pub struct NQuadsRecognizerContext { with_graph_name: bool, #[cfg(feature = "rdf-star")] with_quoted_triples: bool, lexer_options: N3LexerOptions, - subjects: Vec, - predicates: Vec, - objects: Vec, } enum NQuadsState { @@ -39,6 +41,7 @@ enum NQuadsState { impl RuleRecognizer for NQuadsRecognizer { type TokenRecognizer = N3Lexer; type Output = Quad; + type Context = NQuadsRecognizerContext; fn error_recovery_state(mut self) -> Self { self.stack.clear(); @@ -51,6 +54,7 @@ impl RuleRecognizer for NQuadsRecognizer { fn recognize_next( mut self, token: N3Token, + context: &mut NQuadsRecognizerContext, results: &mut Vec, errors: &mut Vec, ) -> Self { @@ -69,7 +73,7 @@ impl RuleRecognizer for NQuadsRecognizer { self } #[cfg(feature = "rdf-star")] - N3Token::Punctuation("<<") if self.with_quoted_triples => { + N3Token::Punctuation("<<") if context.with_quoted_triples => { self.stack.push(NQuadsState::AfterQuotedSubject); self.stack.push(NQuadsState::ExpectSubject); self @@ -111,7 +115,7 @@ impl RuleRecognizer for NQuadsRecognizer { self } #[cfg(feature = "rdf-star")] - N3Token::Punctuation("<<") if self.with_quoted_triples => { + N3Token::Punctuation("<<") if context.with_quoted_triples => { self.stack.push(NQuadsState::AfterQuotedObject); self.stack.push(NQuadsState::ExpectSubject); self @@ -143,7 +147,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.objects.push(Literal::new_simple_literal(value).into()); self.stack .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } }, NQuadsState::ExpectLiteralDatatype { value } => match token { @@ -164,7 +168,7 @@ impl RuleRecognizer for NQuadsRecognizer { NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => { if self.stack.is_empty() { match token { - N3Token::IriRef(g) if self.with_graph_name => { + N3Token::IriRef(g) if context.with_graph_name => { self.emit_quad( results, NamedNode::from(g).into(), @@ -172,7 +176,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.stack.push(NQuadsState::ExpectDot); self } - N3Token::BlankNodeLabel(g) if self.with_graph_name => { + N3Token::BlankNodeLabel(g) if context.with_graph_name => { self.emit_quad(results, BlankNode::new_unchecked(g).into()); 
self.stack.push(NQuadsState::ExpectDot); self @@ -180,7 +184,7 @@ impl RuleRecognizer for NQuadsRecognizer { _ => { self.emit_quad(results, GraphName::DefaultGraph); self.stack.push(NQuadsState::ExpectDot); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } } else if token == N3Token::Punctuation(">>") { @@ -195,7 +199,7 @@ impl RuleRecognizer for NQuadsRecognizer { } else { errors.push("Quads should be followed by a dot".into()); self.stack.push(NQuadsState::ExpectSubject); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, #[cfg(feature = "rdf-star")] NQuadsState::AfterQuotedSubject => { @@ -206,7 +210,7 @@ impl RuleRecognizer for NQuadsRecognizer { }; self.subjects.push(triple.into()); self.stack.push(NQuadsState::ExpectPredicate); - self.recognize_next(token, results, errors) + self.recognize_next(token,context, results, errors) } #[cfg(feature = "rdf-star")] NQuadsState::AfterQuotedObject => { @@ -218,7 +222,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.objects.push(triple.into()); self.stack .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } } else if token == N3Token::Punctuation(".") { @@ -229,7 +233,12 @@ impl RuleRecognizer for NQuadsRecognizer { } } - fn recognize_end(mut self, results: &mut Vec, errors: &mut Vec) { + fn recognize_end( + mut self, + _context: &mut NQuadsRecognizerContext, + results: &mut Vec, + errors: &mut Vec, + ) { match &*self.stack { [NQuadsState::ExpectSubject] | [] => (), [NQuadsState::ExpectDot] => errors.push("Triples should be followed by a dot".into()), @@ -246,8 +255,8 @@ impl RuleRecognizer for NQuadsRecognizer { } } - fn lexer_options(&self) -> &N3LexerOptions { - &self.lexer_options + fn lexer_options(context: &NQuadsRecognizerContext) -> &N3LexerOptions { + &context.lexer_options } } @@ -266,13 +275,15 @@ impl NQuadsRecognizer { ), NQuadsRecognizer { stack: vec![NQuadsState::ExpectSubject], + subjects: Vec::new(), + predicates: Vec::new(), + objects: Vec::new(), + }, + NQuadsRecognizerContext { with_graph_name, #[cfg(feature = "rdf-star")] with_quoted_triples, lexer_options: N3LexerOptions::default(), - subjects: Vec::new(), - predicates: Vec::new(), - objects: Vec::new(), }, ) } diff --git a/lib/oxttl/src/n3.rs b/lib/oxttl/src/n3.rs index 84e36235..c4f02613 100644 --- a/lib/oxttl/src/n3.rs +++ b/lib/oxttl/src/n3.rs @@ -272,30 +272,29 @@ impl N3Parser { /// ``` /// use oxrdf::{NamedNode, vocab::rdf}; /// use oxttl::n3::{N3Parser, N3Term}; - /// use oxttl::ParseError; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<(), ParseError> { - /// let file = b"@base . - /// @prefix schema: . - /// a schema:Person ; - /// schema:name \"Foo\" . - /// a schema:Person ; - /// schema:name \"Bar\" ."; + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::ParseError> { + /// let file = b"@base . + /// @prefix schema: . + /// a schema:Person ; + /// schema:name \"Foo\" . 
+ /// a schema:Person ; + /// schema:name \"Bar\" ."; /// - /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); - /// let schema_person = N3Term::NamedNode(NamedNode::new_unchecked("http://schema.org/Person")); - /// let mut count = 0; - /// let mut parser = N3Parser::new().parse_tokio_async_read(file.as_ref()); - /// while let Some(triple) = parser.next().await { - /// let triple = triple?; - /// if triple.predicate == rdf_type && triple.object == schema_person { - /// count += 1; - /// } + /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); + /// let schema_person = N3Term::NamedNode(NamedNode::new_unchecked("http://schema.org/Person")); + /// let mut count = 0; + /// let mut parser = N3Parser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf_type && triple.object == schema_person { + /// count += 1; /// } - /// assert_eq!(2, count); - /// Ok(()) /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( @@ -382,6 +381,33 @@ pub struct FromReadN3Reader { inner: FromReadIterator, } +impl FromReadN3Reader { + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns the mapping from prefix name to prefix value. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// let file = b"@base . + /// @prefix schema: . + /// a schema:Person ; + /// schema:name \"Foo\" ."; + /// + /// let mut reader = N3Parser::new().parse_read(file.as_ref()); + /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning + /// + /// reader.next().unwrap()?; // We read the first triple + /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn prefixes(&self) -> &HashMap> { + &self.inner.parser.context.prefixes + } +} + impl Iterator for FromReadN3Reader { type Item = Result; @@ -396,30 +422,29 @@ impl Iterator for FromReadN3Reader { /// ``` /// use oxrdf::{NamedNode, vocab::rdf}; /// use oxttl::n3::{N3Parser, N3Term}; -/// use oxttl::ParseError; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<(), ParseError> { -/// let file = b"@base . -/// @prefix schema: . -/// a schema:Person ; -/// schema:name \"Foo\" . -/// a schema:Person ; -/// schema:name \"Bar\" ."; +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::ParseError> { +/// let file = b"@base . +/// @prefix schema: . +/// a schema:Person ; +/// schema:name \"Foo\" . 
+/// a schema:Person ; +/// schema:name \"Bar\" ."; /// -/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); -/// let schema_person = N3Term::NamedNode(NamedNode::new_unchecked("http://schema.org/Person")); -/// let mut count = 0; -/// let mut parser = N3Parser::new().parse_tokio_async_read(file.as_ref()); -/// while let Some(triple) = parser.next().await { -/// let triple = triple?; -/// if triple.predicate == rdf_type && triple.object == schema_person { -/// count += 1; -/// } +/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned()); +/// let schema_person = N3Term::NamedNode(NamedNode::new_unchecked("http://schema.org/Person")); +/// let mut count = 0; +/// let mut parser = N3Parser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf_type && triple.object == schema_person { +/// count += 1; /// } -/// assert_eq!(2, count); -/// Ok(()) /// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] @@ -433,6 +458,34 @@ impl FromTokioAsyncReadN3Reader { pub async fn next(&mut self) -> Option> { Some(self.inner.next().await?.map(Into::into)) } + + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns the mapping from prefix name to prefix value. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::ParseError> { + /// let file = b"@base . + /// @prefix schema: . + /// a schema:Person ; + /// schema:name \"Foo\" ."; + /// + /// let mut reader = N3Parser::new().parse_tokio_async_read(file.as_ref()); + /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning + /// + /// reader.next().await.unwrap()?; // We read the first triple + /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes + /// # Ok(()) + /// # } + /// ``` + pub fn prefixes(&self) -> &HashMap> { + &self.inner.parser.context.prefixes + } } /// Parses a N3 file by using a low-level API. Can be built using [`N3Parser::parse`]. @@ -501,6 +554,32 @@ impl LowLevelN3Reader { pub fn read_next(&mut self) -> Option> { self.parser.read_next() } + + /// The list of IRI prefixes considered at the current step of the parsing. + /// + /// This method returns the mapping from prefix name to prefix value. + /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered. + /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned). + /// + /// ``` + /// use oxttl::N3Parser; + /// + /// let file = b"@base . + /// @prefix schema: . 
+ /// a schema:Person ; + /// schema:name \"Foo\" ."; + /// + /// let mut reader = N3Parser::new().parse(); + /// reader.extend_from_slice(file); + /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning + /// + /// reader.read_next().unwrap()?; // We read the first triple + /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes + /// # Result::<_,Box>::Ok(()) + /// ``` + pub fn prefixes(&self) -> &HashMap> { + &self.parser.context.prefixes + } } #[derive(Clone)] @@ -511,16 +590,20 @@ enum Predicate { struct N3Recognizer { stack: Vec, - lexer_options: N3LexerOptions, - prefixes: HashMap>, terms: Vec, predicates: Vec, contexts: Vec, } +struct N3RecognizerContext { + lexer_options: N3LexerOptions, + prefixes: HashMap>, +} + impl RuleRecognizer for N3Recognizer { type TokenRecognizer = N3Lexer; type Output = N3Quad; + type Context = N3RecognizerContext; fn error_recovery_state(mut self) -> Self { self.stack.clear(); @@ -533,6 +616,7 @@ impl RuleRecognizer for N3Recognizer { fn recognize_next( mut self, token: N3Token, + context: &mut N3RecognizerContext, results: &mut Vec, errors: &mut Vec, ) -> Self { @@ -570,7 +654,7 @@ impl RuleRecognizer for N3Recognizer { _ => { self.stack.push(N3State::N3DocExpectDot); self.stack.push(N3State::Triples); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } }, @@ -579,12 +663,12 @@ impl RuleRecognizer for N3Recognizer { self } else { errors.push("A dot is expected at the end of N3 statements".into()); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } }, N3State::BaseExpectIri => match token { N3Token::IriRef(iri) => { - self.lexer_options.base_iri = Some(iri); + context.lexer_options.base_iri = Some(iri); self } _ => self.error(errors, "The BASE keyword should be followed by an IRI"), @@ -600,7 +684,7 @@ impl RuleRecognizer for N3Recognizer { }, N3State::PrefixExpectIri { name } => match token { N3Token::IriRef(iri) => { - self.prefixes.insert(name, iri); + context.prefixes.insert(name, iri); self } _ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"), @@ -609,25 +693,25 @@ impl RuleRecognizer for N3Recognizer { N3State::Triples => { self.stack.push(N3State::TriplesMiddle); self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, N3State::TriplesMiddle => if matches!(token, N3Token::Punctuation("." | "]" | "}" | ")")) { - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } else { self.stack.push(N3State::TriplesEnd); self.stack.push(N3State::PredicateObjectList); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, N3State::TriplesEnd => { self.terms.pop(); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, // [10] predicateObjectList ::= verb objectList ( ";" ( verb objectList) ? 
) * N3State::PredicateObjectList => { self.stack.push(N3State::PredicateObjectListEnd); self.stack.push(N3State::ObjectsList); self.stack.push(N3State::Verb); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, N3State::PredicateObjectListEnd => { self.predicates.pop(); @@ -635,25 +719,25 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::PredicateObjectListPossibleContinuation); self } else { - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } }, N3State::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") { self.stack.push(N3State::PredicateObjectListPossibleContinuation); self } else if matches!(token, N3Token::Punctuation(";" | "." | "}" | "]" | ")")) { - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } else { self.stack.push(N3State::PredicateObjectListEnd); self.stack.push(N3State::ObjectsList); self.stack.push(N3State::Verb); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, // [11] objectList ::= object ( "," object) * N3State::ObjectsList => { self.stack.push(N3State::ObjectsListEnd); self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } N3State::ObjectsListEnd => { let object = self.terms.pop().unwrap(); @@ -675,7 +759,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::Path); self } else { - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } }, // [12] verb ::= predicate | "a" | ( "has" expression) | ( "is" expression "of") | "=" | "<=" | "=>" @@ -715,16 +799,16 @@ impl RuleRecognizer for N3Recognizer { _ => { self.stack.push(N3State::AfterRegularVerb); self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } N3State::AfterRegularVerb => { self.predicates.push(Predicate::Regular(self.terms.pop().unwrap())); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } N3State::AfterInvertedVerb => { self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap())); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } N3State::AfterVerbIs => match token { N3Token::PlainKeyword("of") => { @@ -742,7 +826,7 @@ impl RuleRecognizer for N3Recognizer { N3State::Path => { self.stack.push(N3State::PathFollowUp); self.stack.push(N3State::PathItem); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } N3State::PathFollowUp => match token { N3Token::Punctuation("!") => { @@ -755,7 +839,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::PathItem); self } - _ => self.recognize_next(token, results, errors) + _ => self.recognize_next(token, context, results, errors) }, N3State::PathAfterIndicator { is_inverse } => { let predicate = self.terms.pop().unwrap(); @@ -764,7 +848,7 @@ impl RuleRecognizer for N3Recognizer { results.push(if is_inverse { self.quad(current.clone(), predicate, previous) } else { self.quad(previous, predicate, current.clone())}); self.terms.push(current.into()); self.stack.push(N3State::PathFollowUp); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, // [18] pathItem ::= iri | blankNode | 
quickVar | collection | blankNodePropertyList | iriPropertyList | literal | formula // [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL @@ -784,7 +868,7 @@ impl RuleRecognizer for N3Recognizer { self.terms.push(NamedNode::from(iri).into()); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.terms.push(t.into()); self @@ -852,14 +936,14 @@ impl RuleRecognizer for N3Recognizer { self.terms.push(BlankNode::default().into()); self.stack.push(N3State::PropertyListEnd); self.stack.push(N3State::PredicateObjectList); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } N3State::PropertyListEnd => if token == N3Token::Punctuation("]") { self } else { errors.push("blank node property lists should end with a ']'".into()); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } N3State::IriPropertyList => match token { N3Token::IriRef(id) => { @@ -868,7 +952,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::PredicateObjectList); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.terms.push(t.into()); self.stack.push(N3State::PropertyListEnd); @@ -890,7 +974,7 @@ impl RuleRecognizer for N3Recognizer { self.terms.push(root.into()); self.stack.push(N3State::CollectionPossibleEnd); self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) }, N3State::CollectionPossibleEnd => { let value = self.terms.pop().unwrap(); @@ -917,7 +1001,7 @@ impl RuleRecognizer for N3Recognizer { self.terms.push(new.into()); self.stack.push(N3State::CollectionPossibleEnd); self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } N3State::LiteralPossibleSuffix { value } => { @@ -932,7 +1016,7 @@ impl RuleRecognizer for N3Recognizer { } _ => { self.terms.push(Literal::new_simple_literal(value).into()); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } } @@ -942,7 +1026,7 @@ impl RuleRecognizer for N3Recognizer { self.terms.push(Literal::new_typed_literal(value, datatype).into()); self }, - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(datatype) =>{ self.terms.push(Literal::new_typed_literal(value, datatype).into()); self @@ -950,7 +1034,7 @@ impl RuleRecognizer for N3Recognizer { Err(e) => self.error(errors, e) } _ => { - self.error(errors, "Expecting a datatype IRI after '^^, found TOKEN").recognize_next(token, results, errors) + self.error(errors, "Expecting a datatype IRI after '^^, found TOKEN").recognize_next(token, context, results, errors) } } } @@ -984,7 +1068,7 @@ impl RuleRecognizer for N3Recognizer { _ 
=> { self.stack.push(N3State::FormulaContentExpectDot); self.stack.push(N3State::Triples); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } } @@ -1001,7 +1085,7 @@ impl RuleRecognizer for N3Recognizer { _ => { errors.push("A dot is expected at the end of N3 statements".into()); self.stack.push(N3State::FormulaContent); - self.recognize_next(token, results, errors) + self.recognize_next(token, context, results, errors) } } } @@ -1016,6 +1100,7 @@ impl RuleRecognizer for N3Recognizer { fn recognize_end( self, + _state: &mut N3RecognizerContext, _results: &mut Vec, errors: &mut Vec, ) { @@ -1025,8 +1110,8 @@ impl RuleRecognizer for N3Recognizer { } } - fn lexer_options(&self) -> &N3LexerOptions { - &self.lexer_options + fn lexer_options(context: &N3RecognizerContext) -> &N3LexerOptions { + &context.lexer_options } } @@ -1045,12 +1130,14 @@ impl N3Recognizer { ), N3Recognizer { stack: vec![N3State::N3Doc], - lexer_options: N3LexerOptions { base_iri }, - prefixes, terms: Vec::new(), predicates: Vec::new(), contexts: Vec::new(), }, + N3RecognizerContext { + lexer_options: N3LexerOptions { base_iri }, + prefixes, + }, ) } diff --git a/lib/oxttl/src/nquads.rs b/lib/oxttl/src/nquads.rs index caa7c642..c2bf35cd 100644 --- a/lib/oxttl/src/nquads.rs +++ b/lib/oxttl/src/nquads.rs @@ -90,27 +90,27 @@ impl NQuadsParser { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; - /// use oxttl::{ParseError, NQuadsParser}; + /// use oxttl::NQuadsParser; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<(), ParseError> { - /// let file = b" . + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::ParseError> { + /// let file = b" . /// \"Foo\" . /// . /// \"Bar\" ."; /// - /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); - /// let mut count = 0; - /// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref()); - /// while let Some(triple) = parser.next().await { - /// let triple = triple?; - /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { - /// count += 1; - /// } + /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); + /// let mut count = 0; + /// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; /// } - /// assert_eq!(2, count); - /// Ok(()) /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( @@ -211,27 +211,27 @@ impl Iterator for FromReadNQuadsReader { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; -/// use oxttl::{ParseError, NQuadsParser}; +/// use oxttl::NQuadsParser; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<(), ParseError> { -/// let file = b" . +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::ParseError> { +/// let file = b" . /// \"Foo\" . /// . 
/// \"Bar\" ."; /// -/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); -/// let mut count = 0; -/// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref()); -/// while let Some(triple) = parser.next().await { -/// let triple = triple?; -/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { -/// count += 1; -/// } +/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); +/// let mut count = 0; +/// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; /// } -/// assert_eq!(2, count); -/// Ok(()) /// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] @@ -377,10 +377,9 @@ impl NQuadsSerializer { /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::NQuadsSerializer; - /// use std::io::Result; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<()> { + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { /// let mut writer = NQuadsSerializer::new().serialize_to_tokio_async_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new_unchecked("http://example.com#me"), @@ -392,8 +391,8 @@ impl NQuadsSerializer { /// b" .\n", /// writer.finish().as_slice() /// ); - /// Ok(()) - /// } + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn serialize_to_tokio_async_write( @@ -475,10 +474,9 @@ impl ToWriteNQuadsWriter { /// ``` /// use oxrdf::{NamedNodeRef, QuadRef}; /// use oxttl::NQuadsSerializer; -/// use std::io::Result; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<()> { +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { /// let mut writer = NQuadsSerializer::new().serialize_to_tokio_async_write(Vec::new()); /// writer.write_quad(QuadRef::new( /// NamedNodeRef::new_unchecked("http://example.com#me"), @@ -490,8 +488,8 @@ impl ToWriteNQuadsWriter { /// b" .\n", /// writer.finish().as_slice() /// ); -/// Ok(()) -/// } +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] diff --git a/lib/oxttl/src/ntriples.rs b/lib/oxttl/src/ntriples.rs index f8c32b12..4fd06227 100644 --- a/lib/oxttl/src/ntriples.rs +++ b/lib/oxttl/src/ntriples.rs @@ -91,27 +91,27 @@ impl NTriplesParser { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; - /// use oxttl::{ParseError, NTriplesParser}; + /// use oxttl::NTriplesParser; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<(), ParseError> { - /// let file = b" . + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> Result<(), oxttl::ParseError> { + /// let file = b" . /// \"Foo\" . /// . 
/// \"Bar\" ."; /// - /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); - /// let mut count = 0; - /// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); - /// while let Some(triple) = parser.next().await { - /// let triple = triple?; - /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { - /// count += 1; - /// } + /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); + /// let mut count = 0; + /// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); + /// while let Some(triple) = parser.next().await { + /// let triple = triple?; + /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { + /// count += 1; /// } - /// assert_eq!(2, count); - /// Ok(()) /// } + /// assert_eq!(2, count); + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn parse_tokio_async_read( @@ -212,27 +212,27 @@ impl Iterator for FromReadNTriplesReader { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; -/// use oxttl::{ParseError, NTriplesParser}; +/// use oxttl::NTriplesParser; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<(), ParseError> { -/// let file = b" . +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> Result<(), oxttl::ParseError> { +/// let file = b" . /// \"Foo\" . /// . /// \"Bar\" ."; /// -/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); -/// let mut count = 0; -/// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); -/// while let Some(triple) = parser.next().await { -/// let triple = triple?; -/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { -/// count += 1; -/// } +/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person"); +/// let mut count = 0; +/// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref()); +/// while let Some(triple) = parser.next().await { +/// let triple = triple?; +/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { +/// count += 1; /// } -/// assert_eq!(2, count); -/// Ok(()) /// } +/// assert_eq!(2, count); +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] @@ -376,22 +376,21 @@ impl NTriplesSerializer { /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; - /// use std::io::Result; /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<()> { - /// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); - /// writer.write_triple(TripleRef::new( - /// NamedNodeRef::new_unchecked("http://example.com#me"), - /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), - /// NamedNodeRef::new_unchecked("http://schema.org/Person"), - /// )).await?; - /// assert_eq!( - /// b" .\n", - /// writer.finish().as_slice() - /// ); - /// Ok(()) - /// } + /// # #[tokio::main(flavor = "current_thread")] + /// # async fn main() -> std::io::Result<()> { + /// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); + /// writer.write_triple(TripleRef::new( + /// NamedNodeRef::new_unchecked("http://example.com#me"), + /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + /// NamedNodeRef::new_unchecked("http://schema.org/Person"), + /// )).await?; + /// assert_eq!( + /// b" 
.\n", + /// writer.finish().as_slice() + /// ); + /// # Ok(()) + /// # } /// ``` #[cfg(feature = "async-tokio")] pub fn serialize_to_tokio_async_write( @@ -471,22 +470,21 @@ impl ToWriteNTriplesWriter { /// ``` /// use oxrdf::{NamedNodeRef, TripleRef}; /// use oxttl::NTriplesSerializer; -/// use std::io::Result; /// -/// #[tokio::main(flavor = "current_thread")] -/// async fn main() -> Result<()> { -/// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); -/// writer.write_triple(TripleRef::new( -/// NamedNodeRef::new_unchecked("http://example.com#me"), -/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), -/// NamedNodeRef::new_unchecked("http://schema.org/Person") -/// )).await?; -/// assert_eq!( -/// b" .\n", -/// writer.finish().as_slice() -/// ); -/// Ok(()) -/// } +/// # #[tokio::main(flavor = "current_thread")] +/// # async fn main() -> std::io::Result<()> { +/// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new()); +/// writer.write_triple(TripleRef::new( +/// NamedNodeRef::new_unchecked("http://example.com#me"), +/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), +/// NamedNodeRef::new_unchecked("http://schema.org/Person") +/// )).await?; +/// assert_eq!( +/// b" .\n", +/// writer.finish().as_slice() +/// ); +/// # Ok(()) +/// # } /// ``` #[cfg(feature = "async-tokio")] #[must_use] diff --git a/lib/oxttl/src/terse.rs b/lib/oxttl/src/terse.rs index f99aaf4d..ecce10b6 100644 --- a/lib/oxttl/src/terse.rs +++ b/lib/oxttl/src/terse.rs @@ -14,20 +14,25 @@ use std::collections::HashMap; pub struct TriGRecognizer { stack: Vec, - with_graph_name: bool, - #[cfg(feature = "rdf-star")] - with_quoted_triples: bool, - lexer_options: N3LexerOptions, - prefixes: HashMap>, cur_subject: Vec, cur_predicate: Vec, cur_object: Vec, cur_graph: GraphName, } +#[allow(clippy::partial_pub_fields)] +pub struct TriGRecognizerContext { + lexer_options: N3LexerOptions, + pub with_graph_name: bool, + #[cfg(feature = "rdf-star")] + pub with_quoted_triples: bool, + pub prefixes: HashMap>, +} + impl RuleRecognizer for TriGRecognizer { type TokenRecognizer = N3Lexer; type Output = Quad; + type Context = TriGRecognizerContext; fn error_recovery_state(mut self) -> Self { self.stack.clear(); @@ -41,6 +46,7 @@ impl RuleRecognizer for TriGRecognizer { fn recognize_next( mut self, token: N3Token, + context: &mut TriGRecognizerContext, results: &mut Vec, errors: &mut Vec, ) -> Self { @@ -75,18 +81,18 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::BaseExpectIri); self } - N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("graph") && self.with_graph_name => { + N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("graph") && context.with_graph_name => { self.stack.push(TriGState::WrappedGraph); self.stack.push(TriGState::GraphName); self } - N3Token::Punctuation("{") if self.with_graph_name => { + N3Token::Punctuation("{") if context.with_graph_name => { self.stack.push(TriGState::WrappedGraph); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } _ => { self.stack.push(TriGState::TriplesOrGraph); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } } }, @@ -96,12 +102,12 @@ impl RuleRecognizer for TriGRecognizer { self } else { errors.push("A dot is expected at the end of statements".into()); - self.recognize_next(token, results, errors) + self.recognize_next(token, 
context,results, errors) } }, TriGState::BaseExpectIri => match token { N3Token::IriRef(iri) => { - self.lexer_options.base_iri = Some(iri); + context.lexer_options.base_iri = Some(iri); self } _ => self.error(errors, "The BASE keyword should be followed by an IRI"), @@ -117,7 +123,7 @@ impl RuleRecognizer for TriGRecognizer { }, TriGState::PrefixExpectIri { name } => match token { N3Token::IriRef(iri) => { - self.prefixes.insert(name, iri); + context.prefixes.insert(name, iri); self } _ => self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI"), @@ -131,7 +137,7 @@ impl RuleRecognizer for TriGRecognizer { }); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.stack.push(TriGState::WrappedGraphOrPredicateObjectList { term: t.into() @@ -157,7 +163,7 @@ impl RuleRecognizer for TriGRecognizer { self } #[cfg(feature = "rdf-star")] - N3Token::Punctuation("<<") if self.with_quoted_triples => { + N3Token::Punctuation("<<") if context.with_quoted_triples => { self.stack.push(TriGState::ExpectDot); self.stack.push(TriGState::PredicateObjectList); self.stack.push(TriGState::SubjectQuotedTripleEnd); @@ -171,7 +177,7 @@ impl RuleRecognizer for TriGRecognizer { } } TriGState::WrappedGraphOrPredicateObjectList { term } => { - if token == N3Token::Punctuation("{") && self.with_graph_name { + if token == N3Token::Punctuation("{") && context.with_graph_name { self.cur_graph = term.into(); self.stack.push(TriGState::WrappedGraph); } else { @@ -179,7 +185,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::ExpectDot); self.stack.push(TriGState::PredicateObjectList); } - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } TriGState::WrappedGraphBlankNodePropertyListCurrent => if token == N3Token::Punctuation("]") { self.stack.push(TriGState::WrappedGraphOrPredicateObjectList { @@ -191,7 +197,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::ExpectDot); self.stack.push(TriGState::SubjectBlankNodePropertyListEnd); self.stack.push(TriGState::PredicateObjectList); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } TriGState::SubjectBlankNodePropertyListEnd => if token == N3Token::Punctuation("]") { self.stack.push(TriGState::SubjectBlankNodePropertyListAfter ); @@ -199,13 +205,13 @@ impl RuleRecognizer for TriGRecognizer { } else { errors.push("blank node property lists should end with a ']'".into()); self.stack.push(TriGState::SubjectBlankNodePropertyListAfter ); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } TriGState::SubjectBlankNodePropertyListAfter => if matches!(token, N3Token::Punctuation("." 
| "}")) { - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } else { self.stack.push(TriGState::PredicateObjectList); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } TriGState::SubjectCollectionBeginning => { if let N3Token::Punctuation(")") = token { @@ -218,7 +224,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_predicate.push(rdf::FIRST.into()); self.stack.push(TriGState::SubjectCollectionPossibleEnd); self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } }, TriGState::SubjectCollectionPossibleEnd => { @@ -244,7 +250,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_subject.push(new.into()); self.stack.push(TriGState::ObjectCollectionPossibleEnd); self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } } // [5g] wrappedGraph ::= '{' triplesBlock? '}' @@ -269,7 +275,7 @@ impl RuleRecognizer for TriGRecognizer { } _ => { errors.push("A '}' or a '.' is expected at the end of a graph block".into()); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } } } @@ -277,7 +283,7 @@ impl RuleRecognizer for TriGRecognizer { // [10] subject ::= iri | BlankNode | collection | quotedTriple TriGState::Triples => match token { N3Token::Punctuation("}") => { - self.recognize_next(token, results, errors) // Early end + self.recognize_next(token, context,results, errors) // Early end }, N3Token::Punctuation("[") => { self.cur_subject.push(BlankNode::default().into()); @@ -289,7 +295,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::PredicateObjectList); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_subject.push(t.into()); self.stack.push(TriGState::PredicateObjectList); @@ -308,7 +314,7 @@ impl RuleRecognizer for TriGRecognizer { self } #[cfg(feature = "rdf-star")] - N3Token::Punctuation("<<") if self.with_quoted_triples => { + N3Token::Punctuation("<<") if context.with_quoted_triples => { self.stack.push(TriGState::PredicateObjectList); self.stack.push(TriGState::SubjectQuotedTripleEnd); self.stack.push(TriGState::QuotedObject); @@ -326,7 +332,7 @@ impl RuleRecognizer for TriGRecognizer { } else { self.stack.push(TriGState::SubjectBlankNodePropertyListEnd); self.stack.push(TriGState::PredicateObjectList); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } // [7g] labelOrSubject ::= iri | BlankNode TriGState::GraphName => match token { @@ -334,7 +340,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_graph = NamedNode::from(iri).into(); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_graph = t.into(); self @@ -364,7 +370,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::PredicateObjectListEnd); self.stack.push(TriGState::ObjectsList); 
self.stack.push(TriGState::Verb); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) }, TriGState::PredicateObjectListEnd => { self.cur_predicate.pop(); @@ -372,26 +378,26 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::PredicateObjectListPossibleContinuation); self } else { - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } }, TriGState::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") { self.stack.push(TriGState::PredicateObjectListPossibleContinuation); self } else if matches!(token, N3Token::Punctuation("." | "}" | "]")) { - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } else { self.stack.push(TriGState::PredicateObjectListEnd); self.stack.push(TriGState::ObjectsList); self.stack.push(TriGState::Verb); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) }, // [8] objectList ::= object annotation? ( ',' object annotation? )* // [30t] annotation ::= '{|' predicateObjectList '|}' TriGState::ObjectsList => { self.stack.push(TriGState::ObjectsListEnd); self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } TriGState::ObjectsListEnd => { match token { @@ -415,7 +421,7 @@ impl RuleRecognizer for TriGRecognizer { } _ => { self.cur_object.pop(); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } } }, @@ -435,7 +441,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::Object); self } else { - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) }, // [9] verb ::= predicate | 'a' // [11] predicate ::= iri @@ -448,7 +454,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_predicate.push(NamedNode::from(iri)); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_predicate.push(t); self @@ -477,7 +483,7 @@ impl RuleRecognizer for TriGRecognizer { self.emit_quad(results); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_object.push(t.into()); self.emit_quad(results); @@ -528,7 +534,7 @@ impl RuleRecognizer for TriGRecognizer { self } #[cfg(feature = "rdf-star")] - N3Token::Punctuation("<<") if self.with_quoted_triples => { + N3Token::Punctuation("<<") if context.with_quoted_triples => { self.stack.push(TriGState::ObjectQuotedTripleEnd { emit: true }); self.stack.push(TriGState::QuotedObject); self.stack.push(TriGState::Verb); @@ -548,7 +554,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_subject.push(BlankNode::default().into()); self.stack.push(TriGState::ObjectBlankNodePropertyListEnd); self.stack.push(TriGState::PredicateObjectList); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } TriGState::ObjectBlankNodePropertyListEnd => if token == 
N3Token::Punctuation("]") { self.cur_object.push(self.cur_subject.pop().unwrap().into()); @@ -569,7 +575,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_predicate.push(rdf::FIRST.into()); self.stack.push(TriGState::ObjectCollectionPossibleEnd); self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) }, TriGState::ObjectCollectionPossibleEnd => { let old = self.cur_subject.pop().unwrap(); @@ -592,7 +598,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_subject.push(new.into()); self.stack.push(TriGState::ObjectCollectionPossibleEnd); self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } } TriGState::LiteralPossibleSuffix { value, emit } => { @@ -613,7 +619,7 @@ impl RuleRecognizer for TriGRecognizer { if emit { self.emit_quad(results); } - self.recognize_next(token, results, errors) + self.recognize_next(token, context,results, errors) } } } @@ -626,7 +632,7 @@ impl RuleRecognizer for TriGRecognizer { } self }, - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_object.push(Literal::new_typed_literal(value, t).into()); if emit { @@ -637,7 +643,7 @@ impl RuleRecognizer for TriGRecognizer { Err(e) => self.error(errors, e) } _ => { - self.error(errors, "Expecting a datatype IRI after ^^, found TOKEN").recognize_next(token, results, errors) + self.error(errors, "Expecting a datatype IRI after ^^, found TOKEN").recognize_next(token, context, results, errors) } } } @@ -685,7 +691,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_subject.push(NamedNode::from(iri).into()); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_subject.push(t.into()); self @@ -717,7 +723,7 @@ impl RuleRecognizer for TriGRecognizer { self.cur_object.push(NamedNode::from(iri).into()); self } - N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &self.prefixes) { + N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) { Ok(t) => { self.cur_object.push(t.into()); self @@ -779,6 +785,7 @@ impl RuleRecognizer for TriGRecognizer { fn recognize_end( mut self, + _context: &mut TriGRecognizerContext, results: &mut Vec, errors: &mut Vec, ) { @@ -807,8 +814,8 @@ impl RuleRecognizer for TriGRecognizer { } } - fn lexer_options(&self) -> &N3LexerOptions { - &self.lexer_options + fn lexer_options(context: &TriGRecognizerContext) -> &N3LexerOptions { + &context.lexer_options } } @@ -829,16 +836,18 @@ impl TriGRecognizer { ), TriGRecognizer { stack: vec![TriGState::TriGDoc], - with_graph_name, - #[cfg(feature = "rdf-star")] - with_quoted_triples, - lexer_options: N3LexerOptions { base_iri }, - prefixes, cur_subject: Vec::new(), cur_predicate: Vec::new(), cur_object: Vec::new(), cur_graph: GraphName::DefaultGraph, }, + TriGRecognizerContext { + with_graph_name, + 
#[cfg(feature = "rdf-star")] + with_quoted_triples, + prefixes, + lexer_options: N3LexerOptions { base_iri }, + }, ) } diff --git a/lib/oxttl/src/toolkit/parser.rs b/lib/oxttl/src/toolkit/parser.rs index 7a9ba8bf..7af93752 100644 --- a/lib/oxttl/src/toolkit/parser.rs +++ b/lib/oxttl/src/toolkit/parser.rs @@ -7,19 +7,28 @@ use tokio::io::AsyncRead; pub trait RuleRecognizer: Sized { type TokenRecognizer: TokenRecognizer; type Output; + type Context; fn error_recovery_state(self) -> Self; fn recognize_next( self, token: ::Token<'_>, + context: &mut Self::Context, results: &mut Vec, errors: &mut Vec, ) -> Self; - fn recognize_end(self, results: &mut Vec, errors: &mut Vec); + fn recognize_end( + self, + context: &mut Self::Context, + results: &mut Vec, + errors: &mut Vec, + ); - fn lexer_options(&self) -> &::Options; + fn lexer_options( + context: &Self::Context, + ) -> &::Options; } pub struct RuleRecognizerError { @@ -34,22 +43,23 @@ impl> From for RuleRecognizerError { } } +#[allow(clippy::partial_pub_fields)] pub struct Parser { lexer: Lexer, state: Option, + pub context: RR::Context, results: Vec, errors: Vec, - default_lexer_options: ::Options, } impl Parser { - pub fn new(lexer: Lexer, recognizer: RR) -> Self { + pub fn new(lexer: Lexer, recognizer: RR, context: RR::Context) -> Self { Self { lexer, state: Some(recognizer), + context, results: vec![], errors: vec![], - default_lexer_options: ::Options::default(), } } @@ -80,15 +90,16 @@ impl Parser { if let Some(result) = self.results.pop() { return Some(Ok(result)); } - if let Some(result) = self.lexer.read_next( - self.state - .as_ref() - .map_or(&self.default_lexer_options, |p| p.lexer_options()), - ) { + if let Some(result) = self.lexer.read_next(RR::lexer_options(&self.context)) { match result { Ok(token) => { self.state = self.state.take().map(|state| { - state.recognize_next(token, &mut self.results, &mut self.errors) + state.recognize_next( + token, + &mut self.context, + &mut self.results, + &mut self.errors, + ) }); continue; } @@ -102,7 +113,7 @@ impl Parser { let Some(state) = self.state.take() else { return None; }; - state.recognize_end(&mut self.results, &mut self.errors) + state.recognize_end(&mut self.context, &mut self.results, &mut self.errors) } else { return None; } @@ -122,9 +133,10 @@ impl Parser { } } +#[allow(clippy::partial_pub_fields)] pub struct FromReadIterator { read: R, - parser: Parser, + pub parser: Parser, } impl Iterator for FromReadIterator { @@ -145,8 +157,8 @@ impl Iterator for FromReadIterator { #[cfg(feature = "async-tokio")] pub struct FromTokioAsyncReadIterator { - read: R, - parser: Parser, + pub read: R, + pub parser: Parser, } #[cfg(feature = "async-tokio")] diff --git a/lib/oxttl/src/trig.rs b/lib/oxttl/src/trig.rs index 6dfb2b43..1b5e04a3 100644 --- a/lib/oxttl/src/trig.rs +++ b/lib/oxttl/src/trig.rs @@ -116,29 +116,29 @@ impl TriGParser { /// Count the number of people: /// ``` /// use oxrdf::{NamedNodeRef, vocab::rdf}; - /// use oxttl::{ParseError, TriGParser}; - /// - /// #[tokio::main(flavor = "current_thread")] - /// async fn main() -> Result<(), ParseError> { - /// let file = b"@base . - /// @prefix schema: . - /// a schema:Person ; - /// schema:name \"Foo\" . 
diff --git a/lib/oxttl/src/trig.rs b/lib/oxttl/src/trig.rs
index 6dfb2b43..1b5e04a3 100644
--- a/lib/oxttl/src/trig.rs
+++ b/lib/oxttl/src/trig.rs
@@ -116,29 +116,29 @@ impl TriGParser {
     /// Count the number of people:
     /// ```
     /// use oxrdf::{NamedNodeRef, vocab::rdf};
-    /// use oxttl::{ParseError, TriGParser};
-    ///
-    /// #[tokio::main(flavor = "current_thread")]
-    /// async fn main() -> Result<(), ParseError> {
-    ///     let file = b"@base <http://example.com/> .
-    ///     @prefix schema: <http://schema.org/> .
-    ///     <foo> a schema:Person ;
-    ///         schema:name \"Foo\" .
-    ///     <bar> a schema:Person ;
-    ///         schema:name \"Bar\" .";
-    ///
-    ///     let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
-    ///     let mut count = 0;
-    ///     let mut parser = TriGParser::new().parse_tokio_async_read(file.as_ref());
-    ///     while let Some(triple) = parser.next().await {
-    ///         let triple = triple?;
-    ///         if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
-    ///             count += 1;
-    ///         }
-    ///     }
-    ///     assert_eq!(2, count);
-    ///     Ok(())
-    /// }
+    /// use oxttl::TriGParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::ParseError> {
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .
+    /// <bar> a schema:Person ;
+    ///     schema:name \"Bar\" .";
+    ///
+    /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+    /// let mut count = 0;
+    /// let mut parser = TriGParser::new().parse_tokio_async_read(file.as_ref());
+    /// while let Some(triple) = parser.next().await {
+    ///     let triple = triple?;
+    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+    ///         count += 1;
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Ok(())
+    /// # }
     /// ```
     #[cfg(feature = "async-tokio")]
     pub fn parse_tokio_async_read(
@@ -229,6 +229,33 @@ pub struct FromReadTriGReader<R: Read> {
     inner: FromReadIterator<R, TriGRecognizer>,
 }
 
+impl<R: Read> FromReadTriGReader<R> {
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns the mapping from prefix name to prefix value.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .";
+    ///
+    /// let mut reader = TriGParser::new().parse_read(file.as_ref());
+    /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning
+    ///
+    /// reader.next().unwrap()?; // We read the first triple
+    /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes
+    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn prefixes(&self) -> &HashMap<String, Iri<String>> {
+        &self.inner.parser.context.prefixes
+    }
+}
+
 impl<R: Read> Iterator for FromReadTriGReader<R> {
     type Item = Result<Quad, ParseError>;
@@ -242,29 +269,29 @@ impl<R: Read> Iterator for FromReadTriGReader<R> {
 /// Count the number of people:
 /// ```
 /// use oxrdf::{NamedNodeRef, vocab::rdf};
-/// use oxttl::{ParseError, TriGParser};
-///
-/// #[tokio::main(flavor = "current_thread")]
-/// async fn main() -> Result<(), ParseError> {
-///     let file = b"@base <http://example.com/> .
-///     @prefix schema: <http://schema.org/> .
-///     <foo> a schema:Person ;
-///         schema:name \"Foo\" .
-///     <bar> a schema:Person ;
-///         schema:name \"Bar\" .";
-///
-///     let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
-///     let mut count = 0;
-///     let mut parser = TriGParser::new().parse_tokio_async_read(file.as_ref());
-///     while let Some(triple) = parser.next().await {
-///         let triple = triple?;
-///         if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
-///             count += 1;
-///         }
-///     }
-///     assert_eq!(2, count);
-///     Ok(())
-/// }
+/// use oxttl::TriGParser;
+///
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> Result<(), oxttl::ParseError> {
+/// let file = b"@base <http://example.com/> .
+/// @prefix schema: <http://schema.org/> .
+/// <foo> a schema:Person ;
+///     schema:name \"Foo\" .
+/// <bar> a schema:Person ;
+///     schema:name \"Bar\" .";
+///
+/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+/// let mut count = 0;
+/// let mut parser = TriGParser::new().parse_tokio_async_read(file.as_ref());
+/// while let Some(triple) = parser.next().await {
+///     let triple = triple?;
+///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Ok(())
+/// # }
 /// ```
 #[cfg(feature = "async-tokio")]
 #[must_use]
@@ -278,6 +305,34 @@ impl<R: AsyncRead + Unpin> FromTokioAsyncReadTriGReader<R> {
     pub async fn next(&mut self) -> Option<Result<Quad, ParseError>> {
         Some(self.inner.next().await?.map(Into::into))
     }
+
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns the mapping from prefix name to prefix value.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::ParseError> {
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .";
+    ///
+    /// let mut reader = TriGParser::new().parse_tokio_async_read(file.as_ref());
+    /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning
+    ///
+    /// reader.next().await.unwrap()?; // We read the first triple
+    /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn prefixes(&self) -> &HashMap<String, Iri<String>> {
+        &self.inner.parser.context.prefixes
+    }
 }
 
 /// Parses a TriG file by using a low-level API. Can be built using [`TriGParser::parse`].
@@ -345,6 +400,32 @@ impl LowLevelTriGReader {
     pub fn read_next(&mut self) -> Option<Result<Quad, SyntaxError>> {
         self.parser.read_next()
     }
+
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns the mapping from prefix name to prefix value.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TriGParser;
+    ///
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .";
+    ///
+    /// let mut reader = TriGParser::new().parse();
+    /// reader.extend_from_slice(file);
+    /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning
+    ///
+    /// reader.read_next().unwrap()?; // We read the first triple
+    /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes
+    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn prefixes(&self) -> &HashMap<String, Iri<String>> {
+        &self.parser.context.prefixes
+    }
 }
 
 /// A [TriG](https://www.w3.org/TR/trig/) serializer.
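The `prefixes()` getters added above return the live prefix map, so callers can do more than assert on it, e.g. expand a prefixed name once the relevant `@prefix` directive has been parsed. A sketch built on the documented getter (the `expand` closure is an illustration, not an oxttl function):

```rust
// Illustration (not an oxttl API): using the new `prefixes()` getter to
// expand a prefixed name once the `@prefix` directive has been parsed.
use oxttl::TriGParser;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file = b"@prefix schema: <http://schema.org/> .
    <http://example.com/foo> schema:name \"Foo\" .";

    let mut reader = TriGParser::new().parse_read(file.as_ref());
    reader.next().unwrap()?; // consume the first quad so the prefix is known

    // Hypothetical helper built on the documented getter.
    let expand = |name: &str| -> Option<String> {
        let (prefix, local) = name.split_once(':')?;
        Some(format!("{}{}", reader.prefixes().get(prefix)?.as_str(), local))
    };
    assert_eq!(expand("schema:name").as_deref(), Some("http://schema.org/name"));
    Ok(())
}
```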
@@ -410,23 +491,22 @@ impl TriGSerializer {
     /// ```
     /// use oxrdf::{NamedNodeRef, QuadRef};
     /// use oxttl::TriGSerializer;
-    /// use std::io::Result;
-    ///
-    /// #[tokio::main(flavor = "current_thread")]
-    /// async fn main() -> Result<()> {
-    ///     let mut writer = TriGSerializer::new().serialize_to_tokio_async_write(Vec::new());
-    ///     writer.write_quad(QuadRef::new(
-    ///         NamedNodeRef::new_unchecked("http://example.com#me"),
-    ///         NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
-    ///         NamedNodeRef::new_unchecked("http://schema.org/Person"),
-    ///         NamedNodeRef::new_unchecked("http://example.com"),
-    ///     )).await?;
-    ///     assert_eq!(
-    ///         b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
-    ///         writer.finish().await?.as_slice()
-    ///     );
-    ///     Ok(())
-    /// }
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> std::io::Result<()> {
+    /// let mut writer = TriGSerializer::new().serialize_to_tokio_async_write(Vec::new());
+    /// writer.write_quad(QuadRef::new(
+    ///     NamedNodeRef::new_unchecked("http://example.com#me"),
+    ///     NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+    ///     NamedNodeRef::new_unchecked("http://schema.org/Person"),
+    ///     NamedNodeRef::new_unchecked("http://example.com"),
+    /// )).await?;
+    /// assert_eq!(
+    ///     b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
+    ///     writer.finish().await?.as_slice()
+    /// );
+    /// # Ok(())
+    /// # }
     /// ```
     #[cfg(feature = "async-tokio")]
     pub fn serialize_to_tokio_async_write(
@@ -513,23 +593,22 @@ impl ToWriteTriGWriter {
 /// ```
 /// use oxrdf::{NamedNodeRef, QuadRef};
 /// use oxttl::TriGSerializer;
-/// use std::io::Result;
 ///
-/// #[tokio::main(flavor = "current_thread")]
-/// async fn main() -> Result<()> {
-///     let mut writer = TriGSerializer::new().serialize_to_tokio_async_write(Vec::new());
-///     writer.write_quad(QuadRef::new(
-///         NamedNodeRef::new_unchecked("http://example.com#me"),
-///         NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
-///         NamedNodeRef::new_unchecked("http://schema.org/Person"),
-///         NamedNodeRef::new_unchecked("http://example.com"),
-///     )).await?;
-///     assert_eq!(
-///         b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
-///         writer.finish().await?.as_slice()
-///     );
-///     Ok(())
-/// }
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> std::io::Result<()> {
+/// let mut writer = TriGSerializer::new().serialize_to_tokio_async_write(Vec::new());
+/// writer.write_quad(QuadRef::new(
+///     NamedNodeRef::new_unchecked("http://example.com#me"),
+///     NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+///     NamedNodeRef::new_unchecked("http://schema.org/Person"),
+///     NamedNodeRef::new_unchecked("http://example.com"),
+/// )).await?;
+/// assert_eq!(
+///     b"<http://example.com> {\n\t<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n}\n",
+///     writer.finish().await?.as_slice()
+/// );
+/// # Ok(())
+/// # }
 /// ```
 #[cfg(feature = "async-tokio")]
 #[must_use]
diff --git a/lib/oxttl/src/turtle.rs b/lib/oxttl/src/turtle.rs
index 133c9cca..7106758d 100644
--- a/lib/oxttl/src/turtle.rs
+++ b/lib/oxttl/src/turtle.rs
@@ -118,29 +118,29 @@ impl TurtleParser {
     /// Count the number of people:
     /// ```
     /// use oxrdf::{NamedNodeRef, vocab::rdf};
-    /// use oxttl::{ParseError, TurtleParser};
-    ///
-    /// #[tokio::main(flavor = "current_thread")]
-    /// async fn main() -> Result<(), ParseError> {
-    ///     let file = b"@base <http://example.com/> .
-    ///     @prefix schema: <http://schema.org/> .
-    ///     <foo> a schema:Person ;
-    ///         schema:name \"Foo\" .
-    ///     <bar> a schema:Person ;
-    ///         schema:name \"Bar\" .";
-    ///
-    ///     let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
-    ///     let mut count = 0;
-    ///     let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
-    ///     while let Some(triple) = parser.next().await {
-    ///         let triple = triple?;
-    ///         if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
-    ///             count += 1;
-    ///         }
-    ///     }
-    ///     assert_eq!(2, count);
-    ///     Ok(())
-    /// }
+    /// use oxttl::TurtleParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::ParseError> {
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .
+    /// <bar> a schema:Person ;
+    ///     schema:name \"Bar\" .";
+    ///
+    /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+    /// let mut count = 0;
+    /// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
+    /// while let Some(triple) = parser.next().await {
+    ///     let triple = triple?;
+    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+    ///         count += 1;
+    ///     }
+    /// }
+    /// assert_eq!(2, count);
+    /// # Ok(())
+    /// # }
     /// ```
     #[cfg(feature = "async-tokio")]
     pub fn parse_tokio_async_read(
@@ -231,6 +231,33 @@ pub struct FromReadTurtleReader<R: Read> {
     inner: FromReadIterator<R, TriGRecognizer>,
 }
 
+impl<R: Read> FromReadTurtleReader<R> {
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns the mapping from prefix name to prefix value.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TurtleParser;
+    ///
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .";
+    ///
+    /// let mut reader = TurtleParser::new().parse_read(file.as_ref());
+    /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning
+    ///
+    /// reader.next().unwrap()?; // We read the first triple
+    /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes
+    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn prefixes(&self) -> &HashMap<String, Iri<String>> {
+        &self.inner.parser.context.prefixes
+    }
+}
+
 impl<R: Read> Iterator for FromReadTurtleReader<R> {
     type Item = Result<Triple, ParseError>;
 
@@ -244,29 +271,29 @@ impl<R: Read> Iterator for FromReadTurtleReader<R> {
 /// Count the number of people:
 /// ```
 /// use oxrdf::{NamedNodeRef, vocab::rdf};
-/// use oxttl::{ParseError, TurtleParser};
-///
-/// #[tokio::main(flavor = "current_thread")]
-/// async fn main() -> Result<(), ParseError> {
-///     let file = b"@base <http://example.com/> .
-///     @prefix schema: <http://schema.org/> .
-///     <foo> a schema:Person ;
-///         schema:name \"Foo\" .
-///     <bar> a schema:Person ;
-///         schema:name \"Bar\" .";
-///
-///     let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
-///     let mut count = 0;
-///     let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
-///     while let Some(triple) = parser.next().await {
-///         let triple = triple?;
-///         if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
-///             count += 1;
-///         }
-///     }
-///     assert_eq!(2, count);
-///     Ok(())
-/// }
+/// use oxttl::TurtleParser;
+///
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> Result<(), oxttl::ParseError> {
+/// let file = b"@base <http://example.com/> .
+/// @prefix schema: <http://schema.org/> .
+/// <foo> a schema:Person ;
+///     schema:name \"Foo\" .
+/// <bar> a schema:Person ;
+///     schema:name \"Bar\" .";
+///
+/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
+/// let mut count = 0;
+/// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
+/// while let Some(triple) = parser.next().await {
+///     let triple = triple?;
+///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
+///         count += 1;
+///     }
+/// }
+/// assert_eq!(2, count);
+/// # Ok(())
+/// # }
 /// ```
 #[cfg(feature = "async-tokio")]
 #[must_use]
@@ -280,6 +307,34 @@ impl<R: AsyncRead + Unpin> FromTokioAsyncReadTurtleReader<R> {
     pub async fn next(&mut self) -> Option<Result<Triple, ParseError>> {
         Some(self.inner.next().await?.map(Into::into))
     }
+
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns the mapping from prefix name to prefix value.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TurtleParser;
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> Result<(), oxttl::ParseError> {
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .";
+    ///
+    /// let mut reader = TurtleParser::new().parse_tokio_async_read(file.as_ref());
+    /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning
+    ///
+    /// reader.next().await.unwrap()?; // We read the first triple
+    /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn prefixes(&self) -> &HashMap<String, Iri<String>> {
+        &self.inner.parser.context.prefixes
+    }
 }
 
 /// Parses a Turtle file by using a low-level API. Can be built using [`TurtleParser::parse`].
@@ -347,6 +402,32 @@ impl LowLevelTurtleReader {
     pub fn read_next(&mut self) -> Option<Result<Triple, SyntaxError>> {
         Some(self.parser.read_next()?.map(Into::into))
     }
+
+    /// The list of IRI prefixes considered at the current step of the parsing.
+    ///
+    /// This method returns the mapping from prefix name to prefix value.
+    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
+    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
+    ///
+    /// ```
+    /// use oxttl::TurtleParser;
+    ///
+    /// let file = b"@base <http://example.com/> .
+    /// @prefix schema: <http://schema.org/> .
+    /// <foo> a schema:Person ;
+    ///     schema:name \"Foo\" .";
+    ///
+    /// let mut reader = TurtleParser::new().parse();
+    /// reader.extend_from_slice(file);
+    /// assert!(reader.prefixes().is_empty()); // No prefix at the beginning
+    ///
+    /// reader.read_next().unwrap()?; // We read the first triple
+    /// assert_eq!(reader.prefixes()["schema"], "http://schema.org/"); // There are now prefixes
+    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
+    /// ```
+    pub fn prefixes(&self) -> &HashMap<String, Iri<String>> {
+        &self.parser.context.prefixes
+    }
 }
 
 /// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
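The low-level examples above feed the whole input at once, but the same API can be driven incrementally, one chunk at a time. A hedged sketch (it assumes the low-level reader's `end()` method signals end of input, which belongs to the oxttl low-level API surface but is not shown in this diff):

```rust
// Hedged sketch: driving the low-level reader chunk by chunk. Assumes
// `end()` exists to signal that no more input will arrive.
use oxttl::TurtleParser;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let chunks: &[&[u8]] = &[
        b"<http://example.com/s> <http://example.com/p> ",
        b"<http://example.com/o> .",
    ];
    let mut reader = TurtleParser::new().parse();
    let mut count = 0;
    for chunk in chunks {
        reader.extend_from_slice(chunk);
        // `read_next` returns `None` once it needs more data.
        while let Some(triple) = reader.read_next() {
            triple?;
            count += 1;
        }
    }
    reader.end(); // no more input: flush whatever is still buffered
    while let Some(triple) = reader.read_next() {
        triple?;
        count += 1;
    }
    assert_eq!(count, 1);
    Ok(())
}
```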
@@ -411,22 +492,21 @@ impl TurtleSerializer {
     /// ```
     /// use oxrdf::{NamedNodeRef, TripleRef};
     /// use oxttl::TurtleSerializer;
-    /// use std::io::Result;
-    ///
-    /// #[tokio::main(flavor = "current_thread")]
-    /// async fn main() -> Result<()> {
-    ///     let mut writer = TurtleSerializer::new().serialize_to_tokio_async_write(Vec::new());
-    ///     writer.write_triple(TripleRef::new(
-    ///         NamedNodeRef::new_unchecked("http://example.com#me"),
-    ///         NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
-    ///         NamedNodeRef::new_unchecked("http://schema.org/Person"),
-    ///     )).await?;
-    ///     assert_eq!(
-    ///         b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
-    ///         writer.finish().await?.as_slice()
-    ///     );
-    ///     Ok(())
-    /// }
+    ///
+    /// # #[tokio::main(flavor = "current_thread")]
+    /// # async fn main() -> std::io::Result<()> {
+    /// let mut writer = TurtleSerializer::new().serialize_to_tokio_async_write(Vec::new());
+    /// writer.write_triple(TripleRef::new(
+    ///     NamedNodeRef::new_unchecked("http://example.com#me"),
+    ///     NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+    ///     NamedNodeRef::new_unchecked("http://schema.org/Person"),
+    /// )).await?;
+    /// assert_eq!(
+    ///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+    ///     writer.finish().await?.as_slice()
+    /// );
+    /// # Ok(())
+    /// # }
     /// ```
     #[cfg(feature = "async-tokio")]
     pub fn serialize_to_tokio_async_write(
@@ -506,22 +586,21 @@ impl ToWriteTurtleWriter {
 /// ```
 /// use oxrdf::{NamedNodeRef, TripleRef};
 /// use oxttl::TurtleSerializer;
-/// use std::io::Result;
 ///
-/// #[tokio::main(flavor = "current_thread")]
-/// async fn main() -> Result<()> {
-///     let mut writer = TurtleSerializer::new().serialize_to_tokio_async_write(Vec::new());
-///     writer.write_triple(TripleRef::new(
-///         NamedNodeRef::new_unchecked("http://example.com#me"),
-///         NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
-///         NamedNodeRef::new_unchecked("http://schema.org/Person")
-///     )).await?;
-///     assert_eq!(
-///         b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
-///         writer.finish().await?.as_slice()
-///     );
-///     Ok(())
-/// }
+/// # #[tokio::main(flavor = "current_thread")]
+/// # async fn main() -> std::io::Result<()> {
+/// let mut writer = TurtleSerializer::new().serialize_to_tokio_async_write(Vec::new());
+/// writer.write_triple(TripleRef::new(
+///     NamedNodeRef::new_unchecked("http://example.com#me"),
+///     NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+///     NamedNodeRef::new_unchecked("http://schema.org/Person")
+/// )).await?;
+/// assert_eq!(
+///     b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
+///     writer.finish().await?.as_slice()
+/// );
+/// # Ok(())
+/// # }
 /// ```
 #[cfg(feature = "async-tokio")]
 #[must_use]
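Taken together with the parser changes earlier in this diff, the synchronous counterparts of these serializer examples compose into a simple round trip. A sketch using `parse_read` and `serialize_to_write` (flow illustrative; the expected bytes follow the output format asserted in the docs above):

```rust
// Round-trip sketch using the synchronous counterparts of the APIs above:
// parse a Turtle snippet and re-serialize it. Illustration only.
use oxttl::{TurtleParser, TurtleSerializer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .";

    let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new());
    for triple in TurtleParser::new().parse_read(input.as_ref()) {
        writer.write_triple(&triple?)?;
    }
    assert_eq!(
        writer.finish()?.as_slice(),
        b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n"
    );
    Ok(())
}
```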