parent
a89d173093
commit
7d6c5190c1
@ -1,6 +1,6 @@ |
||||
#![allow(deprecated)] |
||||
|
||||
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; |
||||
use crate::oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; |
||||
|
||||
/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
|
||||
///
|
@ -0,0 +1,5 @@ |
||||
pub mod io; |
||||
pub mod model; |
||||
pub mod sparql; |
||||
mod storage; |
||||
pub mod store; |
@ -1,8 +1,10 @@ |
||||
use crate::model::TermRef; |
||||
use crate::sparql::algebra::QueryDataset; |
||||
use crate::sparql::EvaluationError; |
||||
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; |
||||
use crate::storage::{StorageError, StorageReader}; |
||||
use crate::oxigraph::model::TermRef; |
||||
use crate::oxigraph::sparql::algebra::QueryDataset; |
||||
use crate::oxigraph::sparql::EvaluationError; |
||||
use crate::oxigraph::storage::numeric_encoder::{ |
||||
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, |
||||
}; |
||||
use crate::oxigraph::storage::{StorageError, StorageReader}; |
||||
use std::cell::RefCell; |
||||
use std::collections::hash_map::Entry; |
||||
use std::collections::HashMap; |
@ -1,8 +1,8 @@ |
||||
use crate::io::RdfParseError; |
||||
use crate::model::NamedNode; |
||||
use crate::sparql::results::QueryResultsParseError as ResultsParseError; |
||||
use crate::sparql::SparqlSyntaxError; |
||||
use crate::storage::StorageError; |
||||
use crate::oxigraph::io::RdfParseError; |
||||
use crate::oxigraph::model::NamedNode; |
||||
use crate::oxigraph::sparql::results::QueryResultsParseError as ResultsParseError; |
||||
use crate::oxigraph::sparql::SparqlSyntaxError; |
||||
use crate::oxigraph::storage::StorageError; |
||||
use std::convert::Infallible; |
||||
use std::error::Error; |
||||
use std::io; |
@ -1,33 +1,34 @@ |
||||
use crate::model::vocab::{rdf, xsd}; |
||||
use crate::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple}; |
||||
use crate::sparql::algebra::{Query, QueryDataset}; |
||||
use crate::sparql::dataset::DatasetView; |
||||
use crate::sparql::error::EvaluationError; |
||||
use crate::sparql::model::*; |
||||
use crate::sparql::service::ServiceHandler; |
||||
use crate::sparql::CustomFunctionRegistry; |
||||
use crate::storage::numeric_encoder::*; |
||||
use crate::storage::small_string::SmallString; |
||||
use crate::oxigraph::model::vocab::{rdf, xsd}; |
||||
use crate::oxigraph::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple}; |
||||
use crate::oxigraph::sparql::algebra::{Query, QueryDataset}; |
||||
use crate::oxigraph::sparql::dataset::DatasetView; |
||||
use crate::oxigraph::sparql::error::EvaluationError; |
||||
use crate::oxigraph::sparql::model::*; |
||||
use crate::oxigraph::sparql::service::ServiceHandler; |
||||
use crate::oxigraph::sparql::CustomFunctionRegistry; |
||||
use crate::oxigraph::storage::numeric_encoder::*; |
||||
use crate::oxigraph::storage::small_string::SmallString; |
||||
use crate::oxrdf::{TermRef, Variable}; |
||||
use crate::oxsdatatypes::*; |
||||
use crate::spargebra; |
||||
use crate::spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression}; |
||||
use crate::spargebra::term::{ |
||||
GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, |
||||
TriplePattern, |
||||
}; |
||||
use crate::sparopt::algebra::{ |
||||
AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, |
||||
MinusAlgorithm, OrderExpression, |
||||
}; |
||||
use digest::Digest; |
||||
use json_event_parser::{JsonEvent, ToWriteJsonWriter}; |
||||
use md5::Md5; |
||||
use oxilangtag::LanguageTag; |
||||
use oxiri::Iri; |
||||
use oxrdf::{TermRef, Variable}; |
||||
use oxsdatatypes::*; |
||||
use rand::random; |
||||
use regex::{Regex, RegexBuilder}; |
||||
use sha1::Sha1; |
||||
use sha2::{Sha256, Sha384, Sha512}; |
||||
use spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression}; |
||||
use spargebra::term::{ |
||||
GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, |
||||
TriplePattern, |
||||
}; |
||||
use sparopt::algebra::{ |
||||
AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, |
||||
MinusAlgorithm, OrderExpression, |
||||
}; |
||||
use std::cell::Cell; |
||||
use std::cmp::Ordering; |
||||
use std::collections::hash_map::DefaultHasher; |
@ -1,9 +1,9 @@ |
||||
use crate::model::NamedNode; |
||||
use crate::sparql::algebra::Query; |
||||
use crate::sparql::error::EvaluationError; |
||||
use crate::sparql::http::Client; |
||||
use crate::sparql::model::QueryResults; |
||||
use crate::sparql::results::QueryResultsFormat; |
||||
use crate::oxigraph::model::NamedNode; |
||||
use crate::oxigraph::sparql::algebra::Query; |
||||
use crate::oxigraph::sparql::error::EvaluationError; |
||||
use crate::oxigraph::sparql::http::Client; |
||||
use crate::oxigraph::sparql::model::QueryResults; |
||||
use crate::oxigraph::sparql::results::QueryResultsFormat; |
||||
use std::error::Error; |
||||
use std::time::Duration; |
||||
|
@ -1,21 +1,22 @@ |
||||
use crate::io::{RdfFormat, RdfParser}; |
||||
use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad}; |
||||
use crate::sparql::algebra::QueryDataset; |
||||
use crate::sparql::dataset::DatasetView; |
||||
use crate::sparql::eval::{EncodedTuple, SimpleEvaluator}; |
||||
use crate::sparql::http::Client; |
||||
use crate::sparql::{EvaluationError, Update, UpdateOptions}; |
||||
use crate::storage::numeric_encoder::{Decoder, EncodedTerm}; |
||||
use crate::storage::StorageWriter; |
||||
use oxiri::Iri; |
||||
use spargebra::algebra::{GraphPattern, GraphTarget}; |
||||
use spargebra::term::{ |
||||
use crate::oxigraph::io::{RdfFormat, RdfParser}; |
||||
use crate::oxigraph::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad}; |
||||
use crate::oxigraph::sparql::algebra::QueryDataset; |
||||
use crate::oxigraph::sparql::dataset::DatasetView; |
||||
use crate::oxigraph::sparql::eval::{EncodedTuple, SimpleEvaluator}; |
||||
use crate::oxigraph::sparql::http::Client; |
||||
use crate::oxigraph::sparql::{EvaluationError, Update, UpdateOptions}; |
||||
use crate::oxigraph::storage::numeric_encoder::{Decoder, EncodedTerm}; |
||||
use crate::oxigraph::storage::StorageWriter; |
||||
use crate::spargebra::algebra::{GraphPattern, GraphTarget}; |
||||
use crate::spargebra::term::{ |
||||
BlankNode, GraphName, GraphNamePattern, GroundQuad, GroundQuadPattern, GroundSubject, |
||||
GroundTerm, GroundTermPattern, GroundTriple, GroundTriplePattern, NamedNode, NamedNodePattern, |
||||
Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable, |
||||
}; |
||||
use spargebra::GraphUpdateOperation; |
||||
use sparopt::Optimizer; |
||||
use crate::spargebra::GraphUpdateOperation; |
||||
use crate::sparopt; |
||||
use crate::sparopt::Optimizer; |
||||
use oxiri::Iri; |
||||
use std::collections::HashMap; |
||||
use std::io; |
||||
use std::rc::Rc; |
@ -1,7 +1,7 @@ |
||||
//! TODO: This storage is dramatically naive.
|
||||
|
||||
use crate::storage::StorageError; |
||||
use crate::store::CorruptionError; |
||||
use crate::oxigraph::storage::StorageError; |
||||
use crate::oxigraph::store::CorruptionError; |
||||
use std::cell::RefCell; |
||||
use std::collections::{BTreeMap, HashMap}; |
||||
use std::error::Error; |
@ -1,20 +1,24 @@ |
||||
#![allow(clippy::same_name_method)] |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
use crate::model::Quad; |
||||
use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; |
||||
use crate::storage::backend::{Reader, Transaction}; |
||||
use crate::oxigraph::model::Quad; |
||||
use crate::oxigraph::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; |
||||
use crate::oxigraph::storage::backend::{Reader, Transaction}; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
use crate::storage::binary_encoder::LATEST_STORAGE_VERSION; |
||||
use crate::storage::binary_encoder::{ |
||||
use crate::oxigraph::storage::binary_encoder::LATEST_STORAGE_VERSION; |
||||
use crate::oxigraph::storage::binary_encoder::{ |
||||
decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple, |
||||
write_gosp_quad, write_gpos_quad, write_gspo_quad, write_osp_quad, write_ospg_quad, |
||||
write_pos_quad, write_posg_quad, write_spo_quad, write_spog_quad, write_term, QuadEncoding, |
||||
WRITTEN_TERM_MAX_SIZE, |
||||
}; |
||||
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; |
||||
pub use crate::oxigraph::storage::error::{ |
||||
CorruptionError, LoaderError, SerializerError, StorageError, |
||||
}; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
use crate::storage::numeric_encoder::Decoder; |
||||
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; |
||||
use crate::oxigraph::storage::numeric_encoder::Decoder; |
||||
use crate::oxigraph::storage::numeric_encoder::{ |
||||
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, |
||||
}; |
||||
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter}; |
||||
#[cfg(all(not(target_family = "wasm")))] |
||||
use std::collections::VecDeque; |
@ -1,9 +1,9 @@ |
||||
#![allow(clippy::unreadable_literal)] |
||||
|
||||
use crate::model::*; |
||||
use crate::storage::error::{CorruptionError, StorageError}; |
||||
use crate::storage::small_string::SmallString; |
||||
use oxsdatatypes::*; |
||||
use crate::oxigraph::model::*; |
||||
use crate::oxigraph::storage::error::{CorruptionError, StorageError}; |
||||
use crate::oxigraph::storage::small_string::SmallString; |
||||
use crate::oxsdatatypes::*; |
||||
use siphasher::sip128::{Hasher128, SipHasher24}; |
||||
use std::fmt::Debug; |
||||
use std::hash::{Hash, Hasher}; |
@ -0,0 +1,51 @@ |
||||
OxRDF |
||||
===== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdf.svg)](https://crates.io/crates/oxrdf) |
||||
[![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/). |
||||
|
||||
This crate is intended to be a basic building block of other crates like [Oxigraph](https://crates.io/crates/oxigraph) or [Spargebra](https://crates.io/crates/spargebra). |
||||
|
||||
Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is available behind the `rdf-star` feature. |
||||
|
||||
OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/). |
||||
|
||||
Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files. |
||||
|
||||
Usage example: |
||||
|
||||
```rust |
||||
use oxrdf::*; |
||||
|
||||
let mut graph = Graph::default(); |
||||
|
||||
// insertion |
||||
let ex = NamedNodeRef::new("http://example.com").unwrap(); |
||||
let triple = TripleRef::new(ex, ex, ex); |
||||
graph.insert(triple); |
||||
|
||||
// simple filter |
||||
let results: Vec<_> = graph.triples_for_subject(ex).collect(); |
||||
assert_eq!(vec![triple], results); |
||||
``` |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,403 @@ |
||||
use rand::random; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::io::Write; |
||||
use std::{fmt, str}; |
||||
|
||||
/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
|
||||
///
|
||||
/// The common way to create a new blank node is to use the [`BlankNode::default()`] function.
|
||||
///
|
||||
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new()`] function.
|
||||
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
|
||||
///
|
||||
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
|
||||
/// ```
|
||||
/// use oxrdf::BlankNode;
|
||||
///
|
||||
/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
|
||||
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] |
||||
pub struct BlankNode(BlankNodeContent); |
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] |
||||
enum BlankNodeContent { |
||||
Named(String), |
||||
Anonymous { id: u128, str: IdStr }, |
||||
} |
||||
|
||||
impl BlankNode { |
||||
/// Creates a blank node from a unique identifier.
|
||||
///
|
||||
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
|
||||
///
|
||||
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
|
||||
/// that creates a random ID that could be easily inlined by Oxigraph stores.
|
||||
pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> { |
||||
let id = id.into(); |
||||
validate_blank_node_identifier(&id)?; |
||||
Ok(Self::new_unchecked(id)) |
||||
} |
||||
|
||||
/// Creates a blank node from a unique identifier without validation.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
|
||||
/// according to N-Triples, Turtle, and SPARQL grammars.
|
||||
///
|
||||
/// [`BlankNode::new()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub fn new_unchecked(id: impl Into<String>) -> Self { |
||||
let id = id.into(); |
||||
if let Some(numerical_id) = to_integer_id(&id) { |
||||
Self::new_from_unique_id(numerical_id) |
||||
} else { |
||||
Self(BlankNodeContent::Named(id)) |
||||
} |
||||
} |
||||
|
||||
/// Creates a blank node from a unique numerical id.
|
||||
///
|
||||
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`].
|
||||
#[inline] |
||||
pub fn new_from_unique_id(id: u128) -> Self { |
||||
Self(BlankNodeContent::Anonymous { |
||||
id, |
||||
str: IdStr::new(id), |
||||
}) |
||||
} |
||||
|
||||
/// Returns the underlying ID of this blank node.
|
||||
#[inline] |
||||
pub fn as_str(&self) -> &str { |
||||
match &self.0 { |
||||
BlankNodeContent::Named(id) => id, |
||||
BlankNodeContent::Anonymous { str, .. } => str.as_str(), |
||||
} |
||||
} |
||||
|
||||
/// Returns the underlying ID of this blank node.
|
||||
#[inline] |
||||
pub fn into_string(self) -> String { |
||||
match self.0 { |
||||
BlankNodeContent::Named(id) => id, |
||||
BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn as_ref(&self) -> BlankNodeRef<'_> { |
||||
BlankNodeRef(match &self.0 { |
||||
BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()), |
||||
BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous { |
||||
id: *id, |
||||
str: str.as_str(), |
||||
}, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for BlankNode { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
self.as_ref().fmt(f) |
||||
} |
||||
} |
||||
|
||||
impl Default for BlankNode { |
||||
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
|
||||
#[inline] |
||||
fn default() -> Self { |
||||
// We ensure the ID does not start with a number to be also valid with RDF/XML
|
||||
loop { |
||||
let id = random(); |
||||
let str = IdStr::new(id); |
||||
if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) { |
||||
return Self(BlankNodeContent::Anonymous { id, str }); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
|
||||
///
|
||||
/// The common way to create a new blank node is to use the [`BlankNode::default`] trait method.
|
||||
///
|
||||
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new()`] function.
|
||||
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
|
||||
///
|
||||
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
|
||||
/// ```
|
||||
/// use oxrdf::BlankNodeRef;
|
||||
///
|
||||
/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
|
||||
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] |
||||
pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>); |
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] |
||||
enum BlankNodeRefContent<'a> { |
||||
Named(&'a str), |
||||
Anonymous { id: u128, str: &'a str }, |
||||
} |
||||
|
||||
impl<'a> BlankNodeRef<'a> { |
||||
/// Creates a blank node from a unique identifier.
|
||||
///
|
||||
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
|
||||
///
|
||||
/// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`].
|
||||
/// that creates a random ID that could be easily inlined by Oxigraph stores.
|
||||
pub fn new(id: &'a str) -> Result<Self, BlankNodeIdParseError> { |
||||
validate_blank_node_identifier(id)?; |
||||
Ok(Self::new_unchecked(id)) |
||||
} |
||||
|
||||
/// Creates a blank node from a unique identifier without validation.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
|
||||
/// according to N-Triples, Turtle, and SPARQL grammars.
|
||||
///
|
||||
/// [`BlankNodeRef::new()`) is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub fn new_unchecked(id: &'a str) -> Self { |
||||
if let Some(numerical_id) = to_integer_id(id) { |
||||
Self(BlankNodeRefContent::Anonymous { |
||||
id: numerical_id, |
||||
str: id, |
||||
}) |
||||
} else { |
||||
Self(BlankNodeRefContent::Named(id)) |
||||
} |
||||
} |
||||
|
||||
/// Returns the underlying ID of this blank node.
|
||||
#[inline] |
||||
pub const fn as_str(self) -> &'a str { |
||||
match self.0 { |
||||
BlankNodeRefContent::Named(id) => id, |
||||
BlankNodeRefContent::Anonymous { str, .. } => str, |
||||
} |
||||
} |
||||
|
||||
/// Returns the internal numerical ID of this blank node if it has been created using [`BlankNode::new_from_unique_id`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::BlankNode;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// BlankNode::new_from_unique_id(128).as_ref().unique_id(),
|
||||
/// Some(128)
|
||||
/// );
|
||||
/// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
|
||||
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn unique_id(&self) -> Option<u128> { |
||||
match self.0 { |
||||
BlankNodeRefContent::Named(_) => None, |
||||
BlankNodeRefContent::Anonymous { id, .. } => Some(id), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_owned(self) -> BlankNode { |
||||
BlankNode(match self.0 { |
||||
BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()), |
||||
BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous { |
||||
id, |
||||
str: IdStr::new(id), |
||||
}, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for BlankNodeRef<'_> { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
write!(f, "_:{}", self.as_str()) |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> { |
||||
#[inline] |
||||
fn from(node: &'a BlankNode) -> Self { |
||||
node.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<BlankNodeRef<'a>> for BlankNode { |
||||
#[inline] |
||||
fn from(node: BlankNodeRef<'a>) -> Self { |
||||
node.into_owned() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<BlankNode> for BlankNodeRef<'_> { |
||||
#[inline] |
||||
fn eq(&self, other: &BlankNode) -> bool { |
||||
*self == other.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<BlankNodeRef<'_>> for BlankNode { |
||||
#[inline] |
||||
fn eq(&self, other: &BlankNodeRef<'_>) -> bool { |
||||
self.as_ref() == *other |
||||
} |
||||
} |
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] |
||||
struct IdStr([u8; 32]); |
||||
|
||||
impl IdStr { |
||||
#[inline] |
||||
fn new(id: u128) -> Self { |
||||
let mut str = [0; 32]; |
||||
write!(&mut str[..], "{id:x}").unwrap(); |
||||
Self(str) |
||||
} |
||||
|
||||
#[inline] |
||||
fn as_str(&self) -> &str { |
||||
let len = self.0.iter().position(|x| x == &0).unwrap_or(32); |
||||
str::from_utf8(&self.0[..len]).unwrap() |
||||
} |
||||
} |
||||
|
||||
fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> { |
||||
let mut chars = id.chars(); |
||||
let front = chars.next().ok_or(BlankNodeIdParseError)?; |
||||
match front { |
||||
'0'..='9' |
||||
| '_' |
||||
| ':' |
||||
| 'A'..='Z' |
||||
| 'a'..='z' |
||||
| '\u{00C0}'..='\u{00D6}' |
||||
| '\u{00D8}'..='\u{00F6}' |
||||
| '\u{00F8}'..='\u{02FF}' |
||||
| '\u{0370}'..='\u{037D}' |
||||
| '\u{037F}'..='\u{1FFF}' |
||||
| '\u{200C}'..='\u{200D}' |
||||
| '\u{2070}'..='\u{218F}' |
||||
| '\u{2C00}'..='\u{2FEF}' |
||||
| '\u{3001}'..='\u{D7FF}' |
||||
| '\u{F900}'..='\u{FDCF}' |
||||
| '\u{FDF0}'..='\u{FFFD}' |
||||
| '\u{10000}'..='\u{EFFFF}' => (), |
||||
_ => return Err(BlankNodeIdParseError), |
||||
} |
||||
for c in chars { |
||||
match c { |
||||
'.' // validated later
|
||||
| '-' |
||||
| '0'..='9' |
||||
| '\u{00B7}' |
||||
| '\u{0300}'..='\u{036F}' |
||||
| '\u{203F}'..='\u{2040}' |
||||
| '_' |
||||
| ':' |
||||
| 'A'..='Z' |
||||
| 'a'..='z' |
||||
| '\u{00C0}'..='\u{00D6}' |
||||
| '\u{00D8}'..='\u{00F6}' |
||||
| '\u{00F8}'..='\u{02FF}' |
||||
| '\u{0370}'..='\u{037D}' |
||||
| '\u{037F}'..='\u{1FFF}' |
||||
| '\u{200C}'..='\u{200D}' |
||||
| '\u{2070}'..='\u{218F}' |
||||
| '\u{2C00}'..='\u{2FEF}' |
||||
| '\u{3001}'..='\u{D7FF}' |
||||
| '\u{F900}'..='\u{FDCF}' |
||||
| '\u{FDF0}'..='\u{FFFD}' |
||||
| '\u{10000}'..='\u{EFFFF}' => (), |
||||
_ => return Err(BlankNodeIdParseError), |
||||
} |
||||
} |
||||
|
||||
// Could not end with a dot
|
||||
if id.ends_with('.') { |
||||
Err(BlankNodeIdParseError) |
||||
} else { |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
fn to_integer_id(id: &str) -> Option<u128> { |
||||
let digits = id.as_bytes(); |
||||
let mut value: u128 = 0; |
||||
if let None | Some(b'0') = digits.first() { |
||||
return None; // No empty string or leading zeros
|
||||
} |
||||
for digit in digits { |
||||
value = value.checked_mul(16)?.checked_add( |
||||
match *digit { |
||||
b'0'..=b'9' => digit - b'0', |
||||
b'a'..=b'f' => digit - b'a' + 10, |
||||
_ => return None, |
||||
} |
||||
.into(), |
||||
)?; |
||||
} |
||||
Some(value) |
||||
} |
||||
|
||||
/// An error raised during [`BlankNode`] IDs validation.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error("The blank node identifier is invalid")] |
||||
pub struct BlankNodeIdParseError; |
||||
|
||||
#[cfg(test)] |
||||
#[allow(clippy::panic_in_result_fn)] |
||||
mod tests { |
||||
use super::*; |
||||
|
||||
#[test] |
||||
fn as_str_partial() { |
||||
let b = BlankNode::new_from_unique_id(0x42); |
||||
assert_eq!(b.as_str(), "42"); |
||||
} |
||||
|
||||
#[test] |
||||
fn as_str_full() { |
||||
let b = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000); |
||||
assert_eq!(b.as_str(), "77776666555544443333222211110000"); |
||||
} |
||||
|
||||
#[test] |
||||
fn new_validation() { |
||||
BlankNode::new("").unwrap_err(); |
||||
BlankNode::new("a").unwrap(); |
||||
BlankNode::new("-").unwrap_err(); |
||||
BlankNode::new("a-").unwrap(); |
||||
BlankNode::new(".").unwrap_err(); |
||||
BlankNode::new("a.").unwrap_err(); |
||||
BlankNode::new("a.a").unwrap(); |
||||
} |
||||
|
||||
#[test] |
||||
fn new_numerical() { |
||||
assert_eq!( |
||||
BlankNode::new("100a").unwrap(), |
||||
BlankNode::new_from_unique_id(0x100a), |
||||
); |
||||
assert_ne!( |
||||
BlankNode::new("100A").unwrap(), |
||||
BlankNode::new_from_unique_id(0x100a) |
||||
); |
||||
} |
||||
|
||||
#[test] |
||||
fn test_equals() { |
||||
assert_eq!( |
||||
BlankNode::new("100a").unwrap(), |
||||
BlankNodeRef::new("100a").unwrap() |
||||
); |
||||
assert_eq!( |
||||
BlankNode::new("zzz").unwrap(), |
||||
BlankNodeRef::new("zzz").unwrap() |
||||
); |
||||
} |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,284 @@ |
||||
//! [In-memory implementation](Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
|
||||
//!
|
||||
//! Usage example:
|
||||
//! ```
|
||||
//! use oxrdf::*;
|
||||
//!
|
||||
//! let mut graph = Graph::default();
|
||||
//!
|
||||
//! // insertion
|
||||
//! let ex = NamedNodeRef::new("http://example.com")?;
|
||||
//! let triple = TripleRef::new(ex, ex, ex);
|
||||
//! graph.insert(triple);
|
||||
//!
|
||||
//! // simple filter
|
||||
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
|
||||
//! assert_eq!(vec![triple], results);
|
||||
//!
|
||||
//! // Print
|
||||
//! assert_eq!(
|
||||
//! graph.to_string(),
|
||||
//! "<http://example.com> <http://example.com> <http://example.com> .\n"
|
||||
//! );
|
||||
//! # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
//! ```
|
||||
//!
|
||||
//! See also [`Dataset`] if you want to get support of multiple RDF graphs at the same time.
|
||||
|
||||
pub use crate::oxrdf::dataset::CanonicalizationAlgorithm; |
||||
use crate::oxrdf::dataset::*; |
||||
use crate::oxrdf::*; |
||||
use std::fmt; |
||||
|
||||
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
|
||||
///
|
||||
/// It can accommodate a fairly large number of triples (in the few millions).
|
||||
///
|
||||
/// <div class="warning">It interns the string and does not do any garbage collection yet:
|
||||
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
|
||||
///
|
||||
/// Usage example:
|
||||
/// ```
|
||||
/// use oxrdf::*;
|
||||
///
|
||||
/// let mut graph = Graph::default();
|
||||
///
|
||||
/// // insertion
|
||||
/// let ex = NamedNodeRef::new("http://example.com")?;
|
||||
/// let triple = TripleRef::new(ex, ex, ex);
|
||||
/// graph.insert(triple);
|
||||
///
|
||||
/// // simple filter
|
||||
/// let results: Vec<_> = graph.triples_for_subject(ex).collect();
|
||||
/// assert_eq!(vec![triple], results);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Debug, Default, Clone)] |
||||
pub struct Graph { |
||||
dataset: Dataset, |
||||
} |
||||
|
||||
impl Graph { |
||||
/// Creates a new graph.
|
||||
pub fn new() -> Self { |
||||
Self::default() |
||||
} |
||||
|
||||
fn graph(&self) -> GraphView<'_> { |
||||
self.dataset.graph(GraphNameRef::DefaultGraph) |
||||
} |
||||
|
||||
fn graph_mut(&mut self) -> GraphViewMut<'_> { |
||||
self.dataset.graph_mut(GraphNameRef::DefaultGraph) |
||||
} |
||||
|
||||
/// Returns all the triples contained by the graph.
|
||||
pub fn iter(&self) -> Iter<'_> { |
||||
Iter { |
||||
inner: self.graph().iter(), |
||||
} |
||||
} |
||||
|
||||
pub fn triples_for_subject<'a, 'b>( |
||||
&'a self, |
||||
subject: impl Into<SubjectRef<'b>>, |
||||
) -> impl Iterator<Item = TripleRef<'a>> + 'a { |
||||
self.graph() |
||||
.triples_for_interned_subject(self.dataset.encoded_subject(subject)) |
||||
} |
||||
|
||||
pub fn objects_for_subject_predicate<'a, 'b>( |
||||
&'a self, |
||||
subject: impl Into<SubjectRef<'b>>, |
||||
predicate: impl Into<NamedNodeRef<'b>>, |
||||
) -> impl Iterator<Item = TermRef<'a>> + 'a { |
||||
self.graph().objects_for_interned_subject_predicate( |
||||
self.dataset.encoded_subject(subject), |
||||
self.dataset.encoded_named_node(predicate), |
||||
) |
||||
} |
||||
|
||||
pub fn object_for_subject_predicate<'a, 'b>( |
||||
&'a self, |
||||
subject: impl Into<SubjectRef<'b>>, |
||||
predicate: impl Into<NamedNodeRef<'b>>, |
||||
) -> Option<TermRef<'a>> { |
||||
self.graph() |
||||
.objects_for_subject_predicate(subject, predicate) |
||||
.next() |
||||
} |
||||
|
||||
pub fn predicates_for_subject_object<'a, 'b>( |
||||
&'a self, |
||||
subject: impl Into<SubjectRef<'b>>, |
||||
object: impl Into<TermRef<'b>>, |
||||
) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a { |
||||
self.graph().predicates_for_interned_subject_object( |
||||
self.dataset.encoded_subject(subject), |
||||
self.dataset.encoded_term(object), |
||||
) |
||||
} |
||||
|
||||
pub fn triples_for_predicate<'a, 'b>( |
||||
&'a self, |
||||
predicate: impl Into<NamedNodeRef<'b>>, |
||||
) -> impl Iterator<Item = TripleRef<'a>> + 'a { |
||||
self.graph() |
||||
.triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) |
||||
} |
||||
|
||||
pub fn subjects_for_predicate_object<'a, 'b>( |
||||
&'a self, |
||||
predicate: impl Into<NamedNodeRef<'b>>, |
||||
object: impl Into<TermRef<'b>>, |
||||
) -> impl Iterator<Item = SubjectRef<'a>> + 'a { |
||||
self.graph().subjects_for_interned_predicate_object( |
||||
self.dataset.encoded_named_node(predicate), |
||||
self.dataset.encoded_term(object), |
||||
) |
||||
} |
||||
|
||||
pub fn subject_for_predicate_object<'a, 'b>( |
||||
&'a self, |
||||
predicate: impl Into<NamedNodeRef<'b>>, |
||||
object: impl Into<TermRef<'b>>, |
||||
) -> Option<SubjectRef<'a>> { |
||||
self.graph().subject_for_predicate_object(predicate, object) |
||||
} |
||||
|
||||
pub fn triples_for_object<'a, 'b>( |
||||
&'a self, |
||||
object: impl Into<TermRef<'b>>, |
||||
) -> impl Iterator<Item = TripleRef<'a>> + 'a { |
||||
self.graph() |
||||
.triples_for_interned_object(self.dataset.encoded_term(object)) |
||||
} |
||||
|
||||
/// Checks if the graph contains the given triple.
|
||||
pub fn contains<'a>(&self, triple: impl Into<TripleRef<'a>>) -> bool { |
||||
self.graph().contains(triple) |
||||
} |
||||
|
||||
/// Returns the number of triples in this graph.
|
||||
pub fn len(&self) -> usize { |
||||
self.dataset.len() |
||||
} |
||||
|
||||
/// Checks if this graph contains a triple.
|
||||
pub fn is_empty(&self) -> bool { |
||||
self.dataset.is_empty() |
||||
} |
||||
|
||||
/// Adds a triple to the graph.
|
||||
pub fn insert<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> bool { |
||||
self.graph_mut().insert(triple) |
||||
} |
||||
|
||||
/// Removes a concrete triple from the graph.
|
||||
pub fn remove<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> bool { |
||||
self.graph_mut().remove(triple) |
||||
} |
||||
|
||||
/// Clears the graph.
|
||||
pub fn clear(&mut self) { |
||||
self.dataset.clear() |
||||
} |
||||
|
||||
/// Canonicalizes the dataset by renaming blank nodes.
|
||||
///
|
||||
/// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
|
||||
/// ```
|
||||
/// use oxrdf::graph::CanonicalizationAlgorithm;
|
||||
/// use oxrdf::*;
|
||||
///
|
||||
/// let iri = NamedNodeRef::new("http://example.com")?;
|
||||
///
|
||||
/// let mut graph1 = Graph::new();
|
||||
/// let bnode1 = BlankNode::default();
|
||||
/// graph1.insert(TripleRef::new(iri, iri, &bnode1));
|
||||
/// graph1.insert(TripleRef::new(&bnode1, iri, iri));
|
||||
///
|
||||
/// let mut graph2 = Graph::new();
|
||||
/// let bnode2 = BlankNode::default();
|
||||
/// graph2.insert(TripleRef::new(iri, iri, &bnode2));
|
||||
/// graph2.insert(TripleRef::new(&bnode2, iri, iri));
|
||||
///
|
||||
/// assert_ne!(graph1, graph2);
|
||||
/// graph1.canonicalize(CanonicalizationAlgorithm::Unstable);
|
||||
/// graph2.canonicalize(CanonicalizationAlgorithm::Unstable);
|
||||
/// assert_eq!(graph1, graph2);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
///
|
||||
/// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
|
||||
/// Hence, this canonization might not be suitable for diffs.</div>
|
||||
///
|
||||
/// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div>
|
||||
pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { |
||||
self.dataset.canonicalize(algorithm) |
||||
} |
||||
} |
||||
|
||||
impl PartialEq for Graph { |
||||
fn eq(&self, other: &Self) -> bool { |
||||
self.dataset == other.dataset |
||||
} |
||||
} |
||||
|
||||
impl Eq for Graph {} |
||||
|
||||
impl<'a> IntoIterator for &'a Graph { |
||||
type Item = TripleRef<'a>; |
||||
type IntoIter = Iter<'a>; |
||||
|
||||
fn into_iter(self) -> Self::IntoIter { |
||||
self.iter() |
||||
} |
||||
} |
||||
|
||||
impl FromIterator<Triple> for Graph { |
||||
fn from_iter<I: IntoIterator<Item = Triple>>(iter: I) -> Self { |
||||
let mut g = Self::new(); |
||||
g.extend(iter); |
||||
g |
||||
} |
||||
} |
||||
|
||||
impl<'a, T: Into<TripleRef<'a>>> FromIterator<T> for Graph { |
||||
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { |
||||
let mut g = Self::new(); |
||||
g.extend(iter); |
||||
g |
||||
} |
||||
} |
||||
|
||||
// Inserts every triple from the iterator into the graph.
impl Extend<Triple> for Graph {
    fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) {
        self.graph_mut().extend(iter)
    }
}
||||
|
||||
// Inserts every triple-convertible item from the iterator into the graph.
impl<'a, T: Into<TripleRef<'a>>> Extend<T> for Graph {
    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
        self.graph_mut().extend(iter)
    }
}
||||
|
||||
// Serializes the graph with the view's formatter (N-Triples-style output).
// NOTE(review): exact serialization format is defined by the graph view's
// Display impl, which is outside this chunk — confirm there.
impl fmt::Display for Graph {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.graph().fmt(f)
    }
}
||||
|
||||
/// Iterator returned by [`Graph::iter`].
pub struct Iter<'a> {
    // Underlying graph-view iterator this type wraps.
    inner: GraphViewIter<'a>,
}
||||
|
||||
impl<'a> Iterator for Iter<'a> {
    type Item = TripleRef<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        // Plain delegation to the wrapped graph-view iterator.
        self.inner.next()
    }
}
@ -0,0 +1,535 @@ |
||||
//! Interning of RDF elements using a hand-rolled open-addressing string interner
|
||||
|
||||
use crate::oxrdf::*; |
||||
use std::collections::hash_map::{Entry, HashMap, RandomState}; |
||||
use std::hash::{BuildHasher, Hasher}; |
||||
|
||||
/// String interner mapping RDF element strings to compact `u64` keys.
#[derive(Debug, Default, Clone)]
pub struct Interner {
    // Hasher used to compute the initial probe position of a string.
    hasher: RandomState,
    // Open-addressed table: probe slot -> interned string. Keyed by the
    // already-computed hash, hence the pass-through identity hasher.
    string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
    // Blank nodes with a 128-bit numerical id keep their string form here.
    string_for_blank_node_id: HashMap<u128, String>,
    // Owned quoted triples, so decoding can hand out references.
    #[cfg(feature = "rdf-star")]
    triples: HashMap<InternedTriple, Triple>,
}
||||
|
||||
impl Interner { |
||||
#[allow(clippy::never_loop)] |
||||
fn get_or_intern(&mut self, value: &str) -> Key { |
||||
let mut hash = self.hash(value); |
||||
loop { |
||||
match self.string_for_hash.entry(hash) { |
||||
Entry::Vacant(e) => { |
||||
e.insert(value.into()); |
||||
return Key(hash); |
||||
} |
||||
Entry::Occupied(e) => loop { |
||||
if e.get() == value { |
||||
return Key(hash); |
||||
} else if hash == u64::MAX - 1 { |
||||
hash = 0; |
||||
} else { |
||||
hash += 1; |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn get(&self, value: &str) -> Option<Key> { |
||||
let mut hash = self.hash(value); |
||||
loop { |
||||
let v = self.string_for_hash.get(&hash)?; |
||||
if v == value { |
||||
return Some(Key(hash)); |
||||
} else if hash == u64::MAX - 1 { |
||||
hash = 0; |
||||
} else { |
||||
hash += 1; |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn hash(&self, value: &str) -> u64 { |
||||
let mut hasher = self.hasher.build_hasher(); |
||||
hasher.write(value.as_bytes()); |
||||
let hash = hasher.finish(); |
||||
if hash == u64::MAX { |
||||
0 |
||||
} else { |
||||
hash |
||||
} |
||||
} |
||||
|
||||
fn resolve(&self, key: Key) -> &str { |
||||
&self.string_for_hash[&key.0] |
||||
} |
||||
} |
||||
|
||||
/// Compact identifier of an interned string (its slot in `Interner::string_for_hash`).
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);

impl Key {
    /// Lowest possible key value.
    fn first() -> Self {
        Self(0)
    }

    /// Next key value; saturates at the top instead of wrapping.
    fn next(self) -> Self {
        Self(self.0.saturating_add(1))
    }

    /// A key matching no interned string: `Interner::hash` never produces
    /// `u64::MAX` (it remaps it to 0).
    fn impossible() -> Self {
        Self(u64::MAX)
    }
}
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] |
||||
pub struct InternedNamedNode { |
||||
id: Key, |
||||
} |
||||
|
||||
impl InternedNamedNode { |
||||
pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self { |
||||
Self { |
||||
id: interner.get_or_intern(named_node.as_str()), |
||||
} |
||||
} |
||||
|
||||
pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> { |
||||
Some(Self { |
||||
id: interner.get(named_node.as_str())?, |
||||
}) |
||||
} |
||||
|
||||
pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> { |
||||
NamedNodeRef::new_unchecked(interner.resolve(self.id)) |
||||
} |
||||
|
||||
pub fn first() -> Self { |
||||
Self { id: Key::first() } |
||||
} |
||||
|
||||
pub fn next(self) -> Self { |
||||
Self { id: self.id.next() } |
||||
} |
||||
|
||||
pub fn impossible() -> Self { |
||||
Self { |
||||
id: Key::impossible(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] |
||||
pub enum InternedBlankNode { |
||||
Number { id: u128 }, |
||||
Other { id: Key }, |
||||
} |
||||
|
||||
impl InternedBlankNode { |
||||
pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self { |
||||
if let Some(id) = blank_node.unique_id() { |
||||
interner |
||||
.string_for_blank_node_id |
||||
.entry(id) |
||||
.or_insert_with(|| blank_node.as_str().into()); |
||||
Self::Number { id } |
||||
} else { |
||||
Self::Other { |
||||
id: interner.get_or_intern(blank_node.as_str()), |
||||
} |
||||
} |
||||
} |
||||
|
||||
pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> { |
||||
if let Some(id) = blank_node.unique_id() { |
||||
interner |
||||
.string_for_blank_node_id |
||||
.contains_key(&id) |
||||
.then_some(Self::Number { id }) |
||||
} else { |
||||
Some(Self::Other { |
||||
id: interner.get(blank_node.as_str())?, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> { |
||||
BlankNodeRef::new_unchecked(match self { |
||||
Self::Number { id } => &interner.string_for_blank_node_id[&id], |
||||
Self::Other { id } => interner.resolve(id), |
||||
}) |
||||
} |
||||
|
||||
pub fn next(self) -> Self { |
||||
match self { |
||||
Self::Number { id } => Self::Number { |
||||
id: id.saturating_add(1), |
||||
}, |
||||
Self::Other { id } => Self::Other { id: id.next() }, |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// A literal in interned form, mirroring the three shapes of an RDF 1.1 literal.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedLiteral {
    // Simple literal (implicit xsd:string datatype).
    String {
        value_id: Key,
    },
    // Language-tagged string: both value and tag are interned.
    LanguageTaggedString {
        value_id: Key,
        language_id: Key,
    },
    // Any other datatype: value plus interned datatype IRI.
    TypedLiteral {
        value_id: Key,
        datatype: InternedNamedNode,
    },
}
||||
|
||||
impl InternedLiteral { |
||||
pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self { |
||||
let value_id = interner.get_or_intern(literal.value()); |
||||
if literal.is_plain() { |
||||
if let Some(language) = literal.language() { |
||||
Self::LanguageTaggedString { |
||||
value_id, |
||||
language_id: interner.get_or_intern(language), |
||||
} |
||||
} else { |
||||
Self::String { value_id } |
||||
} |
||||
} else { |
||||
Self::TypedLiteral { |
||||
value_id, |
||||
datatype: InternedNamedNode::encoded_into(literal.datatype(), interner), |
||||
} |
||||
} |
||||
} |
||||
|
||||
pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> { |
||||
let value_id = interner.get(literal.value())?; |
||||
Some(if literal.is_plain() { |
||||
if let Some(language) = literal.language() { |
||||
Self::LanguageTaggedString { |
||||
value_id, |
||||
language_id: interner.get(language)?, |
||||
} |
||||
} else { |
||||
Self::String { value_id } |
||||
} |
||||
} else { |
||||
Self::TypedLiteral { |
||||
value_id, |
||||
datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?, |
||||
} |
||||
}) |
||||
} |
||||
|
||||
pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> { |
||||
match self { |
||||
Self::String { value_id } => { |
||||
LiteralRef::new_simple_literal(interner.resolve(*value_id)) |
||||
} |
||||
Self::LanguageTaggedString { |
||||
value_id, |
||||
language_id, |
||||
} => LiteralRef::new_language_tagged_literal_unchecked( |
||||
interner.resolve(*value_id), |
||||
interner.resolve(*language_id), |
||||
), |
||||
Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal( |
||||
interner.resolve(*value_id), |
||||
datatype.decode_from(interner), |
||||
), |
||||
} |
||||
} |
||||
|
||||
pub fn next(&self) -> Self { |
||||
match self { |
||||
Self::String { value_id } => Self::String { |
||||
value_id: value_id.next(), |
||||
}, |
||||
Self::LanguageTaggedString { |
||||
value_id, |
||||
language_id, |
||||
} => Self::LanguageTaggedString { |
||||
value_id: *value_id, |
||||
language_id: language_id.next(), |
||||
}, |
||||
Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral { |
||||
value_id: *value_id, |
||||
datatype: datatype.next(), |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
pub enum InternedSubject { |
||||
NamedNode(InternedNamedNode), |
||||
BlankNode(InternedBlankNode), |
||||
#[cfg(feature = "rdf-star")] |
||||
Triple(Box<InternedTriple>), |
||||
} |
||||
|
||||
impl InternedSubject { |
||||
pub fn encoded_into(node: SubjectRef<'_>, interner: &mut Interner) -> Self { |
||||
match node { |
||||
SubjectRef::NamedNode(node) => { |
||||
Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) |
||||
} |
||||
SubjectRef::BlankNode(node) => { |
||||
Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) |
||||
} |
||||
#[cfg(feature = "rdf-star")] |
||||
SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into( |
||||
triple.as_ref(), |
||||
interner, |
||||
))), |
||||
} |
||||
} |
||||
|
||||
pub fn encoded_from(node: SubjectRef<'_>, interner: &Interner) -> Option<Self> { |
||||
Some(match node { |
||||
SubjectRef::NamedNode(node) => { |
||||
Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) |
||||
} |
||||
SubjectRef::BlankNode(node) => { |
||||
Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) |
||||
} |
||||
#[cfg(feature = "rdf-star")] |
||||
SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from( |
||||
triple.as_ref(), |
||||
interner, |
||||
)?)), |
||||
}) |
||||
} |
||||
|
||||
pub fn decode_from<'a>(&self, interner: &'a Interner) -> SubjectRef<'a> { |
||||
match self { |
||||
Self::NamedNode(node) => SubjectRef::NamedNode(node.decode_from(interner)), |
||||
Self::BlankNode(node) => SubjectRef::BlankNode(node.decode_from(interner)), |
||||
#[cfg(feature = "rdf-star")] |
||||
Self::Triple(triple) => SubjectRef::Triple(&interner.triples[triple.as_ref()]), |
||||
} |
||||
} |
||||
|
||||
pub fn first() -> Self { |
||||
Self::NamedNode(InternedNamedNode::first()) |
||||
} |
||||
|
||||
pub fn next(&self) -> Self { |
||||
match self { |
||||
Self::NamedNode(node) => Self::NamedNode(node.next()), |
||||
Self::BlankNode(node) => Self::BlankNode(node.next()), |
||||
#[cfg(feature = "rdf-star")] |
||||
Self::Triple(triple) => Self::Triple(Box::new(triple.next())), |
||||
} |
||||
} |
||||
|
||||
pub fn impossible() -> Self { |
||||
Self::NamedNode(InternedNamedNode::impossible()) |
||||
} |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
pub enum InternedGraphName { |
||||
DefaultGraph, |
||||
NamedNode(InternedNamedNode), |
||||
BlankNode(InternedBlankNode), |
||||
} |
||||
|
||||
impl InternedGraphName { |
||||
pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Interner) -> Self { |
||||
match node { |
||||
GraphNameRef::DefaultGraph => Self::DefaultGraph, |
||||
GraphNameRef::NamedNode(node) => { |
||||
Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) |
||||
} |
||||
GraphNameRef::BlankNode(node) => { |
||||
Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) |
||||
} |
||||
} |
||||
} |
||||
|
||||
pub fn encoded_from(node: GraphNameRef<'_>, interner: &Interner) -> Option<Self> { |
||||
Some(match node { |
||||
GraphNameRef::DefaultGraph => Self::DefaultGraph, |
||||
GraphNameRef::NamedNode(node) => { |
||||
Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) |
||||
} |
||||
GraphNameRef::BlankNode(node) => { |
||||
Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) |
||||
} |
||||
}) |
||||
} |
||||
|
||||
pub fn decode_from<'a>(&self, interner: &'a Interner) -> GraphNameRef<'a> { |
||||
match self { |
||||
Self::DefaultGraph => GraphNameRef::DefaultGraph, |
||||
Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)), |
||||
Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)), |
||||
} |
||||
} |
||||
|
||||
pub fn first() -> Self { |
||||
Self::DefaultGraph |
||||
} |
||||
|
||||
pub fn next(&self) -> Self { |
||||
match self { |
||||
Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()), |
||||
Self::NamedNode(node) => Self::NamedNode(node.next()), |
||||
Self::BlankNode(node) => Self::BlankNode(node.next()), |
||||
} |
||||
} |
||||
|
||||
pub fn impossible() -> Self { |
||||
Self::NamedNode(InternedNamedNode::impossible()) |
||||
} |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
pub enum InternedTerm { |
||||
NamedNode(InternedNamedNode), |
||||
BlankNode(InternedBlankNode), |
||||
Literal(InternedLiteral), |
||||
#[cfg(feature = "rdf-star")] |
||||
Triple(Box<InternedTriple>), |
||||
} |
||||
|
||||
impl InternedTerm { |
||||
pub fn encoded_into(term: TermRef<'_>, interner: &mut Interner) -> Self { |
||||
match term { |
||||
TermRef::NamedNode(term) => { |
||||
Self::NamedNode(InternedNamedNode::encoded_into(term, interner)) |
||||
} |
||||
TermRef::BlankNode(term) => { |
||||
Self::BlankNode(InternedBlankNode::encoded_into(term, interner)) |
||||
} |
||||
TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)), |
||||
#[cfg(feature = "rdf-star")] |
||||
TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into( |
||||
triple.as_ref(), |
||||
interner, |
||||
))), |
||||
} |
||||
} |
||||
|
||||
pub fn encoded_from(term: TermRef<'_>, interner: &Interner) -> Option<Self> { |
||||
Some(match term { |
||||
TermRef::NamedNode(term) => { |
||||
Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?) |
||||
} |
||||
TermRef::BlankNode(term) => { |
||||
Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?) |
||||
} |
||||
TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?), |
||||
#[cfg(feature = "rdf-star")] |
||||
TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from( |
||||
triple.as_ref(), |
||||
interner, |
||||
)?)), |
||||
}) |
||||
} |
||||
|
||||
pub fn decode_from<'a>(&self, interner: &'a Interner) -> TermRef<'a> { |
||||
match self { |
||||
Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)), |
||||
Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)), |
||||
Self::Literal(term) => TermRef::Literal(term.decode_from(interner)), |
||||
#[cfg(feature = "rdf-star")] |
||||
Self::Triple(triple) => TermRef::Triple(&interner.triples[triple.as_ref()]), |
||||
} |
||||
} |
||||
|
||||
pub fn first() -> Self { |
||||
Self::NamedNode(InternedNamedNode::first()) |
||||
} |
||||
|
||||
pub fn next(&self) -> Self { |
||||
match self { |
||||
Self::NamedNode(node) => Self::NamedNode(node.next()), |
||||
Self::BlankNode(node) => Self::BlankNode(node.next()), |
||||
Self::Literal(node) => Self::Literal(node.next()), |
||||
#[cfg(feature = "rdf-star")] |
||||
Self::Triple(triple) => Self::Triple(Box::new(triple.next())), |
||||
} |
||||
} |
||||
|
||||
pub fn impossible() -> Self { |
||||
Self::NamedNode(InternedNamedNode::impossible()) |
||||
} |
||||
} |
||||
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
pub struct InternedTriple { |
||||
pub subject: InternedSubject, |
||||
pub predicate: InternedNamedNode, |
||||
pub object: InternedTerm, |
||||
} |
||||
|
||||
#[cfg(feature = "rdf-star")] |
||||
impl InternedTriple { |
||||
pub fn encoded_into(triple: TripleRef<'_>, interner: &mut Interner) -> Self { |
||||
let interned_triple = Self { |
||||
subject: InternedSubject::encoded_into(triple.subject, interner), |
||||
predicate: InternedNamedNode::encoded_into(triple.predicate, interner), |
||||
object: InternedTerm::encoded_into(triple.object, interner), |
||||
}; |
||||
interner |
||||
.triples |
||||
.insert(interned_triple.clone(), triple.into_owned()); |
||||
interned_triple |
||||
} |
||||
|
||||
pub fn encoded_from(triple: TripleRef<'_>, interner: &Interner) -> Option<Self> { |
||||
let interned_triple = Self { |
||||
subject: InternedSubject::encoded_from(triple.subject, interner)?, |
||||
predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?, |
||||
object: InternedTerm::encoded_from(triple.object, interner)?, |
||||
}; |
||||
interner |
||||
.triples |
||||
.contains_key(&interned_triple) |
||||
.then_some(interned_triple) |
||||
} |
||||
|
||||
pub fn next(&self) -> Self { |
||||
Self { |
||||
subject: self.subject.clone(), |
||||
predicate: self.predicate, |
||||
object: self.object.next(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Builds `IdentityHasher`s for the `Interner::string_for_hash` table.
#[derive(Default, Clone)]
struct IdentityHasherBuilder;

impl BuildHasher for IdentityHasherBuilder {
    type Hasher = IdentityHasher;

    fn build_hasher(&self) -> Self::Hasher {
        Self::Hasher::default()
    }
}

/// Pass-through hasher: the `u64` key is already a hash, so it is echoed as-is.
#[derive(Default)]
struct IdentityHasher {
    // Last u64 written; returned unchanged by finish().
    value: u64,
}

impl Hasher for IdentityHasher {
    fn finish(&self) -> u64 {
        self.value
    }

    // Byte-slice hashing is intentionally unsupported: only u64 keys are expected.
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("Should only be used on u64 values")
    }

    fn write_u64(&mut self, i: u64) {
        self.value = i
    }
}
@ -0,0 +1,669 @@ |
||||
use crate::oxrdf::named_node::{NamedNode, NamedNodeRef}; |
||||
use crate::oxrdf::vocab::{rdf, xsd}; |
||||
#[cfg(feature = "oxsdatatypes")] |
||||
use crate::oxsdatatypes::*; |
||||
use oxilangtag::{LanguageTag, LanguageTagParseError}; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::borrow::Cow; |
||||
use std::fmt; |
||||
use std::fmt::Write; |
||||
|
||||
/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// # use oxilangtag::LanguageTagParseError;
/// use oxrdf::vocab::xsd;
/// use oxrdf::Literal;
///
/// assert_eq!(
///     "\"foo\\nbar\"",
///     Literal::new_simple_literal("foo\nbar").to_string()
/// );
///
/// assert_eq!(
///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
///     Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
///
/// assert_eq!(
///     r#""foo"@en"#,
///     Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// );
/// # Result::<(), LanguageTagParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
pub struct Literal(LiteralContent);

/// Internal representation covering the three shapes of an RDF 1.1 literal.
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
enum LiteralContent {
    // Simple literal: implicit xsd:string datatype.
    String(String),
    // Language-tagged string: value plus its (lowercased) language tag.
    LanguageTaggedString { value: String, language: String },
    // Lexical form plus an explicit datatype IRI.
    TypedLiteral { value: String, datatype: NamedNode },
}
||||
|
||||
impl Literal { |
||||
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
|
||||
#[inline] |
||||
pub fn new_simple_literal(value: impl Into<String>) -> Self { |
||||
Self(LiteralContent::String(value.into())) |
||||
} |
||||
|
||||
/// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
|
||||
#[inline] |
||||
pub fn new_typed_literal(value: impl Into<String>, datatype: impl Into<NamedNode>) -> Self { |
||||
let value = value.into(); |
||||
let datatype = datatype.into(); |
||||
Self(if datatype == xsd::STRING { |
||||
LiteralContent::String(value) |
||||
} else { |
||||
LiteralContent::TypedLiteral { value, datatype } |
||||
}) |
||||
} |
||||
|
||||
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
#[inline] |
||||
pub fn new_language_tagged_literal( |
||||
value: impl Into<String>, |
||||
language: impl Into<String>, |
||||
) -> Result<Self, LanguageTagParseError> { |
||||
let mut language = language.into(); |
||||
language.make_ascii_lowercase(); |
||||
Ok(Self::new_language_tagged_literal_unchecked( |
||||
value, |
||||
LanguageTag::parse(language)?.into_inner(), |
||||
)) |
||||
} |
||||
|
||||
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
///
|
||||
/// It is the responsibility of the caller to check that `language`
|
||||
/// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
|
||||
/// and is lowercase.
|
||||
///
|
||||
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub fn new_language_tagged_literal_unchecked( |
||||
value: impl Into<String>, |
||||
language: impl Into<String>, |
||||
) -> Self { |
||||
Self(LiteralContent::LanguageTaggedString { |
||||
value: value.into(), |
||||
language: language.into(), |
||||
}) |
||||
} |
||||
|
||||
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form).
|
||||
#[inline] |
||||
pub fn value(&self) -> &str { |
||||
self.as_ref().value() |
||||
} |
||||
|
||||
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
///
|
||||
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
|
||||
/// They are normalized to lowercase by this implementation.
|
||||
#[inline] |
||||
pub fn language(&self) -> Option<&str> { |
||||
self.as_ref().language() |
||||
} |
||||
|
||||
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
|
||||
///
|
||||
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
|
||||
#[inline] |
||||
pub fn datatype(&self) -> NamedNodeRef<'_> { |
||||
self.as_ref().datatype() |
||||
} |
||||
|
||||
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal).
|
||||
///
|
||||
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
|
||||
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
|
||||
#[inline] |
||||
pub fn is_plain(&self) -> bool { |
||||
self.as_ref().is_plain() |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn as_ref(&self) -> LiteralRef<'_> { |
||||
LiteralRef(match &self.0 { |
||||
LiteralContent::String(value) => LiteralRefContent::String(value), |
||||
LiteralContent::LanguageTaggedString { value, language } => { |
||||
LiteralRefContent::LanguageTaggedString { value, language } |
||||
} |
||||
LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral { |
||||
value, |
||||
datatype: datatype.as_ref(), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
/// Extract components from this literal (value, datatype and language tag).
|
||||
#[inline] |
||||
pub fn destruct(self) -> (String, Option<NamedNode>, Option<String>) { |
||||
match self.0 { |
||||
LiteralContent::String(s) => (s, None, None), |
||||
LiteralContent::LanguageTaggedString { value, language } => { |
||||
(value, None, Some(language)) |
||||
} |
||||
LiteralContent::TypedLiteral { value, datatype } => (value, Some(datatype), None), |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Formats the literal in N-Triples/Turtle/SPARQL-compatible syntax by
// delegating to the borrowed form's formatter.
impl fmt::Display for Literal {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.as_ref().fmt(f)
    }
}
||||
|
||||
impl<'a> From<&'a str> for Literal { |
||||
#[inline] |
||||
fn from(value: &'a str) -> Self { |
||||
Self(LiteralContent::String(value.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<String> for Literal { |
||||
#[inline] |
||||
fn from(value: String) -> Self { |
||||
Self(LiteralContent::String(value)) |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<Cow<'a, str>> for Literal { |
||||
#[inline] |
||||
fn from(value: Cow<'a, str>) -> Self { |
||||
Self(LiteralContent::String(value.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<bool> for Literal { |
||||
#[inline] |
||||
fn from(value: bool) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::BOOLEAN.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<i128> for Literal { |
||||
#[inline] |
||||
fn from(value: i128) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<i64> for Literal { |
||||
#[inline] |
||||
fn from(value: i64) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<i32> for Literal { |
||||
#[inline] |
||||
fn from(value: i32) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<i16> for Literal { |
||||
#[inline] |
||||
fn from(value: i16) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<u64> for Literal { |
||||
#[inline] |
||||
fn from(value: u64) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<u32> for Literal { |
||||
#[inline] |
||||
fn from(value: u32) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<u16> for Literal { |
||||
#[inline] |
||||
fn from(value: u16) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: value.to_string(), |
||||
datatype: xsd::INTEGER.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<f32> for Literal { |
||||
#[inline] |
||||
fn from(value: f32) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: if value == f32::INFINITY { |
||||
"INF".to_owned() |
||||
} else if value == f32::NEG_INFINITY { |
||||
"-INF".to_owned() |
||||
} else { |
||||
value.to_string() |
||||
}, |
||||
datatype: xsd::FLOAT.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<f64> for Literal { |
||||
#[inline] |
||||
fn from(value: f64) -> Self { |
||||
Self(LiteralContent::TypedLiteral { |
||||
value: if value == f64::INFINITY { |
||||
"INF".to_owned() |
||||
} else if value == f64::NEG_INFINITY { |
||||
"-INF".to_owned() |
||||
} else { |
||||
value.to_string() |
||||
}, |
||||
datatype: xsd::DOUBLE.into(), |
||||
}) |
||||
} |
||||
} |
||||
|
||||
// Conversions from the oxsdatatypes value types: each maps to a typed literal
// whose lexical form is the type's Display output and whose datatype is the
// matching xsd:* IRI.
#[cfg(feature = "oxsdatatypes")]
impl From<Boolean> for Literal {
    #[inline]
    fn from(value: Boolean) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::BOOLEAN)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Float> for Literal {
    #[inline]
    fn from(value: Float) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::FLOAT)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Double> for Literal {
    #[inline]
    fn from(value: Double) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::DOUBLE)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Integer> for Literal {
    #[inline]
    fn from(value: Integer) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::INTEGER)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Decimal> for Literal {
    #[inline]
    fn from(value: Decimal) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::DECIMAL)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<DateTime> for Literal {
    #[inline]
    fn from(value: DateTime) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::DATE_TIME)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Time> for Literal {
    #[inline]
    fn from(value: Time) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::TIME)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Date> for Literal {
    #[inline]
    fn from(value: Date) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::DATE)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<GYearMonth> for Literal {
    #[inline]
    fn from(value: GYearMonth) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::G_YEAR_MONTH)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<GYear> for Literal {
    #[inline]
    fn from(value: GYear) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::G_YEAR)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<GMonthDay> for Literal {
    #[inline]
    fn from(value: GMonthDay) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::G_MONTH_DAY)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<GMonth> for Literal {
    #[inline]
    fn from(value: GMonth) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::G_MONTH)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<GDay> for Literal {
    #[inline]
    fn from(value: GDay) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::G_DAY)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<Duration> for Literal {
    #[inline]
    fn from(value: Duration) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::DURATION)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<YearMonthDuration> for Literal {
    #[inline]
    fn from(value: YearMonthDuration) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::YEAR_MONTH_DURATION)
    }
}

#[cfg(feature = "oxsdatatypes")]
impl From<DayTimeDuration> for Literal {
    #[inline]
    fn from(value: DayTimeDuration) -> Self {
        Self::new_typed_literal(value.to_string(), xsd::DAY_TIME_DURATION)
    }
}
||||
|
||||
/// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::vocab::xsd;
/// use oxrdf::LiteralRef;
///
/// assert_eq!(
///     "\"foo\\nbar\"",
///     LiteralRef::new_simple_literal("foo\nbar").to_string()
/// );
///
/// assert_eq!(
///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
///     LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct LiteralRef<'a>(LiteralRefContent<'a>);

/// Internal representation of [`LiteralRef`].
///
/// Simple literals (implicit datatype xsd:string) are stored as the `String`
/// variant, so equality/hashing never depend on an explicit xsd:string IRI.
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
enum LiteralRefContent<'a> {
    // Simple literal: lexical form only, datatype is implicitly xsd:string.
    String(&'a str),
    // Language-tagged string: datatype is implicitly rdf:langString.
    LanguageTaggedString {
        value: &'a str,
        language: &'a str,
    },
    // Any other datatype, carried explicitly.
    TypedLiteral {
        value: &'a str,
        datatype: NamedNodeRef<'a>,
    },
}
||||
|
||||
impl<'a> LiteralRef<'a> { |
||||
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
|
||||
#[inline] |
||||
pub const fn new_simple_literal(value: &'a str) -> Self { |
||||
LiteralRef(LiteralRefContent::String(value)) |
||||
} |
||||
|
||||
/// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
|
||||
#[inline] |
||||
pub fn new_typed_literal(value: &'a str, datatype: impl Into<NamedNodeRef<'a>>) -> Self { |
||||
let datatype = datatype.into(); |
||||
LiteralRef(if datatype == xsd::STRING { |
||||
LiteralRefContent::String(value) |
||||
} else { |
||||
LiteralRefContent::TypedLiteral { value, datatype } |
||||
}) |
||||
} |
||||
|
||||
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
///
|
||||
/// It is the responsibility of the caller to check that `language`
|
||||
/// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
|
||||
/// and is lowercase.
|
||||
///
|
||||
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self { |
||||
LiteralRef(LiteralRefContent::LanguageTaggedString { value, language }) |
||||
} |
||||
|
||||
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
|
||||
#[inline] |
||||
pub const fn value(self) -> &'a str { |
||||
match self.0 { |
||||
LiteralRefContent::String(value) |
||||
| LiteralRefContent::LanguageTaggedString { value, .. } |
||||
| LiteralRefContent::TypedLiteral { value, .. } => value, |
||||
} |
||||
} |
||||
|
||||
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
///
|
||||
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
|
||||
/// They are normalized to lowercase by this implementation.
|
||||
#[inline] |
||||
pub const fn language(self) -> Option<&'a str> { |
||||
match self.0 { |
||||
LiteralRefContent::LanguageTaggedString { language, .. } => Some(language), |
||||
_ => None, |
||||
} |
||||
} |
||||
|
||||
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
|
||||
///
|
||||
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
|
||||
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
|
||||
#[inline] |
||||
pub const fn datatype(self) -> NamedNodeRef<'a> { |
||||
match self.0 { |
||||
LiteralRefContent::String(_) => xsd::STRING, |
||||
LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING, |
||||
LiteralRefContent::TypedLiteral { datatype, .. } => datatype, |
||||
} |
||||
} |
||||
|
||||
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal).
|
||||
///
|
||||
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
|
||||
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
|
||||
#[inline] |
||||
pub const fn is_plain(self) -> bool { |
||||
matches!( |
||||
self.0, |
||||
LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. } |
||||
) |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_owned(self) -> Literal { |
||||
Literal(match self.0 { |
||||
LiteralRefContent::String(value) => LiteralContent::String(value.to_owned()), |
||||
LiteralRefContent::LanguageTaggedString { value, language } => { |
||||
LiteralContent::LanguageTaggedString { |
||||
value: value.to_owned(), |
||||
language: language.to_owned(), |
||||
} |
||||
} |
||||
LiteralRefContent::TypedLiteral { value, datatype } => LiteralContent::TypedLiteral { |
||||
value: value.to_owned(), |
||||
datatype: datatype.into_owned(), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
/// Extract components from this literal
|
||||
#[inline] |
||||
pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) { |
||||
match self.0 { |
||||
LiteralRefContent::String(s) => (s, None, None), |
||||
LiteralRefContent::LanguageTaggedString { value, language } => { |
||||
(value, None, Some(language)) |
||||
} |
||||
LiteralRefContent::TypedLiteral { value, datatype } => (value, Some(datatype), None), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for LiteralRef<'_> { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
match self.0 { |
||||
LiteralRefContent::String(value) => print_quoted_str(value, f), |
||||
LiteralRefContent::LanguageTaggedString { value, language } => { |
||||
print_quoted_str(value, f)?; |
||||
write!(f, "@{language}") |
||||
} |
||||
LiteralRefContent::TypedLiteral { value, datatype } => { |
||||
print_quoted_str(value, f)?; |
||||
write!(f, "^^{datatype}") |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<&'a Literal> for LiteralRef<'a> { |
||||
#[inline] |
||||
fn from(node: &'a Literal) -> Self { |
||||
node.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<LiteralRef<'a>> for Literal { |
||||
#[inline] |
||||
fn from(node: LiteralRef<'a>) -> Self { |
||||
node.into_owned() |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<&'a str> for LiteralRef<'a> { |
||||
#[inline] |
||||
fn from(value: &'a str) -> Self { |
||||
LiteralRef(LiteralRefContent::String(value)) |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<Literal> for LiteralRef<'_> { |
||||
#[inline] |
||||
fn eq(&self, other: &Literal) -> bool { |
||||
*self == other.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<LiteralRef<'_>> for Literal { |
||||
#[inline] |
||||
fn eq(&self, other: &LiteralRef<'_>) -> bool { |
||||
self.as_ref() == *other |
||||
} |
||||
} |
||||
|
||||
/// Writes `string` surrounded by double quotes, escaping it so the result is a
/// valid N-Triples / Turtle / SPARQL quoted string.
///
/// Control characters without a short escape are written as `\uXXXX`.
#[inline]
pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
    f.write_char('"')?;
    for c in string.chars() {
        match c {
            '\u{08}' => f.write_str("\\b")?,
            '\t' => f.write_str("\\t")?,
            '\n' => f.write_str("\\n")?,
            '\u{0C}' => f.write_str("\\f")?,
            '\r' => f.write_str("\\r")?,
            '"' => f.write_str("\\\"")?,
            '\\' => f.write_str("\\\\")?,
            c if c <= '\u{1F}' || c == '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c))?,
            c => f.write_char(c)?,
        }
    }
    f.write_char('"')
}
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_literal_equality() {
        // A simple literal and an explicit xsd:string literal must compare
        // equal across every owned/borrowed combination.
        let owned = Literal::new_simple_literal("foo");
        let borrowed = LiteralRef::new_simple_literal("foo");
        assert_eq!(owned, Literal::new_typed_literal("foo", xsd::STRING));
        assert_eq!(owned, LiteralRef::new_typed_literal("foo", xsd::STRING));
        assert_eq!(borrowed, Literal::new_typed_literal("foo", xsd::STRING));
        assert_eq!(borrowed, LiteralRef::new_typed_literal("foo", xsd::STRING));
    }

    #[test]
    fn test_float_format() {
        // The XSD lexical space uses "INF"/"-INF"/"NaN", unlike Rust's Display.
        assert_eq!("INF", Literal::from(f32::INFINITY).value());
        assert_eq!("INF", Literal::from(f64::INFINITY).value());
        assert_eq!("-INF", Literal::from(f32::NEG_INFINITY).value());
        assert_eq!("-INF", Literal::from(f64::NEG_INFINITY).value());
        assert_eq!("NaN", Literal::from(f32::NAN).value());
        assert_eq!("NaN", Literal::from(f64::NAN).value());
    }
}
@ -0,0 +1,24 @@ |
||||
mod blank_node; |
||||
pub mod dataset; |
||||
pub mod graph; |
||||
mod interning; |
||||
mod literal; |
||||
mod named_node; |
||||
mod parser; |
||||
mod triple; |
||||
mod variable; |
||||
pub mod vocab; |
||||
|
||||
pub use crate::oxrdf::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef}; |
||||
pub use crate::oxrdf::dataset::Dataset; |
||||
pub use crate::oxrdf::graph::Graph; |
||||
pub use crate::oxrdf::literal::{Literal, LiteralRef}; |
||||
pub use crate::oxrdf::named_node::{NamedNode, NamedNodeRef}; |
||||
pub use crate::oxrdf::parser::TermParseError; |
||||
pub use crate::oxrdf::triple::{ |
||||
GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Subject, |
||||
SubjectRef, Term, TermRef, Triple, TripleRef, TryFromTermError, |
||||
}; |
||||
pub use crate::oxrdf::variable::{Variable, VariableNameParseError, VariableRef}; |
||||
pub use oxilangtag::LanguageTagParseError; |
||||
pub use oxiri::IriParseError; |
@ -0,0 +1,237 @@ |
||||
use oxiri::{Iri, IriParseError}; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::cmp::Ordering; |
||||
use std::fmt; |
||||
|
||||
/// An owned RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::NamedNode;
///
/// assert_eq!(
///     "<http://example.com/foo>",
///     NamedNode::new("http://example.com/foo")?.to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash, Serialize, Deserialize)]
pub struct NamedNode {
    // Invariant: holds a valid IRI when built through `new`; `new_unchecked`
    // trusts the caller to uphold this.
    iri: String,
}
||||
|
||||
impl NamedNode { |
||||
/// Builds and validate an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
|
||||
pub fn new(iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
Ok(Self::new_from_iri(Iri::parse(iri.into())?)) |
||||
} |
||||
|
||||
#[inline] |
||||
pub(crate) fn new_from_iri(iri: Iri<String>) -> Self { |
||||
Self::new_unchecked(iri.into_inner()) |
||||
} |
||||
|
||||
/// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) from a string.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure that `iri` is a valid IRI.
|
||||
///
|
||||
/// [`NamedNode::new()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub fn new_unchecked(iri: impl Into<String>) -> Self { |
||||
Self { iri: iri.into() } |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn as_str(&self) -> &str { |
||||
self.iri.as_str() |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_string(self) -> String { |
||||
self.iri |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn as_ref(&self) -> NamedNodeRef<'_> { |
||||
NamedNodeRef::new_unchecked(&self.iri) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for NamedNode { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
self.as_ref().fmt(f) |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<str> for NamedNode { |
||||
#[inline] |
||||
fn eq(&self, other: &str) -> bool { |
||||
self.as_str() == other |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<NamedNode> for str { |
||||
#[inline] |
||||
fn eq(&self, other: &NamedNode) -> bool { |
||||
self == other.as_str() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<&str> for NamedNode { |
||||
#[inline] |
||||
fn eq(&self, other: &&str) -> bool { |
||||
self == *other |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<NamedNode> for &str { |
||||
#[inline] |
||||
fn eq(&self, other: &NamedNode) -> bool { |
||||
*self == other |
||||
} |
||||
} |
||||
|
||||
/// A borrowed RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::NamedNodeRef;
///
/// assert_eq!(
///     "<http://example.com/foo>",
///     NamedNodeRef::new("http://example.com/foo")?.to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct NamedNodeRef<'a> {
    // Invariant: a valid IRI when built through `new`; `new_unchecked` trusts
    // the caller.
    iri: &'a str,
}
||||
|
||||
impl<'a> NamedNodeRef<'a> { |
||||
/// Builds and validate an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
|
||||
pub fn new(iri: &'a str) -> Result<Self, IriParseError> { |
||||
Ok(Self::new_from_iri(Iri::parse(iri)?)) |
||||
} |
||||
|
||||
#[inline] |
||||
pub(crate) fn new_from_iri(iri: Iri<&'a str>) -> Self { |
||||
Self::new_unchecked(iri.into_inner()) |
||||
} |
||||
|
||||
/// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) from a string.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure that `iri` is a valid IRI.
|
||||
///
|
||||
/// [`NamedNode::new()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub const fn new_unchecked(iri: &'a str) -> Self { |
||||
Self { iri } |
||||
} |
||||
|
||||
#[inline] |
||||
pub const fn as_str(self) -> &'a str { |
||||
self.iri |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_owned(self) -> NamedNode { |
||||
NamedNode::new_unchecked(self.iri) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for NamedNodeRef<'_> { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
write!(f, "<{}>", self.as_str()) |
||||
} |
||||
} |
||||
|
||||
impl From<NamedNodeRef<'_>> for NamedNode { |
||||
#[inline] |
||||
fn from(node: NamedNodeRef<'_>) -> Self { |
||||
node.into_owned() |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<&'a NamedNode> for NamedNodeRef<'a> { |
||||
#[inline] |
||||
fn from(node: &'a NamedNode) -> Self { |
||||
node.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<NamedNode> for NamedNodeRef<'_> { |
||||
#[inline] |
||||
fn eq(&self, other: &NamedNode) -> bool { |
||||
self.as_str() == other.as_str() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<NamedNodeRef<'_>> for NamedNode { |
||||
#[inline] |
||||
fn eq(&self, other: &NamedNodeRef<'_>) -> bool { |
||||
self.as_str() == other.as_str() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<str> for NamedNodeRef<'_> { |
||||
#[inline] |
||||
fn eq(&self, other: &str) -> bool { |
||||
self.as_str() == other |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<NamedNodeRef<'_>> for str { |
||||
#[inline] |
||||
fn eq(&self, other: &NamedNodeRef<'_>) -> bool { |
||||
self == other.as_str() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<&str> for NamedNodeRef<'_> { |
||||
#[inline] |
||||
fn eq(&self, other: &&str) -> bool { |
||||
self == *other |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<NamedNodeRef<'_>> for &str { |
||||
#[inline] |
||||
fn eq(&self, other: &NamedNodeRef<'_>) -> bool { |
||||
*self == other |
||||
} |
||||
} |
||||
|
||||
impl PartialOrd<NamedNode> for NamedNodeRef<'_> { |
||||
#[inline] |
||||
fn partial_cmp(&self, other: &NamedNode) -> Option<Ordering> { |
||||
self.partial_cmp(&other.as_ref()) |
||||
} |
||||
} |
||||
|
||||
impl PartialOrd<NamedNodeRef<'_>> for NamedNode { |
||||
#[inline] |
||||
fn partial_cmp(&self, other: &NamedNodeRef<'_>) -> Option<Ordering> { |
||||
self.as_ref().partial_cmp(other) |
||||
} |
||||
} |
||||
|
||||
impl From<Iri<String>> for NamedNode { |
||||
#[inline] |
||||
fn from(iri: Iri<String>) -> Self { |
||||
Self { |
||||
iri: iri.into_inner(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> { |
||||
#[inline] |
||||
fn from(iri: Iri<&'a str>) -> Self { |
||||
Self { |
||||
iri: iri.into_inner(), |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,469 @@ |
||||
use crate::oxrdf::vocab::xsd; |
||||
use crate::oxrdf::{ |
||||
BlankNode, BlankNodeIdParseError, IriParseError, LanguageTagParseError, Literal, NamedNode, |
||||
Term, Variable, VariableNameParseError, |
||||
}; |
||||
#[cfg(feature = "rdf-star")] |
||||
use crate::oxrdf::{Subject, Triple}; |
||||
use std::char; |
||||
use std::str::{Chars, FromStr}; |
||||
|
||||
/// This limit is set in order to avoid stack overflow error when parsing nested triples due to too many recursive calls.
|
||||
/// The actual limit value is a wet finger compromise between not failing to parse valid files and avoiding to trigger stack overflow errors.
|
||||
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 128; |
||||
|
||||
impl FromStr for NamedNode { |
||||
type Err = TermParseError; |
||||
|
||||
/// Parses a named node from its NTriples and Turtle serialization
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::NamedNode;
|
||||
/// use std::str::FromStr;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// NamedNode::from_str("<http://example.com>").unwrap(),
|
||||
/// NamedNode::new("http://example.com").unwrap()
|
||||
/// )
|
||||
/// ```
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> { |
||||
let (term, left) = read_named_node(s)?; |
||||
if !left.is_empty() { |
||||
return Err(Self::Err::msg( |
||||
"Named node serialization should end with a >", |
||||
)); |
||||
} |
||||
Ok(term) |
||||
} |
||||
} |
||||
|
||||
impl FromStr for BlankNode { |
||||
type Err = TermParseError; |
||||
|
||||
/// Parses a blank node from its NTriples and Turtle serialization
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::BlankNode;
|
||||
/// use std::str::FromStr;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// BlankNode::from_str("_:ex").unwrap(),
|
||||
/// BlankNode::new("ex").unwrap()
|
||||
/// )
|
||||
/// ```
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> { |
||||
let (term, left) = read_blank_node(s)?; |
||||
if !left.is_empty() { |
||||
return Err(Self::Err::msg( |
||||
"Blank node serialization should not contain whitespaces", |
||||
)); |
||||
} |
||||
Ok(term) |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Literal { |
||||
type Err = TermParseError; |
||||
|
||||
/// Parses a literal from its NTriples or Turtle serialization
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::vocab::xsd;
|
||||
/// use oxrdf::{Literal, NamedNode};
|
||||
/// use std::str::FromStr;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("\"ex\\n\"").unwrap(),
|
||||
/// Literal::new_simple_literal("ex\n")
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("\"ex\"@en").unwrap(),
|
||||
/// Literal::new_language_tagged_literal("ex", "en").unwrap()
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(),
|
||||
/// Literal::new_typed_literal(
|
||||
/// "2020",
|
||||
/// NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()
|
||||
/// )
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("true").unwrap(),
|
||||
/// Literal::new_typed_literal("true", xsd::BOOLEAN)
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("+122").unwrap(),
|
||||
/// Literal::new_typed_literal("+122", xsd::INTEGER)
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("-122.23").unwrap(),
|
||||
/// Literal::new_typed_literal("-122.23", xsd::DECIMAL)
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// Literal::from_str("-122e+1").unwrap(),
|
||||
/// Literal::new_typed_literal("-122e+1", xsd::DOUBLE)
|
||||
/// );
|
||||
/// ```
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> { |
||||
let (term, left) = read_literal(s)?; |
||||
if !left.is_empty() { |
||||
return Err(Self::Err::msg("Invalid literal serialization")); |
||||
} |
||||
Ok(term) |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Term { |
||||
type Err = TermParseError; |
||||
|
||||
/// Parses a term from its NTriples or Turtle serialization
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::*;
|
||||
/// use std::str::FromStr;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// Term::from_str("\"ex\"").unwrap(),
|
||||
/// Literal::new_simple_literal("ex").into()
|
||||
/// );
|
||||
/// ```
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> { |
||||
let (term, left) = read_term(s, 0)?; |
||||
if !left.is_empty() { |
||||
return Err(Self::Err::msg("Invalid term serialization")); |
||||
} |
||||
Ok(term) |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Variable { |
||||
type Err = TermParseError; |
||||
|
||||
/// Parses a variable from its SPARQL serialization
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::Variable;
|
||||
/// use std::str::FromStr;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// Variable::from_str("$foo").unwrap(),
|
||||
/// Variable::new("foo").unwrap()
|
||||
/// )
|
||||
/// ```
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> { |
||||
if !s.starts_with('?') && !s.starts_with('$') { |
||||
return Err(Self::Err::msg( |
||||
"Variable serialization should start with ? or $", |
||||
)); |
||||
} |
||||
Self::new(&s[1..]).map_err(|error| { |
||||
TermParseError(TermParseErrorKind::Variable { |
||||
value: s.to_owned(), |
||||
error, |
||||
}) |
||||
}) |
||||
} |
||||
} |
||||
|
||||
fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> { |
||||
let s = s.trim(); |
||||
if let Some(remain) = s.strip_prefix('<') { |
||||
let end = remain |
||||
.find('>') |
||||
.ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?; |
||||
let (value, remain) = remain.split_at(end); |
||||
let remain = &remain[1..]; |
||||
let term = NamedNode::new(value).map_err(|error| { |
||||
TermParseError(TermParseErrorKind::Iri { |
||||
value: value.to_owned(), |
||||
error, |
||||
}) |
||||
})?; |
||||
Ok((term, remain)) |
||||
} else { |
||||
Err(TermParseError::msg( |
||||
"Named node serialization should start with a <", |
||||
)) |
||||
} |
||||
} |
||||
|
||||
fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> { |
||||
let s = s.trim(); |
||||
if let Some(remain) = s.strip_prefix("_:") { |
||||
let end = remain |
||||
.find(|v: char| { |
||||
v.is_whitespace() |
||||
|| matches!(v, '<' | '_' | '?' | '$' | '"' | '\'' | '>' | '@' | '^') |
||||
}) |
||||
.unwrap_or(remain.len()); |
||||
let (value, remain) = remain.split_at(end); |
||||
let term = BlankNode::new(value).map_err(|error| { |
||||
TermParseError(TermParseErrorKind::BlankNode { |
||||
value: value.to_owned(), |
||||
error, |
||||
}) |
||||
})?; |
||||
Ok((term, remain)) |
||||
} else { |
||||
Err(TermParseError::msg( |
||||
"Blank node serialization should start with '_:'", |
||||
)) |
||||
} |
||||
} |
||||
|
||||
fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> { |
||||
let s = s.trim(); |
||||
if let Some(s) = s.strip_prefix('"') { |
||||
let mut value = String::with_capacity(s.len()); |
||||
let mut chars = s.chars(); |
||||
while let Some(c) = chars.next() { |
||||
match c { |
||||
'"' => { |
||||
let remain = chars.as_str(); |
||||
return if let Some(remain) = remain.strip_prefix('@') { |
||||
let end = remain |
||||
.find(|v| !matches!(v, 'a'..='z' | 'A'..='Z' | '-')) |
||||
.unwrap_or(remain.len()); |
||||
let (language, remain) = remain.split_at(end); |
||||
Ok(( |
||||
Literal::new_language_tagged_literal(value, language).map_err( |
||||
|error| { |
||||
TermParseError(TermParseErrorKind::LanguageTag { |
||||
value: language.to_owned(), |
||||
error, |
||||
}) |
||||
}, |
||||
)?, |
||||
remain, |
||||
)) |
||||
} else if let Some(remain) = remain.strip_prefix("^^") { |
||||
let (datatype, remain) = read_named_node(remain)?; |
||||
Ok((Literal::new_typed_literal(value, datatype), remain)) |
||||
} else { |
||||
Ok((Literal::new_simple_literal(value), remain)) |
||||
}; |
||||
} |
||||
'\\' => { |
||||
if let Some(c) = chars.next() { |
||||
value.push(match c { |
||||
't' => '\t', |
||||
'b' => '\u{08}', |
||||
'n' => '\n', |
||||
'r' => '\r', |
||||
'f' => '\u{0C}', |
||||
'"' => '"', |
||||
'\'' => '\'', |
||||
'\\' => '\\', |
||||
'u' => read_hexa_char(&mut chars, 4)?, |
||||
'U' => read_hexa_char(&mut chars, 8)?, |
||||
_ => return Err(TermParseError::msg("Unexpected escaped char")), |
||||
}) |
||||
} else { |
||||
return Err(TermParseError::msg("Unexpected literal end")); |
||||
} |
||||
} |
||||
_ => value.push(c), |
||||
} |
||||
} |
||||
Err(TermParseError::msg("Unexpected literal end")) |
||||
} else if let Some(remain) = s.strip_prefix("true") { |
||||
Ok((Literal::new_typed_literal("true", xsd::BOOLEAN), remain)) |
||||
} else if let Some(remain) = s.strip_prefix("false") { |
||||
Ok((Literal::new_typed_literal("false", xsd::BOOLEAN), remain)) |
||||
} else { |
||||
let input = s.as_bytes(); |
||||
if input.is_empty() { |
||||
return Err(TermParseError::msg("Empty term serialization")); |
||||
} |
||||
|
||||
let mut cursor = match input.first() { |
||||
Some(b'+' | b'-') => 1, |
||||
_ => 0, |
||||
}; |
||||
let mut with_dot = false; |
||||
|
||||
let mut count_before: usize = 0; |
||||
while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' { |
||||
count_before += 1; |
||||
cursor += 1; |
||||
} |
||||
|
||||
let mut count_after: usize = 0; |
||||
if cursor < input.len() && input[cursor] == b'.' { |
||||
with_dot = true; |
||||
cursor += 1; |
||||
while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' { |
||||
count_after += 1; |
||||
cursor += 1; |
||||
} |
||||
} |
||||
|
||||
if cursor < input.len() && (input[cursor] == b'e' || input[cursor] == b'E') { |
||||
cursor += 1; |
||||
cursor += match input.get(cursor) { |
||||
Some(b'+' | b'-') => 1, |
||||
_ => 0, |
||||
}; |
||||
let mut count_exponent = 0; |
||||
while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' { |
||||
count_exponent += 1; |
||||
cursor += 1; |
||||
} |
||||
if count_exponent > 0 { |
||||
Ok((Literal::new_typed_literal(s, xsd::DOUBLE), &s[cursor..])) |
||||
} else { |
||||
Err(TermParseError::msg( |
||||
"Double serialization with an invalid exponent", |
||||
)) |
||||
} |
||||
} else if with_dot { |
||||
if count_after > 0 { |
||||
Ok((Literal::new_typed_literal(s, xsd::DECIMAL), &s[cursor..])) |
||||
} else { |
||||
Err(TermParseError::msg( |
||||
"Decimal serialization without floating part", |
||||
)) |
||||
} |
||||
} else if count_before > 0 { |
||||
Ok((Literal::new_typed_literal(s, xsd::INTEGER), &s[cursor..])) |
||||
} else { |
||||
Err(TermParseError::msg("Empty integer serialization")) |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> { |
||||
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES { |
||||
return Err(TermParseError::msg( |
||||
"Too many nested triples. The parser fails here to avoid a stack overflow.", |
||||
)); |
||||
} |
||||
let s = s.trim(); |
||||
#[allow(unused_variables)] |
||||
if let Some(remain) = s.strip_prefix("<<") { |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
let (subject, remain) = read_term(remain, number_of_recursive_calls + 1)?; |
||||
let (predicate, remain) = read_named_node(remain)?; |
||||
let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?; |
||||
let remain = remain.trim_start(); |
||||
if let Some(remain) = remain.strip_prefix(">>") { |
||||
Ok(( |
||||
Triple { |
||||
subject: match subject { |
||||
Term::NamedNode(s) => s.into(), |
||||
Term::BlankNode(s) => s.into(), |
||||
Term::Literal(_) => { |
||||
return Err(TermParseError::msg( |
||||
"Literals are not allowed in subject position", |
||||
)); |
||||
} |
||||
Term::Triple(s) => Subject::Triple(s), |
||||
}, |
||||
predicate, |
||||
object, |
||||
} |
||||
.into(), |
||||
remain, |
||||
)) |
||||
} else { |
||||
Err(TermParseError::msg( |
||||
"Nested triple serialization should be enclosed between << and >>", |
||||
)) |
||||
} |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
Err(TermParseError::msg("RDF-star is not supported")) |
||||
} |
||||
} else if s.starts_with('<') { |
||||
let (term, remain) = read_named_node(s)?; |
||||
Ok((term.into(), remain)) |
||||
} else if s.starts_with('_') { |
||||
let (term, remain) = read_blank_node(s)?; |
||||
Ok((term.into(), remain)) |
||||
} else { |
||||
let (term, remain) = read_literal(s)?; |
||||
Ok((term.into(), remain)) |
||||
} |
||||
} |
||||
|
||||
fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> { |
||||
let mut value = 0; |
||||
for _ in 0..len { |
||||
if let Some(c) = input.next() { |
||||
value = value * 16 |
||||
+ match c { |
||||
'0'..='9' => u32::from(c) - u32::from('0'), |
||||
'a'..='f' => u32::from(c) - u32::from('a') + 10, |
||||
'A'..='F' => u32::from(c) - u32::from('A') + 10, |
||||
_ => { |
||||
return Err(TermParseError::msg( |
||||
"Unexpected character in a unicode escape", |
||||
)); |
||||
} |
||||
} |
||||
} else { |
||||
return Err(TermParseError::msg("Unexpected literal string end")); |
||||
} |
||||
} |
||||
char::from_u32(value).ok_or_else(|| TermParseError::msg("Invalid encoded unicode code point")) |
||||
} |
||||
|
||||
/// An error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct TermParseError(#[from] TermParseErrorKind);

/// An internal error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)]
enum TermParseErrorKind {
    /// The `<...>` content is not a valid IRI.
    #[error("Error while parsing the named node '{value}': {error}")]
    Iri { error: IriParseError, value: String },
    /// The `_:` identifier is not a valid blank node id.
    #[error("Error while parsing the blank node '{value}': {error}")]
    BlankNode {
        error: BlankNodeIdParseError,
        value: String,
    },
    /// The `@...` suffix is not a valid BCP47 language tag.
    #[error("Error while parsing the language tag '{value}': {error}")]
    LanguageTag {
        error: LanguageTagParseError,
        value: String,
    },
    /// The `?`/`$` name is not a valid SPARQL variable name.
    #[error("Error while parsing the variable '{value}': {error}")]
    Variable {
        error: VariableNameParseError,
        value: String,
    },
    /// A static, context-free error message.
    #[error("{0}")]
    Msg(&'static str),
}

impl TermParseError {
    /// Builds a parse error carrying only a static message.
    pub(crate) fn msg(msg: &'static str) -> Self {
        Self(TermParseErrorKind::Msg(msg))
    }
}
||||
|
||||
#[cfg(test)] |
||||
#[cfg(feature = "rdf-star")] |
||||
mod tests { |
||||
use super::*; |
||||
|
||||
#[test] |
||||
fn triple_term_parsing() { |
||||
assert_eq!( |
||||
Term::from_str("\"ex\"").unwrap(), |
||||
Literal::new_simple_literal("ex").into() |
||||
); |
||||
assert_eq!( |
||||
Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(), |
||||
Triple::new( |
||||
BlankNode::new("s").unwrap(), |
||||
NamedNode::new("http://example.com/p").unwrap(), |
||||
Literal::new_simple_literal("o"), |
||||
) |
||||
.into() |
||||
); |
||||
} |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,216 @@ |
||||
use std::cmp::Ordering; |
||||
use std::fmt; |
||||
|
||||
/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
|
||||
///
|
||||
/// The default string formatter is returning a SPARQL compatible representation:
|
||||
/// ```
|
||||
/// use oxrdf::{Variable, VariableNameParseError};
|
||||
///
|
||||
/// assert_eq!("?foo", Variable::new("foo")?.to_string());
|
||||
/// # Result::<_,VariableNameParseError>::Ok(())
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||
pub struct Variable { |
||||
name: String, |
||||
} |
||||
|
||||
impl Variable { |
||||
/// Creates a variable name from a unique identifier.
|
||||
///
|
||||
/// The variable identifier must be valid according to the SPARQL grammar.
|
||||
pub fn new(name: impl Into<String>) -> Result<Self, VariableNameParseError> { |
||||
let name = name.into(); |
||||
validate_variable_identifier(&name)?; |
||||
Ok(Self::new_unchecked(name)) |
||||
} |
||||
|
||||
/// Creates a variable name from a unique identifier without validation.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
|
||||
/// according to the SPARQL grammar.
|
||||
///
|
||||
/// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub fn new_unchecked(name: impl Into<String>) -> Self { |
||||
Self { name: name.into() } |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn as_str(&self) -> &str { |
||||
&self.name |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_string(self) -> String { |
||||
self.name |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn as_ref(&self) -> VariableRef<'_> { |
||||
VariableRef { name: &self.name } |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Variable { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
self.as_ref().fmt(f) |
||||
} |
||||
} |
||||
|
||||
/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) borrowed variable.
|
||||
///
|
||||
/// The default string formatter is returning a SPARQL compatible representation:
|
||||
/// ```
|
||||
/// use oxrdf::{VariableNameParseError, VariableRef};
|
||||
///
|
||||
/// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
|
||||
/// # Result::<_,VariableNameParseError>::Ok(())
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] |
||||
pub struct VariableRef<'a> { |
||||
name: &'a str, |
||||
} |
||||
|
||||
impl<'a> VariableRef<'a> { |
||||
/// Creates a variable name from a unique identifier.
|
||||
///
|
||||
/// The variable identifier must be valid according to the SPARQL grammar.
|
||||
pub fn new(name: &'a str) -> Result<Self, VariableNameParseError> { |
||||
validate_variable_identifier(name)?; |
||||
Ok(Self::new_unchecked(name)) |
||||
} |
||||
|
||||
/// Creates a variable name from a unique identifier without validation.
|
||||
///
|
||||
/// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
|
||||
/// according to the SPARQL grammar.
|
||||
///
|
||||
/// [`Variable::new()`] is a safe version of this constructor and should be used for untrusted data.
|
||||
#[inline] |
||||
pub const fn new_unchecked(name: &'a str) -> Self { |
||||
Self { name } |
||||
} |
||||
|
||||
#[inline] |
||||
pub const fn as_str(self) -> &'a str { |
||||
self.name |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_string(self) -> String { |
||||
self.name.to_owned() |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn into_owned(self) -> Variable { |
||||
Variable { |
||||
name: self.name.to_owned(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for VariableRef<'_> { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
write!(f, "?{}", self.name) |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<&'a Variable> for VariableRef<'a> { |
||||
#[inline] |
||||
fn from(variable: &'a Variable) -> Self { |
||||
variable.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl<'a> From<VariableRef<'a>> for Variable { |
||||
#[inline] |
||||
fn from(variable: VariableRef<'a>) -> Self { |
||||
variable.into_owned() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<Variable> for VariableRef<'_> { |
||||
#[inline] |
||||
fn eq(&self, other: &Variable) -> bool { |
||||
*self == other.as_ref() |
||||
} |
||||
} |
||||
|
||||
impl PartialEq<VariableRef<'_>> for Variable { |
||||
#[inline] |
||||
fn eq(&self, other: &VariableRef<'_>) -> bool { |
||||
self.as_ref() == *other |
||||
} |
||||
} |
||||
|
||||
impl PartialOrd<Variable> for VariableRef<'_> { |
||||
#[inline] |
||||
fn partial_cmp(&self, other: &Variable) -> Option<Ordering> { |
||||
self.partial_cmp(&other.as_ref()) |
||||
} |
||||
} |
||||
|
||||
impl PartialOrd<VariableRef<'_>> for Variable { |
||||
#[inline] |
||||
fn partial_cmp(&self, other: &VariableRef<'_>) -> Option<Ordering> { |
||||
self.as_ref().partial_cmp(other) |
||||
} |
||||
} |
||||
|
||||
fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> { |
||||
let mut chars = id.chars(); |
||||
let front = chars.next().ok_or(VariableNameParseError)?; |
||||
match front { |
||||
'0'..='9' |
||||
| '_' |
||||
| ':' |
||||
| 'A'..='Z' |
||||
| 'a'..='z' |
||||
| '\u{00C0}'..='\u{00D6}' |
||||
| '\u{00D8}'..='\u{00F6}' |
||||
| '\u{00F8}'..='\u{02FF}' |
||||
| '\u{0370}'..='\u{037D}' |
||||
| '\u{037F}'..='\u{1FFF}' |
||||
| '\u{200C}'..='\u{200D}' |
||||
| '\u{2070}'..='\u{218F}' |
||||
| '\u{2C00}'..='\u{2FEF}' |
||||
| '\u{3001}'..='\u{D7FF}' |
||||
| '\u{F900}'..='\u{FDCF}' |
||||
| '\u{FDF0}'..='\u{FFFD}' |
||||
| '\u{10000}'..='\u{EFFFF}' => (), |
||||
_ => return Err(VariableNameParseError), |
||||
} |
||||
for c in chars { |
||||
match c { |
||||
'0'..='9' |
||||
| '\u{00B7}' |
||||
| '\u{0300}'..='\u{036F}' |
||||
| '\u{203F}'..='\u{2040}' |
||||
| '_' |
||||
| 'A'..='Z' |
||||
| 'a'..='z' |
||||
| '\u{00C0}'..='\u{00D6}' |
||||
| '\u{00D8}'..='\u{00F6}' |
||||
| '\u{00F8}'..='\u{02FF}' |
||||
| '\u{0370}'..='\u{037D}' |
||||
| '\u{037F}'..='\u{1FFF}' |
||||
| '\u{200C}'..='\u{200D}' |
||||
| '\u{2070}'..='\u{218F}' |
||||
| '\u{2C00}'..='\u{2FEF}' |
||||
| '\u{3001}'..='\u{D7FF}' |
||||
| '\u{F900}'..='\u{FDCF}' |
||||
| '\u{FDF0}'..='\u{FFFD}' |
||||
| '\u{10000}'..='\u{EFFFF}' => (), |
||||
_ => return Err(VariableNameParseError), |
||||
} |
||||
} |
||||
Ok(()) |
||||
} |
||||
|
||||
/// An error raised during [`Variable`] name validation.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error("The variable name is invalid")] |
||||
pub struct VariableNameParseError; |
@ -0,0 +1,242 @@ |
||||
//! Provides ready to use [`NamedNodeRef`](super::NamedNodeRef)s for basic RDF vocabularies.
|
||||
|
||||
pub mod rdf { |
||||
//! [RDF](https://www.w3.org/TR/rdf11-concepts/) vocabulary.
|
||||
use crate::oxrdf::named_node::NamedNodeRef; |
||||
|
||||
/// The class of containers of alternatives.
|
||||
pub const ALT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Alt"); |
||||
/// The class of unordered containers.
|
||||
pub const BAG: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag"); |
||||
/// The first item in the subject RDF list.
|
||||
pub const FIRST: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"); |
||||
/// The class of HTML literal values.
|
||||
pub const HTML: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML"); |
||||
/// The class of language-tagged string literal values.
|
||||
pub const LANG_STRING: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"); |
||||
/// The class of RDF lists.
|
||||
pub const LIST: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#List"); |
||||
/// The empty list.
|
||||
pub const NIL: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"); |
||||
/// The object of the subject RDF statement.
|
||||
pub const OBJECT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#object"); |
||||
/// The predicate of the subject RDF statement.
|
||||
pub const PREDICATE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate"); |
||||
/// The class of RDF properties.
|
||||
pub const PROPERTY: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"); |
||||
/// The rest of the subject RDF list after the first item.
|
||||
pub const REST: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"); |
||||
/// The class of ordered containers.
|
||||
pub const SEQ: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq"); |
||||
/// The class of RDF statements.
|
||||
pub const STATEMENT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement"); |
||||
/// The subject of the subject RDF statement.
|
||||
pub const SUBJECT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#subject"); |
||||
/// The subject is an instance of a class.
|
||||
pub const TYPE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); |
||||
/// Idiomatic property used for structured values.
|
||||
pub const VALUE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#value"); |
||||
/// The class of XML literal values.
|
||||
pub const XML_LITERAL: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"); |
||||
} |
||||
|
||||
pub mod rdfs { |
||||
//! [RDFS](https://www.w3.org/TR/rdf-schema/) vocabulary.
|
||||
use crate::oxrdf::named_node::NamedNodeRef; |
||||
|
||||
/// The class of classes.
|
||||
pub const CLASS: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Class"); |
||||
/// A description of the subject resource.
|
||||
pub const COMMENT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#comment"); |
||||
/// The class of RDF containers.
|
||||
pub const CONTAINER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Container"); |
||||
/// The class of container membership properties, `rdf:_1`, `rdf:_2`, ..., all of which are sub-properties of `member`.
|
||||
pub const CONTAINER_MEMBERSHIP_PROPERTY: NamedNodeRef<'_> = NamedNodeRef::new_unchecked( |
||||
"http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty", |
||||
); |
||||
/// The class of RDF datatypes.
|
||||
pub const DATATYPE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Datatype"); |
||||
/// A domain of the subject property.
|
||||
pub const DOMAIN: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#domain"); |
||||
/// The definition of the subject resource.
|
||||
pub const IS_DEFINED_BY: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#isDefinedBy"); |
||||
/// A human-readable name for the subject.
|
||||
pub const LABEL: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#label"); |
||||
/// The class of literal values, e.g. textual strings and integers.
|
||||
pub const LITERAL: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Literal"); |
||||
/// A member of the subject resource.
|
||||
pub const MEMBER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#member"); |
||||
/// A range of the subject property.
|
||||
pub const RANGE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#range"); |
||||
/// The class resource, everything.
|
||||
pub const RESOURCE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Resource"); |
||||
/// Further information about the subject resource.
|
||||
pub const SEE_ALSO: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#seeAlso"); |
||||
/// The subject is a subclass of a class.
|
||||
pub const SUB_CLASS_OF: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subClassOf"); |
||||
/// The subject is a subproperty of a property.
|
||||
pub const SUB_PROPERTY_OF: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"); |
||||
} |
||||
|
||||
pub mod xsd { |
||||
//! [RDF compatible XSD datatypes](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-compatible-xsd-types).
|
||||
use crate::oxrdf::named_node::NamedNodeRef; |
||||
|
||||
/// Absolute or relative URIs and IRIs.
|
||||
pub const ANY_URI: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#anyURI"); |
||||
/// Base64-encoded binary data.
|
||||
pub const BASE_64_BINARY: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#base64Binary"); |
||||
/// true, false.
|
||||
pub const BOOLEAN: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#boolean"); |
||||
/// 128…+127 (8 bit).
|
||||
pub const BYTE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#byte"); |
||||
/// Dates (yyyy-mm-dd) with or without timezone.
|
||||
pub const DATE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#date"); |
||||
/// Duration of time (days, hours, minutes, seconds only).
|
||||
pub const DAY_TIME_DURATION: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dayTimeDuration"); |
||||
/// Date and time with or without timezone.
|
||||
pub const DATE_TIME: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dateTime"); |
||||
/// Date and time with required timezone.
|
||||
pub const DATE_TIME_STAMP: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dateTimeStamp"); |
||||
/// Arbitrary-precision decimal numbers.
|
||||
pub const DECIMAL: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#decimal"); |
||||
/// 64-bit floating point numbers incl. ±Inf, ±0, NaN.
|
||||
pub const DOUBLE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#double"); |
||||
/// Duration of time.
|
||||
pub const DURATION: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#duration"); |
||||
/// 32-bit floating point numbers incl. ±Inf, ±0, NaN.
|
||||
pub const FLOAT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#float"); |
||||
/// Gregorian calendar day of the month.
|
||||
pub const G_DAY: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gDay"); |
||||
/// Gregorian calendar month.
|
||||
pub const G_MONTH: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gMonth"); |
||||
/// Gregorian calendar month and day.
|
||||
pub const G_MONTH_DAY: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gMonthDay"); |
||||
/// Gregorian calendar year.
|
||||
pub const G_YEAR: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gYear"); |
||||
/// Gregorian calendar year and month.
|
||||
pub const G_YEAR_MONTH: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gYearMonth"); |
||||
/// Hex-encoded binary data.
|
||||
pub const HEX_BINARY: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#hexBinary"); |
||||
/// -2147483648…+2147483647 (32 bit).
|
||||
pub const INT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#int"); |
||||
/// Arbitrary-size integer numbers.
|
||||
pub const INTEGER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#integer"); |
||||
/// Language tags per [BCP47](http://tools.ietf.org/html/bcp47).
|
||||
pub const LANGUAGE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#language"); |
||||
/// -9223372036854775808…+9223372036854775807 (64 bit).
|
||||
pub const LONG: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#long"); |
||||
/// XML Names.
|
||||
pub const NAME: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#Name"); |
||||
/// XML NCName.
|
||||
pub const NC_NAME: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#NCName"); |
||||
/// Integer numbers <0.
|
||||
pub const NEGATIVE_INTEGER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#negativeInteger"); |
||||
/// XML NMTOKENs.
|
||||
pub const NMTOKEN: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#NMTOKEN"); |
||||
/// Integer numbers ≥0.
|
||||
pub const NON_NEGATIVE_INTEGER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#nonNegativeInteger"); |
||||
/// Integer numbers ≤0.
|
||||
pub const NON_POSITIVE_INTEGER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#nonPositiveInteger"); |
||||
/// Whitespace-normalized strings.
|
||||
pub const NORMALIZED_STRING: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#normalizedString"); |
||||
/// Integer numbers >0.
|
||||
pub const POSITIVE_INTEGER: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#positiveInteger"); |
||||
/// Times (hh:mm:ss.sss…) with or without timezone.
|
||||
pub const TIME: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#time"); |
||||
/// -32768…+32767 (16 bit).
|
||||
pub const SHORT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#short"); |
||||
/// Character strings (but not all Unicode character strings).
|
||||
pub const STRING: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#string"); |
||||
/// Tokenized strings.
|
||||
pub const TOKEN: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#token"); |
||||
/// 0…255 (8 bit).
|
||||
pub const UNSIGNED_BYTE: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedByte"); |
||||
/// 0…4294967295 (32 bit).
|
||||
pub const UNSIGNED_INT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedInt"); |
||||
/// 0…18446744073709551615 (64 bit).
|
||||
pub const UNSIGNED_LONG: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedLong"); |
||||
/// 0…65535 (16 bit).
|
||||
pub const UNSIGNED_SHORT: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedShort"); |
||||
/// Duration of time (months and years only).
|
||||
pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration"); |
||||
} |
||||
|
||||
pub mod geosparql { |
||||
//! [GeoSpatial](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
|
||||
use crate::oxrdf::named_node::NamedNodeRef; |
||||
|
||||
/// Geospatial datatype like `"Point({longitude} {latitude})"^^geo:wktLiteral`
|
||||
pub const WKT_LITERAL: NamedNodeRef<'_> = |
||||
NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral"); |
||||
} |
@ -0,0 +1,67 @@ |
||||
OxRDF I/O |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio) |
||||
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRDF I/O is a set of parsers and serializers for RDF. |
||||
|
||||
It supports: |
||||
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml) |
||||
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl) |
||||
|
||||
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star`feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star). |
||||
|
||||
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature). |
||||
|
||||
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs. |
||||
|
||||
Usage example converting a Turtle file to a N-Triples file: |
||||
```rust |
||||
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; |
||||
|
||||
let turtle_file = b"@base <http://example.com/> . |
||||
@prefix schema: <http://schema.org/> . |
||||
<foo> a schema:Person ; |
||||
schema:name \"Foo\" . |
||||
<bar> a schema:Person ; |
||||
schema:name \"Bar\" ."; |
||||
|
||||
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
||||
<http://example.com/foo> <http://schema.org/name> \"Foo\" . |
||||
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> . |
||||
<http://example.com/bar> <http://schema.org/name> \"Bar\" . |
||||
"; |
||||
|
||||
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new()); |
||||
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) { |
||||
writer.write_quad(&quad.unwrap()).unwrap(); |
||||
} |
||||
assert_eq!(writer.finish().unwrap(), ntriples_file); |
||||
``` |
||||
|
||||
Parsers for other RDF formats exists in Rust like [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD. |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,124 @@ |
||||
use crate::oxrdfxml; |
||||
use crate::oxttl; |
||||
use std::io; |
||||
use std::ops::Range; |
||||
|
||||
/// Error returned during RDF format parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum RdfParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] RdfSyntaxError), |
||||
} |
||||
|
||||
impl RdfParseError { |
||||
pub(crate) fn msg(msg: &'static str) -> Self { |
||||
Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg))) |
||||
} |
||||
} |
||||
|
||||
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError { |
||||
#[inline] |
||||
fn from(error: oxttl::TurtleSyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::Turtle(error)) |
||||
} |
||||
} |
||||
|
||||
impl From<oxttl::TurtleParseError> for RdfParseError { |
||||
#[inline] |
||||
fn from(error: oxttl::TurtleParseError) -> Self { |
||||
match error { |
||||
oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()), |
||||
oxttl::TurtleParseError::Io(e) => Self::Io(e), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError { |
||||
#[inline] |
||||
fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::RdfXml(error)) |
||||
} |
||||
} |
||||
|
||||
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError { |
||||
#[inline] |
||||
fn from(error: oxrdfxml::RdfXmlParseError) -> Self { |
||||
match error { |
||||
oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()), |
||||
oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfParseError) -> Self { |
||||
match error { |
||||
RdfParseError::Io(error) => error, |
||||
RdfParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct RdfSyntaxError(#[from] SyntaxErrorKind); |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Turtle(#[from] oxttl::TurtleSyntaxError), |
||||
#[error(transparent)] |
||||
RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError), |
||||
#[error("{0}")] |
||||
Msg(&'static str), |
||||
} |
||||
|
||||
impl RdfSyntaxError { |
||||
/// The location of the error inside of the file.
|
||||
#[inline] |
||||
pub fn location(&self) -> Option<Range<TextPosition>> { |
||||
match &self.0 { |
||||
SyntaxErrorKind::Turtle(e) => { |
||||
let location = e.location(); |
||||
Some( |
||||
TextPosition { |
||||
line: location.start.line, |
||||
column: location.start.column, |
||||
offset: location.start.offset, |
||||
}..TextPosition { |
||||
line: location.end.line, |
||||
column: location.end.column, |
||||
offset: location.end.offset, |
||||
}, |
||||
) |
||||
} |
||||
SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Turtle(error) => error.into(), |
||||
SyntaxErrorKind::RdfXml(error) => error.into(), |
||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Copy)] |
||||
pub struct TextPosition { |
||||
pub line: u64, |
||||
pub column: u64, |
||||
pub offset: u64, |
||||
} |
@ -0,0 +1,216 @@ |
||||
use std::fmt; |
||||
|
||||
/// RDF serialization formats.
|
||||
///
|
||||
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] |
||||
#[non_exhaustive] |
||||
pub enum RdfFormat { |
||||
/// [N3](https://w3c.github.io/N3/spec/)
|
||||
N3, |
||||
/// [N-Quads](https://www.w3.org/TR/n-quads/)
|
||||
NQuads, |
||||
/// [N-Triples](https://www.w3.org/TR/n-triples/)
|
||||
NTriples, |
||||
/// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
|
||||
RdfXml, |
||||
/// [TriG](https://www.w3.org/TR/trig/)
|
||||
TriG, |
||||
/// [Turtle](https://www.w3.org/TR/turtle/)
|
||||
Turtle, |
||||
} |
||||
|
||||
impl RdfFormat { |
||||
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfFormat::NTriples.iri(),
|
||||
/// "http://www.w3.org/ns/formats/N-Triples"
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn iri(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "http://www.w3.org/ns/formats/N3", |
||||
Self::NQuads => "http://www.w3.org/ns/formats/N-Quads", |
||||
Self::NTriples => "http://www.w3.org/ns/formats/N-Triples", |
||||
Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML", |
||||
Self::TriG => "http://www.w3.org/ns/formats/TriG", |
||||
Self::Turtle => "http://www.w3.org/ns/formats/Turtle", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.media_type(), "application/n-triples")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn media_type(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "text/n3", |
||||
Self::NQuads => "application/n-quads", |
||||
Self::NTriples => "application/n-triples", |
||||
Self::RdfXml => "application/rdf+xml", |
||||
Self::TriG => "application/trig", |
||||
Self::Turtle => "text/turtle", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.file_extension(), "nt")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn file_extension(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "n3", |
||||
Self::NQuads => "nq", |
||||
Self::NTriples => "nt", |
||||
Self::RdfXml => "rdf", |
||||
Self::TriG => "trig", |
||||
Self::Turtle => "ttl", |
||||
} |
||||
} |
||||
|
||||
/// The format name.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.name(), "N-Triples")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn name(self) -> &'static str { |
||||
match self { |
||||
Self::N3 => "N3", |
||||
Self::NQuads => "N-Quads", |
||||
Self::NTriples => "N-Triples", |
||||
Self::RdfXml => "RDF/XML", |
||||
Self::TriG => "TriG", |
||||
Self::Turtle => "Turtle", |
||||
} |
||||
} |
||||
|
||||
/// Checks if the formats supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.supports_datasets(), false);
|
||||
/// assert_eq!(RdfFormat::NQuads.supports_datasets(), true);
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn supports_datasets(self) -> bool { |
||||
matches!(self, Self::NQuads | Self::TriG) |
||||
} |
||||
|
||||
/// Checks if the formats supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::NTriples.supports_rdf_star(), true);
|
||||
/// assert_eq!(RdfFormat::RdfXml.supports_rdf_star(), false);
|
||||
/// ```
|
||||
#[inline] |
||||
#[cfg(feature = "rdf-star")] |
||||
pub const fn supports_rdf_star(self) -> bool { |
||||
matches!( |
||||
self, |
||||
Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG |
||||
) |
||||
} |
||||
|
||||
/// Looks for a known format from a media type.
|
||||
///
|
||||
/// It supports some media type aliases.
|
||||
/// For example, "application/xml" is going to return `RdfFormat::RdfXml` even if it is not its canonical media type.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfFormat::from_media_type("text/turtle; charset=utf-8"),
|
||||
/// Some(RdfFormat::Turtle)
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_media_type(media_type: &str) -> Option<Self> { |
||||
const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [ |
||||
("n-quads", RdfFormat::NQuads), |
||||
("n-triples", RdfFormat::NTriples), |
||||
("n3", RdfFormat::N3), |
||||
("nquads", RdfFormat::NQuads), |
||||
("ntriples", RdfFormat::NTriples), |
||||
("plain", RdfFormat::NTriples), |
||||
("rdf+xml", RdfFormat::RdfXml), |
||||
("trig", RdfFormat::TriG), |
||||
("turtle", RdfFormat::Turtle), |
||||
("xml", RdfFormat::RdfXml), |
||||
]; |
||||
|
||||
let (r#type, subtype) = media_type |
||||
.split_once(';') |
||||
.unwrap_or((media_type, "")) |
||||
.0 |
||||
.split_once('/')?; |
||||
let r#type = r#type.trim(); |
||||
if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { |
||||
return None; |
||||
} |
||||
let subtype = subtype.trim(); |
||||
let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); |
||||
for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { |
||||
if candidate_subtype.eq_ignore_ascii_case(subtype) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
/// Looks for a known format from an extension.
|
||||
///
|
||||
/// It supports some aliases.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use oxrdfio::RdfFormat;
|
||||
///
|
||||
/// assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples))
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_extension(extension: &str) -> Option<Self> { |
||||
const MEDIA_TYPES: [(&str, RdfFormat); 8] = [ |
||||
("n3", RdfFormat::N3), |
||||
("nq", RdfFormat::NQuads), |
||||
("nt", RdfFormat::NTriples), |
||||
("rdf", RdfFormat::RdfXml), |
||||
("trig", RdfFormat::TriG), |
||||
("ttl", RdfFormat::Turtle), |
||||
("txt", RdfFormat::NTriples), |
||||
("xml", RdfFormat::RdfXml), |
||||
]; |
||||
for (candidate_extension, candidate_id) in MEDIA_TYPES { |
||||
if candidate_extension.eq_ignore_ascii_case(extension) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for RdfFormat { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
f.write_str(self.name()) |
||||
} |
||||
} |
@ -0,0 +1,9 @@ |
||||
mod error; |
||||
mod format; |
||||
mod parser; |
||||
mod serializer; |
||||
|
||||
pub use error::{RdfParseError, RdfSyntaxError, TextPosition}; |
||||
pub use format::RdfFormat; |
||||
pub use parser::{FromReadQuadReader, RdfParser}; |
||||
pub use serializer::{RdfSerializer, ToWriteQuadWriter}; |
@ -0,0 +1,795 @@ |
||||
//! Utilities to read RDF graphs and datasets.
|
||||
|
||||
use crate::oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple}; |
||||
pub use crate::oxrdfio::error::RdfParseError; |
||||
use crate::oxrdfio::format::RdfFormat; |
||||
use crate::oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser}; |
||||
use crate::oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term}; |
||||
use crate::oxttl::nquads::{FromReadNQuadsReader, NQuadsParser}; |
||||
use crate::oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser}; |
||||
use crate::oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter}; |
||||
use crate::oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter}; |
||||
use std::collections::HashMap; |
||||
use std::io::Read; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncRead; |
||||
|
||||
/// Parsers for RDF serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
||||
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
||||
///
|
||||
/// Note the useful options:
|
||||
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
|
||||
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
|
||||
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
|
||||
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct RdfParser { |
||||
inner: RdfParserKind, |
||||
default_graph: GraphName, |
||||
without_named_graphs: bool, |
||||
rename_blank_nodes: bool, |
||||
} |
||||
|
||||
enum RdfParserKind { |
||||
N3(N3Parser), |
||||
NQuads(NQuadsParser), |
||||
NTriples(NTriplesParser), |
||||
RdfXml(RdfXmlParser), |
||||
TriG(TriGParser), |
||||
Turtle(TurtleParser), |
||||
} |
||||
|
||||
impl RdfParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: RdfFormat) -> Self { |
||||
Self { |
||||
inner: match format { |
||||
RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()), |
||||
RdfFormat::NQuads => RdfParserKind::NQuads({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
NQuadsParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
NQuadsParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::NTriples => RdfParserKind::NTriples({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
NTriplesParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
NTriplesParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()), |
||||
RdfFormat::TriG => RdfParserKind::TriG({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
TriGParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
TriGParser::new() |
||||
} |
||||
}), |
||||
RdfFormat::Turtle => RdfParserKind::Turtle({ |
||||
#[cfg(feature = "rdf-star")] |
||||
{ |
||||
TurtleParser::new().with_quoted_triples() |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
TurtleParser::new() |
||||
} |
||||
}), |
||||
}, |
||||
default_graph: GraphName::DefaultGraph, |
||||
without_named_graphs: false, |
||||
rename_blank_nodes: false, |
||||
} |
||||
} |
||||
|
||||
/// The format the parser uses.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfParser::from_format(RdfFormat::Turtle).format(),
|
||||
/// RdfFormat::Turtle
|
||||
/// );
|
||||
/// ```
|
||||
pub fn format(&self) -> RdfFormat { |
||||
match &self.inner { |
||||
RdfParserKind::N3(_) => RdfFormat::N3, |
||||
RdfParserKind::NQuads(_) => RdfFormat::NQuads, |
||||
RdfParserKind::NTriples(_) => RdfFormat::NTriples, |
||||
RdfParserKind::RdfXml(_) => RdfFormat::RdfXml, |
||||
RdfParserKind::TriG(_) => RdfFormat::TriG, |
||||
RdfParserKind::Turtle(_) => RdfFormat::Turtle, |
||||
} |
||||
} |
||||
|
||||
/// Provides an IRI that could be used to resolve the file relative IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "</s> </p> </o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
self.inner = match self.inner { |
||||
RdfParserKind::N3(p) => RdfParserKind::N3(p), |
||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p), |
||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p), |
||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?), |
||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?), |
||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?), |
||||
}; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Provides the name graph name that should replace the default graph in the returned quads.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::NamedNode;
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::Turtle)
|
||||
/// .with_default_graph(NamedNode::new("http://example.com/g")?);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self { |
||||
self.default_graph = default_graph.into(); |
||||
self |
||||
} |
||||
|
||||
/// Sets that the parser must fail if parsing a named graph.
|
||||
///
|
||||
/// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
|
||||
/// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn without_named_graphs(mut self) -> Self { |
||||
self.without_named_graphs = true; |
||||
self |
||||
} |
||||
|
||||
/// Renames the blank nodes ids from the ones set in the serialization to random ids.
|
||||
///
|
||||
/// This allows to avoid id conflicts when merging graphs together.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "_:a <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let result1 = RdfParser::from_format(RdfFormat::NQuads)
|
||||
/// .rename_blank_nodes()
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
/// let result2 = RdfParser::from_format(RdfFormat::NQuads)
|
||||
/// .rename_blank_nodes()
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
/// assert_ne!(result1, result2);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn rename_blank_nodes(mut self) -> Self { |
||||
self.rename_blank_nodes = true; |
||||
self |
||||
} |
||||
|
||||
/// Assumes the file is valid to make parsing faster.
|
||||
///
|
||||
/// It will skip some validations.
|
||||
///
|
||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
||||
#[inline] |
||||
pub fn unchecked(mut self) -> Self { |
||||
self.inner = match self.inner { |
||||
RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()), |
||||
RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()), |
||||
RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()), |
||||
RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()), |
||||
RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()), |
||||
RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()), |
||||
}; |
||||
self |
||||
} |
||||
|
||||
/// Parses from a [`Read`] implementation and returns an iterator of quads.
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> { |
||||
FromReadQuadReader { |
||||
parser: match self.inner { |
||||
RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)), |
||||
RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)), |
||||
RdfParserKind::NTriples(p) => { |
||||
FromReadQuadReaderKind::NTriples(p.parse_read(reader)) |
||||
} |
||||
RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)), |
||||
RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)), |
||||
RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)), |
||||
}, |
||||
mapper: QuadMapper { |
||||
default_graph: self.default_graph.clone(), |
||||
without_named_graphs: self.without_named_graphs, |
||||
blank_node_map: self.rename_blank_nodes.then(HashMap::new), |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
|
||||
/// if let Some(quad) = reader.next().await {
|
||||
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( |
||||
self, |
||||
reader: R, |
||||
) -> FromTokioAsyncReadQuadReader<R> { |
||||
FromTokioAsyncReadQuadReader { |
||||
parser: match self.inner { |
||||
RdfParserKind::N3(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::NQuads(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::NTriples(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::RdfXml(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::TriG(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader)) |
||||
} |
||||
RdfParserKind::Turtle(p) => { |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader)) |
||||
} |
||||
}, |
||||
mapper: QuadMapper { |
||||
default_graph: self.default_graph.clone(), |
||||
without_named_graphs: self.without_named_graphs, |
||||
blank_node_map: self.rename_blank_nodes.then(HashMap::new), |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfFormat> for RdfParser { |
||||
fn from(format: RdfFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let quads = parser
|
||||
/// .parse_read(file.as_bytes())
|
||||
/// .collect::<Result<Vec<_>, _>>()?;
|
||||
///
|
||||
/// assert_eq!(quads.len(), 1);
|
||||
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct FromReadQuadReader<R: Read> { |
||||
parser: FromReadQuadReaderKind<R>, |
||||
mapper: QuadMapper, |
||||
} |
||||
|
||||
enum FromReadQuadReaderKind<R: Read> { |
||||
N3(FromReadN3Reader<R>), |
||||
NQuads(FromReadNQuadsReader<R>), |
||||
NTriples(FromReadNTriplesReader<R>), |
||||
RdfXml(FromReadRdfXmlReader<R>), |
||||
TriG(FromReadTriGReader<R>), |
||||
Turtle(FromReadTurtleReader<R>), |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadQuadReader<R> { |
||||
type Item = Result<Quad, RdfParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(match &mut self.parser { |
||||
FromReadQuadReaderKind::N3(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_n3_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::NQuads(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::NTriples(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::RdfXml(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::TriG(parser) => match parser.next()? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromReadQuadReaderKind::Turtle(parser) => match parser.next()? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl<R: Read> FromReadQuadReader<R> { |
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// An empty iterator is return if the format does not support prefixes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> PrefixesIter<'_> { |
||||
PrefixesIter { |
||||
inner: match &self.parser { |
||||
FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), |
||||
FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), |
||||
FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()), |
||||
FromReadQuadReaderKind::NQuads(_) |
||||
| FromReadQuadReaderKind::NTriples(_) |
||||
| FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
match &self.parser { |
||||
FromReadQuadReaderKind::N3(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::TriG(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::Turtle(p) => p.base_iri(), |
||||
FromReadQuadReaderKind::NQuads(_) |
||||
| FromReadQuadReaderKind::NTriples(_) |
||||
| FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
|
||||
///
|
||||
/// Reads are buffered.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
|
||||
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
|
||||
///
|
||||
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
|
||||
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
|
||||
/// if let Some(quad) = reader.next().await {
|
||||
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[must_use] |
||||
#[cfg(feature = "async-tokio")] |
||||
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> { |
||||
parser: FromTokioAsyncReadQuadReaderKind<R>, |
||||
mapper: QuadMapper, |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> { |
||||
N3(FromTokioAsyncReadN3Reader<R>), |
||||
NQuads(FromTokioAsyncReadNQuadsReader<R>), |
||||
NTriples(FromTokioAsyncReadNTriplesReader<R>), |
||||
RdfXml(FromTokioAsyncReadRdfXmlReader<R>), |
||||
TriG(FromTokioAsyncReadTriGReader<R>), |
||||
Turtle(FromTokioAsyncReadTurtleReader<R>), |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> { |
||||
pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> { |
||||
Some(match &mut self.parser { |
||||
FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? { |
||||
Ok(quad) => self.mapper.map_n3_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? { |
||||
Ok(quad) => self.mapper.map_quad(quad), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? { |
||||
Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)), |
||||
Err(e) => Err(e.into()), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// An empty iterator is return if the format does not support prefixes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
|
||||
/// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
|
||||
///
|
||||
/// reader.next().await.unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> PrefixesIter<'_> { |
||||
PrefixesIter { |
||||
inner: match &self.parser { |
||||
FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()), |
||||
FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()), |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(p) => { |
||||
PrefixesIterKind::Turtle(p.prefixes()) |
||||
} |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(_) |
||||
| FromTokioAsyncReadQuadReaderKind::NTriples(_) |
||||
| FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */ |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// `None` is returned if no base IRI is set or the format does not support base IRIs.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfParser};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader =
|
||||
/// RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
|
||||
/// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
|
||||
///
|
||||
/// reader.next().await.unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
match &self.parser { |
||||
FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(), |
||||
FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(), |
||||
FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(), |
||||
FromTokioAsyncReadQuadReaderKind::NQuads(_) |
||||
| FromTokioAsyncReadQuadReaderKind::NTriples(_) |
||||
| FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
|
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Iterator on the file prefixes.
|
||||
///
|
||||
/// See [`FromReadQuadReader::prefixes`].
|
||||
pub struct PrefixesIter<'a> { |
||||
inner: PrefixesIterKind<'a>, |
||||
} |
||||
|
||||
enum PrefixesIterKind<'a> { |
||||
Turtle(TurtlePrefixesIter<'a>), |
||||
TriG(TriGPrefixesIter<'a>), |
||||
N3(N3PrefixesIter<'a>), |
||||
None, |
||||
} |
||||
|
||||
impl<'a> Iterator for PrefixesIter<'a> { |
||||
type Item = (&'a str, &'a str); |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
match &mut self.inner { |
||||
PrefixesIterKind::Turtle(iter) => iter.next(), |
||||
PrefixesIterKind::TriG(iter) => iter.next(), |
||||
PrefixesIterKind::N3(iter) => iter.next(), |
||||
PrefixesIterKind::None => None, |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
match &self.inner { |
||||
PrefixesIterKind::Turtle(iter) => iter.size_hint(), |
||||
PrefixesIterKind::TriG(iter) => iter.size_hint(), |
||||
PrefixesIterKind::N3(iter) => iter.size_hint(), |
||||
PrefixesIterKind::None => (0, Some(0)), |
||||
} |
||||
} |
||||
} |
||||
|
||||
struct QuadMapper { |
||||
default_graph: GraphName, |
||||
without_named_graphs: bool, |
||||
blank_node_map: Option<HashMap<BlankNode, BlankNode>>, |
||||
} |
||||
|
||||
impl QuadMapper { |
||||
fn map_blank_node(&mut self, node: BlankNode) -> BlankNode { |
||||
if let Some(blank_node_map) = &mut self.blank_node_map { |
||||
blank_node_map |
||||
.entry(node) |
||||
.or_insert_with(BlankNode::default) |
||||
.clone() |
||||
} else { |
||||
node |
||||
} |
||||
} |
||||
|
||||
fn map_subject(&mut self, node: Subject) -> Subject { |
||||
match node { |
||||
Subject::NamedNode(node) => node.into(), |
||||
Subject::BlankNode(node) => self.map_blank_node(node).into(), |
||||
#[cfg(feature = "rdf-star")] |
||||
Subject::Triple(triple) => self.map_triple(*triple).into(), |
||||
} |
||||
} |
||||
|
||||
fn map_term(&mut self, node: Term) -> Term { |
||||
match node { |
||||
Term::NamedNode(node) => node.into(), |
||||
Term::BlankNode(node) => self.map_blank_node(node).into(), |
||||
Term::Literal(literal) => literal.into(), |
||||
#[cfg(feature = "rdf-star")] |
||||
Term::Triple(triple) => self.map_triple(*triple).into(), |
||||
} |
||||
} |
||||
|
||||
fn map_triple(&mut self, triple: Triple) -> Triple { |
||||
Triple { |
||||
subject: self.map_subject(triple.subject), |
||||
predicate: triple.predicate, |
||||
object: self.map_term(triple.object), |
||||
} |
||||
} |
||||
|
||||
fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> { |
||||
match graph_name { |
||||
GraphName::NamedNode(node) => { |
||||
if self.without_named_graphs { |
||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
||||
} else { |
||||
Ok(node.into()) |
||||
} |
||||
} |
||||
GraphName::BlankNode(node) => { |
||||
if self.without_named_graphs { |
||||
Err(RdfParseError::msg("Named graphs are not allowed")) |
||||
} else { |
||||
Ok(self.map_blank_node(node).into()) |
||||
} |
||||
} |
||||
GraphName::DefaultGraph => Ok(self.default_graph.clone()), |
||||
} |
||||
} |
||||
|
||||
fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> { |
||||
Ok(Quad { |
||||
subject: self.map_subject(quad.subject), |
||||
predicate: quad.predicate, |
||||
object: self.map_term(quad.object), |
||||
graph_name: self.map_graph_name(quad.graph_name)?, |
||||
}) |
||||
} |
||||
|
||||
fn map_triple_to_quad(&mut self, triple: Triple) -> Quad { |
||||
self.map_triple(triple).in_graph(self.default_graph.clone()) |
||||
} |
||||
|
||||
fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> { |
||||
Ok(Quad { |
||||
subject: match quad.subject { |
||||
N3Term::NamedNode(s) => Ok(s.into()), |
||||
N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()), |
||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
||||
"literals are not allowed in regular RDF subjects", |
||||
)), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(s) => Ok(self.map_triple(*s).into()), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF subjects", |
||||
)), |
||||
}?, |
||||
predicate: match quad.predicate { |
||||
N3Term::NamedNode(p) => Ok(p), |
||||
N3Term::BlankNode(_) => Err(RdfParseError::msg( |
||||
"blank nodes are not allowed in regular RDF predicates", |
||||
)), |
||||
N3Term::Literal(_) => Err(RdfParseError::msg( |
||||
"literals are not allowed in regular RDF predicates", |
||||
)), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(_) => Err(RdfParseError::msg( |
||||
"quoted triples are not allowed in regular RDF predicates", |
||||
)), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF predicates", |
||||
)), |
||||
}?, |
||||
object: match quad.object { |
||||
N3Term::NamedNode(o) => Ok(o.into()), |
||||
N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()), |
||||
N3Term::Literal(o) => Ok(o.into()), |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Term::Triple(o) => Ok(self.map_triple(*o).into()), |
||||
N3Term::Variable(_) => Err(RdfParseError::msg( |
||||
"variables are not allowed in regular RDF objects", |
||||
)), |
||||
}?, |
||||
graph_name: self.map_graph_name(quad.graph_name)?, |
||||
}) |
||||
} |
||||
} |
@ -0,0 +1,412 @@ |
||||
//! Utilities to write RDF graphs and datasets.
|
||||
|
||||
use crate::oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef}; |
||||
use crate::oxrdfio::format::RdfFormat; |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxrdfxml::ToTokioAsyncWriteRdfXmlWriter; |
||||
use crate::oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::nquads::ToTokioAsyncWriteNQuadsWriter; |
||||
use crate::oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter; |
||||
use crate::oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::trig::ToTokioAsyncWriteTriGWriter; |
||||
use crate::oxttl::trig::{ToWriteTriGWriter, TriGSerializer}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::turtle::ToTokioAsyncWriteTurtleWriter; |
||||
use crate::oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer}; |
||||
use std::io::{self, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// A serializer for RDF serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
|
||||
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
|
||||
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
|
||||
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
|
||||
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
|
||||
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct RdfSerializer { |
||||
inner: RdfSerializerKind, |
||||
} |
||||
|
||||
enum RdfSerializerKind { |
||||
NQuads(NQuadsSerializer), |
||||
NTriples(NTriplesSerializer), |
||||
RdfXml(RdfXmlSerializer), |
||||
TriG(TriGSerializer), |
||||
Turtle(TurtleSerializer), |
||||
} |
||||
|
||||
impl RdfSerializer { |
||||
/// Builds a serializer for the given format
|
||||
#[inline] |
||||
pub fn from_format(format: RdfFormat) -> Self { |
||||
Self { |
||||
inner: match format { |
||||
RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()), |
||||
RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()), |
||||
RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()), |
||||
RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()), |
||||
RdfFormat::Turtle | RdfFormat::N3 => { |
||||
RdfSerializerKind::Turtle(TurtleSerializer::new()) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// The format the serializer serializes to.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// RdfSerializer::from_format(RdfFormat::Turtle).format(),
|
||||
/// RdfFormat::Turtle
|
||||
/// );
|
||||
/// ```
|
||||
pub fn format(&self) -> RdfFormat { |
||||
match &self.inner { |
||||
RdfSerializerKind::NQuads(_) => RdfFormat::NQuads, |
||||
RdfSerializerKind::NTriples(_) => RdfFormat::NTriples, |
||||
RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml, |
||||
RdfSerializerKind::TriG(_) => RdfFormat::TriG, |
||||
RdfSerializerKind::Turtle(_) => RdfFormat::Turtle, |
||||
} |
||||
} |
||||
|
||||
/// If the format supports it, sets a prefix.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef {
|
||||
/// subject: NamedNodeRef::new("http://example.com/s")?.into(),
|
||||
/// predicate: rdf::TYPE.into(),
|
||||
/// object: NamedNodeRef::new("http://schema.org/Person")?.into(),
|
||||
/// })?;
|
||||
/// assert_eq!(
|
||||
/// writer.finish()?,
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.inner = match self.inner { |
||||
RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s), |
||||
RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s), |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?) |
||||
} |
||||
}; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes to a [`Write`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into()
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> { |
||||
ToWriteQuadWriter { |
||||
formatter: match self.inner { |
||||
RdfSerializerKind::NQuads(s) => { |
||||
ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::NTriples(s) => { |
||||
ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
ToWriteQuadWriterKind::TriG(s.serialize_to_write(write)) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write)) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
|
||||
/// Writes to a Tokio [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
|
||||
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
|
||||
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
|
||||
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
|
||||
/// }).await?;
|
||||
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteQuadWriter<W> { |
||||
ToTokioAsyncWriteQuadWriter { |
||||
formatter: match self.inner { |
||||
RdfSerializerKind::NQuads(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples( |
||||
s.serialize_to_tokio_async_write(write), |
||||
), |
||||
RdfSerializerKind::RdfXml(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::TriG(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
RdfSerializerKind::Turtle(s) => { |
||||
ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write)) |
||||
} |
||||
}, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfFormat> for RdfSerializer { |
||||
fn from(format: RdfFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Writes quads or triples to a [`Write`] implementation.
|
||||
///
|
||||
/// Can be built using [`RdfSerializer::serialize_to_write`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new("http://example.com/s")?.into(),
|
||||
/// predicate: NamedNode::new("http://example.com/p")?,
|
||||
/// object: NamedNode::new("http://example.com/o")?.into(),
|
||||
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
|
||||
/// })?;
|
||||
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteQuadWriter<W: Write> { |
||||
formatter: ToWriteQuadWriterKind<W>, |
||||
} |
||||
|
||||
enum ToWriteQuadWriterKind<W: Write> { |
||||
NQuads(ToWriteNQuadsWriter<W>), |
||||
NTriples(ToWriteNTriplesWriter<W>), |
||||
RdfXml(ToWriteRdfXmlWriter<W>), |
||||
TriG(ToWriteTriGWriter<W>), |
||||
Turtle(ToWriteTurtleWriter<W>), |
||||
} |
||||
|
||||
impl<W: Write> ToWriteQuadWriter<W> { |
||||
/// Writes a [`QuadRef`]
|
||||
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
match &mut self.formatter { |
||||
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad), |
||||
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?), |
||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?), |
||||
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad), |
||||
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?), |
||||
} |
||||
} |
||||
|
||||
/// Writes a [`TripleRef`]
|
||||
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
///
|
||||
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
|
||||
pub fn finish(self) -> io::Result<W> { |
||||
Ok(match self.formatter { |
||||
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(), |
||||
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(), |
||||
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?, |
||||
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?, |
||||
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
/// Writes quads or triples to a [`Write`] implementation.
|
||||
///
|
||||
/// Can be built using [`RdfSerializer::serialize_to_write`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdfio::{RdfFormat, RdfSerializer};
|
||||
/// use oxrdf::{Quad, NamedNode};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_quad(&Quad {
|
||||
/// subject: NamedNode::new_unchecked("http://example.com/s").into(),
|
||||
/// predicate: NamedNode::new_unchecked("http://example.com/p"),
|
||||
/// object: NamedNode::new_unchecked("http://example.com/o").into(),
|
||||
/// graph_name: NamedNode::new_unchecked("http://example.com/g").into()
|
||||
/// }).await?;
|
||||
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[must_use] |
||||
#[cfg(feature = "async-tokio")] |
||||
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> { |
||||
formatter: ToTokioAsyncWriteQuadWriterKind<W>, |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> { |
||||
NQuads(ToTokioAsyncWriteNQuadsWriter<W>), |
||||
NTriples(ToTokioAsyncWriteNTriplesWriter<W>), |
||||
RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>), |
||||
TriG(ToTokioAsyncWriteTriGWriter<W>), |
||||
Turtle(ToTokioAsyncWriteTurtleWriter<W>), |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> { |
||||
/// Writes a [`QuadRef`]
|
||||
pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
match &mut self.formatter { |
||||
ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await, |
||||
ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => { |
||||
writer.write_triple(to_triple(quad)?).await |
||||
} |
||||
ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => { |
||||
writer.write_triple(to_triple(quad)?).await |
||||
} |
||||
ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await, |
||||
ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => { |
||||
writer.write_triple(to_triple(quad)?).await |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Writes a [`TripleRef`]
|
||||
pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph)) |
||||
.await |
||||
} |
||||
|
||||
/// Writes the last bytes of the file
|
||||
///
|
||||
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
|
||||
pub async fn finish(self) -> io::Result<W> { |
||||
Ok(match self.formatter { |
||||
ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(), |
||||
ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(), |
||||
ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?, |
||||
ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?, |
||||
ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> { |
||||
let quad = quad.into(); |
||||
if quad.graph_name.is_default_graph() { |
||||
Ok(quad.into()) |
||||
} else { |
||||
Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"Only quads in the default graph can be serialized to a RDF graph format", |
||||
)) |
||||
} |
||||
} |
@ -0,0 +1,56 @@ |
||||
OxRDF/XML |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml) |
||||
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). |
||||
|
||||
The entry points of this library are the two [`RdfXmlParser`] and [`RdfXmlSerializer`] structs. |
||||
|
||||
Usage example counting the number of people in a RDF/XML file: |
||||
|
||||
```rust |
||||
use oxrdf::{NamedNodeRef, vocab::rdf}; |
||||
use oxrdfxml::RdfXmlParser; |
||||
|
||||
fn main() { |
||||
let file = br#"<?xml version="1.0"?> |
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/"> |
||||
<rdf:Description rdf:about="http://example.com/foo"> |
||||
<rdf:type rdf:resource="http://schema.org/Person" /> |
||||
<schema:name>Foo</schema:name> |
||||
</rdf:Description> |
||||
<schema:Person rdf:about="http://example.com/bar" schema:name="Bar" /> |
||||
</rdf:RDF>"#; |
||||
|
||||
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); |
||||
let mut count = 0; |
||||
for triple in RdfXmlParser::new().parse_read(file.as_ref()) { |
||||
let triple = triple.unwrap(); |
||||
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { |
||||
count += 1; |
||||
} |
||||
} |
||||
assert_eq!(2, count); |
||||
} |
||||
``` |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,89 @@ |
||||
use oxilangtag::LanguageTagParseError; |
||||
use oxiri::IriParseError; |
||||
use std::io; |
||||
use std::sync::Arc; |
||||
|
||||
/// Error returned during RDF/XML parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum RdfXmlParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] RdfXmlSyntaxError), |
||||
} |
||||
|
||||
impl From<RdfXmlParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfXmlParseError) -> Self { |
||||
match error { |
||||
RdfXmlParseError::Io(error) => error, |
||||
RdfXmlParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::Error> for RdfXmlParseError { |
||||
#[inline] |
||||
fn from(error: quick_xml::Error) -> Self { |
||||
match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e))) |
||||
} |
||||
_ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind); |
||||
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Xml(#[from] quick_xml::Error), |
||||
#[error("error while parsing IRI '{iri}': {error}")] |
||||
InvalidIri { |
||||
iri: String, |
||||
#[source] |
||||
error: IriParseError, |
||||
}, |
||||
#[error("error while parsing language tag '{tag}': {error}")] |
||||
InvalidLanguageTag { |
||||
tag: String, |
||||
#[source] |
||||
error: LanguageTagParseError, |
||||
}, |
||||
#[error("{0}")] |
||||
Msg(String), |
||||
} |
||||
|
||||
impl RdfXmlSyntaxError { |
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self(SyntaxErrorKind::Msg(msg.into())) |
||||
} |
||||
} |
||||
|
||||
impl From<RdfXmlSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: RdfXmlSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Xml(error) => match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e)) |
||||
} |
||||
quick_xml::Error::UnexpectedEof(error) => { |
||||
Self::new(io::ErrorKind::UnexpectedEof, error) |
||||
} |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
}, |
||||
SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg), |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,8 @@ |
||||
mod error; |
||||
mod parser; |
||||
mod serializer; |
||||
mod utils; |
||||
|
||||
pub use error::{RdfXmlParseError, RdfXmlSyntaxError}; |
||||
pub use parser::{FromReadRdfXmlReader, RdfXmlParser}; |
||||
pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,461 @@ |
||||
use crate::oxrdf::vocab::rdf; |
||||
use crate::oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef}; |
||||
use crate::oxrdfxml::utils::*; |
||||
use oxiri::{Iri, IriParseError}; |
||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; |
||||
use quick_xml::Writer; |
||||
use std::borrow::Cow; |
||||
use std::collections::BTreeMap; |
||||
use std::io; |
||||
use std::io::Write; |
||||
use std::sync::Arc; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)] |
||||
#[must_use] |
||||
pub struct RdfXmlSerializer { |
||||
prefixes: BTreeMap<String, String>, |
||||
} |
||||
|
||||
impl RdfXmlSerializer { |
||||
/// Builds a new [`RdfXmlSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self { |
||||
prefixes: BTreeMap::new(), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.prefixes.insert( |
||||
Iri::parse(prefix_iri.into())?.into_inner(), |
||||
prefix_name.into(), |
||||
); |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation.
|
||||
///
|
||||
/// This writer does unbuffered writes.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://schema.org/name")?,
|
||||
/// LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> { |
||||
ToWriteRdfXmlWriter { |
||||
writer: Writer::new_with_indent(write, b'\t', 1), |
||||
inner: self.inner_writer(), |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation.
///
/// This writer does unbuffered writes.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// )).await?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// )).await?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[allow(clippy::unused_self)]
#[cfg(feature = "async-tokio")]
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
    self,
    write: W,
) -> ToTokioAsyncWriteRdfXmlWriter<W> {
    // Same single-tab indentation policy as the synchronous writer.
    let writer = Writer::new_with_indent(write, b'\t', 1);
    ToTokioAsyncWriteRdfXmlWriter {
        inner: self.inner_writer(),
        writer,
    }
}
||||
|
||||
fn inner_writer(mut self) -> InnerRdfXmlWriter { |
||||
self.prefixes.insert( |
||||
"http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(), |
||||
"rdf".into(), |
||||
); |
||||
InnerRdfXmlWriter { |
||||
current_subject: None, |
||||
current_resource_tag: None, |
||||
prefixes: self.prefixes, |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// ))?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteRdfXmlWriter<W: Write> {
    // XML event writer that owns the target `Write` sink.
    writer: Writer<W>,
    // Shared serialization state machine (also used by the async writer).
    inner: InnerRdfXmlWriter,
}
||||
|
||||
impl<W: Write> ToWriteRdfXmlWriter<W> { |
||||
/// Writes an extra triple.
|
||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
||||
pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
let mut buffer = Vec::new(); |
||||
self.inner.write_triple(t, &mut buffer)?; |
||||
self.flush_buffer(&mut buffer) |
||||
} |
||||
|
||||
/// Ends the write process and returns the underlying [`Write`].
|
||||
pub fn finish(mut self) -> io::Result<W> { |
||||
let mut buffer = Vec::new(); |
||||
self.inner.finish(&mut buffer); |
||||
self.flush_buffer(&mut buffer)?; |
||||
Ok(self.writer.into_inner()) |
||||
} |
||||
|
||||
fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> { |
||||
for event in buffer.drain(0..) { |
||||
self.writer.write_event(event).map_err(map_err)?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// )).await?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// )).await?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> {
    // XML event writer that owns the target async sink.
    writer: Writer<W>,
    // Shared serialization state machine (also used by the sync writer).
    inner: InnerRdfXmlWriter,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> {
    /// Writes an extra triple.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Buffer the events synchronously, then flush them asynchronously.
        let mut events = Vec::new();
        self.inner.write_triple(t, &mut events)?;
        self.flush_buffer(&mut events).await
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::new();
        self.inner.finish(&mut events);
        self.flush_buffer(&mut events).await?;
        Ok(self.writer.into_inner())
    }

    /// Drains the buffered XML events into the underlying async writer.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(..) {
            let result = self.writer.write_event_async(event).await;
            result.map_err(map_err)?;
        }
        Ok(())
    }
}
||||
|
||||
/// Shared RDF/XML serialization state machine.
///
/// Produces buffered [`Event`]s so the same logic serves both the sync and the
/// tokio-based writers.
pub struct InnerRdfXmlWriter {
    // Subject of the resource element currently open, if any. `None` before
    // the first triple (the XML declaration/root are then still unwritten).
    current_subject: Option<Subject>,
    // Qualified name of the currently open typed resource element, when the
    // element was opened from an rdf:type triple instead of rdf:Description.
    current_resource_tag: Option<String>,
    // Namespace IRI -> prefix name declarations, emitted on the root element.
    prefixes: BTreeMap<String, String>,
}
||||
|
||||
impl InnerRdfXmlWriter {
    /// Appends to `output` the XML events serializing the triple `t`.
    ///
    /// Consecutive triples with the same subject are grouped under a single
    /// resource element; it is closed when the subject changes or in `finish`.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        output: &mut Vec<Event<'a>>,
    ) -> io::Result<()> {
        // First triple: emit the XML declaration and the rdf:RDF root element.
        if self.current_subject.is_none() {
            self.write_start(output);
        }

        let triple = t.into();
        // We open a new rdf:Description (or typed element) if the subject changed
        if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) {
            // Close the previous subject's element first: the custom type tag
            // if one was opened, plain rdf:Description otherwise.
            if self.current_subject.is_some() {
                output.push(Event::End(
                    self.current_resource_tag
                        .take()
                        .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
                ));
            }
            self.current_subject = Some(triple.subject.into_owned());

            // An rdf:type triple with a named object is folded into the element
            // name itself (e.g. <schema:Person>) instead of a child property.
            let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE {
                if let TermRef::NamedNode(t) = triple.object {
                    let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t);
                    let mut description_open = BytesStart::new(prop_qname.clone());
                    if let Some(prop_xmlns) = prop_xmlns {
                        description_open.push_attribute(prop_xmlns);
                    }
                    // Remember the tag name so the matching end tag can be emitted.
                    self.current_resource_tag = Some(prop_qname.into_owned());
                    (description_open, true)
                } else {
                    (BytesStart::new("rdf:Description"), false)
                }
            } else {
                (BytesStart::new("rdf:Description"), false)
            };
            // The subject is carried as an attribute of the resource element.
            match triple.subject {
                SubjectRef::NamedNode(node) => {
                    description_open.push_attribute(("rdf:about", node.as_str()))
                }
                SubjectRef::BlankNode(node) => {
                    description_open.push_attribute(("rdf:nodeID", node.as_str()))
                }
                _ => {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidInput,
                        "RDF/XML only supports named or blank subject",
                    ))
                }
            }
            output.push(Event::Start(description_open));
            if with_type_tag {
                return Ok(()); // No need for a value: the type is the tag name
            }
        }

        // Emit the property element for the predicate.
        let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate);
        let mut property_open = BytesStart::new(prop_qname.clone());
        if let Some(prop_xmlns) = prop_xmlns {
            property_open.push_attribute(prop_xmlns);
        }
        // Named/blank objects become attributes on an empty element; literals
        // become text content of a start/end element pair.
        let content = match triple.object {
            TermRef::NamedNode(node) => {
                property_open.push_attribute(("rdf:resource", node.as_str()));
                None
            }
            TermRef::BlankNode(node) => {
                property_open.push_attribute(("rdf:nodeID", node.as_str()));
                None
            }
            TermRef::Literal(literal) => {
                if let Some(language) = literal.language() {
                    property_open.push_attribute(("xml:lang", language));
                } else if !literal.is_plain() {
                    property_open.push_attribute(("rdf:datatype", literal.datatype().as_str()));
                }
                Some(literal.value())
            }
            _ => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "RDF/XML only supports named, blank or literal object",
                ))
            }
        };
        if let Some(content) = content {
            output.push(Event::Start(property_open));
            output.push(Event::Text(BytesText::new(content)));
            output.push(Event::End(BytesEnd::new(prop_qname)));
        } else {
            output.push(Event::Empty(property_open));
        }
        Ok(())
    }

    /// Emits the XML declaration and the opening rdf:RDF root element with all
    /// declared namespace prefixes.
    fn write_start(&self, output: &mut Vec<Event<'_>>) {
        output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)));
        let mut rdf_open = BytesStart::new("rdf:RDF");
        // `prefixes` maps namespace IRI -> prefix name.
        for (prefix_value, prefix_name) in &self.prefixes {
            rdf_open.push_attribute((
                format!("xmlns:{prefix_name}").as_str(),
                prefix_value.as_str(),
            ));
        }
        output.push(Event::Start(rdf_open))
    }

    /// Closes any open resource element and the rdf:RDF root.
    ///
    /// If no triple was ever written, still emits the declaration and root so
    /// the output is a well-formed (empty) RDF/XML document.
    fn finish(&mut self, output: &mut Vec<Event<'static>>) {
        if self.current_subject.is_some() {
            output.push(Event::End(
                self.current_resource_tag
                    .take()
                    .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
            ));
        } else {
            self.write_start(output);
        }
        output.push(Event::End(BytesEnd::new("rdf:RDF")));
    }

    /// Converts an IRI into an XML qualified name, plus the xmlns attribute to
    /// declare on the element when the namespace was not globally declared.
    fn uri_to_qname_and_xmlns<'a>(
        &self,
        uri: NamedNodeRef<'a>,
    ) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) {
        let (prop_prefix, prop_value) = split_iri(uri.as_str());
        if let Some(prop_prefix) = self.prefixes.get(prop_prefix) {
            // Namespace already declared on the root element.
            (
                if prop_prefix.is_empty() {
                    Cow::Borrowed(prop_value)
                } else {
                    Cow::Owned(format!("{prop_prefix}:{prop_value}"))
                },
                None,
            )
        } else if prop_prefix == "http://www.w3.org/2000/xmlns/" {
            // The xmlns namespace must not be redeclared.
            (Cow::Owned(format!("xmlns:{prop_value}")), None)
        } else if prop_value.is_empty() {
            // No local part: invent a throwaway "p" prefix declared locally.
            (Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix)))
        } else {
            // Undeclared namespace: use it as local default namespace.
            (Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix)))
        }
    }
}
||||
|
||||
fn map_err(error: quick_xml::Error) -> io::Error { |
||||
if let quick_xml::Error::Io(error) = error { |
||||
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) |
||||
} else { |
||||
io::Error::new(io::ErrorKind::Other, error) |
||||
} |
||||
} |
||||
|
||||
fn split_iri(iri: &str) -> (&str, &str) { |
||||
if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') { |
||||
if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':') |
||||
{ |
||||
( |
||||
&iri[..position_base + position_add], |
||||
&iri[position_base + position_add..], |
||||
) |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Checks the namespace/local-part split on '/', '#' and ':' delimited IRIs,
    // including the no-local-part case.
    #[test]
    fn test_split_iri() {
        assert_eq!(
            split_iri("http://schema.org/Person"),
            ("http://schema.org/", "Person")
        );
        assert_eq!(split_iri("http://schema.org/"), ("http://schema.org/", ""));
        assert_eq!(
            split_iri("http://schema.org#foo"),
            ("http://schema.org#", "foo")
        );
        assert_eq!(split_iri("urn:isbn:foo"), ("urn:isbn:", "foo"));
    }
}
@ -0,0 +1,26 @@ |
||||
/// Checks whether `c` matches the XML 1.0 `NameStartChar` production.
pub fn is_name_start_char(c: char) -> bool {
    // NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    if c.is_ascii() {
        // ASCII subset: colon, underscore and letters only.
        c == ':' || c == '_' || c.is_ascii_alphabetic()
    } else {
        matches!(c,
            '\u{00C0}'..='\u{00D6}'
                | '\u{00D8}'..='\u{00F6}'
                | '\u{00F8}'..='\u{02FF}'
                | '\u{0370}'..='\u{037D}'
                | '\u{037F}'..='\u{1FFF}'
                | '\u{200C}'..='\u{200D}'
                | '\u{2070}'..='\u{218F}'
                | '\u{2C00}'..='\u{2FEF}'
                | '\u{3001}'..='\u{D7FF}'
                | '\u{F900}'..='\u{FDCF}'
                | '\u{FDF0}'..='\u{FFFD}'
                | '\u{10000}'..='\u{EFFFF}')
    }
}
||||
|
||||
pub fn is_name_char(c: char) -> bool { |
||||
// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||
is_name_start_char(c) |
||||
|| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
||||
} |
@ -0,0 +1,65 @@ |
||||
oxsdatatypes |
||||
============ |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxsdatatypes.svg)](https://crates.io/crates/oxsdatatypes) |
||||
[![Released API docs](https://docs.rs/oxsdatatypes/badge.svg)](https://docs.rs/oxsdatatypes) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxsdatatypes)](https://crates.io/crates/oxsdatatypes) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
oxsdatatypes is an implementation of some [XML Schema Definition Language Datatypes](https://www.w3.org/TR/xmlschema11-2/). |
||||
Its main aim is to ease the implementation of SPARQL and XPath. |
||||
|
||||
Usage example: |
||||
|
||||
```rust |
||||
use std::str::FromStr; |
||||
use oxsdatatypes::Decimal; |
||||
|
||||
assert!(Decimal::from_str("22.2").unwrap() > Decimal::from_str("21").unwrap()); |
||||
``` |
||||
|
||||
Each datatype is represented by a Rust struct. |
||||
|
||||
Each datatype provides: |
||||
* `FromStr` implementation to parse a datatype string serialization following its [lexical mapping](https://www.w3.org/TR/xmlschema11-2/#dt-lexical-mapping). |
||||
* `Display` implementation to serialize a datatype following its [canonical mapping](https://www.w3.org/TR/xmlschema11-2/#dt-canonical-mapping). |
||||
* `is_identical_with` method following its [identity relation](https://www.w3.org/TR/xmlschema11-2/#identity). |
||||
* `PartialEq`, and `Eq` if possible, implementations following its [equality relation](https://www.w3.org/TR/xmlschema11-2/#equality). |
||||
* `PartialOrd`, and `Ord` if possible, implementations following its [order relation](https://www.w3.org/TR/xmlschema11-2/#order). |
||||
* `From` and `TryFrom` implementations to implement [XPath casting](https://www.w3.org/TR/xpath-functions-31/#casting). |
||||
* Various methods implementing [XPath functions](https://www.w3.org/TR/xpath-functions-31/). |
||||
* `from_be_bytes` and `to_be_bytes` methods for serialization. |
||||
|
||||
|
||||
### `DateTime::now` behavior |
||||
|
||||
The `DateTime::now()` function needs special OS support. |
||||
Currently: |
||||
- If the `custom-now` feature is enabled, a function computing `now` must be set: |
||||
```rust |
||||
use oxsdatatypes::Duration; |
||||
|
||||
#[no_mangle] |
||||
fn custom_ox_now() -> Duration { |
||||
unimplemented!("now implementation") |
||||
} |
||||
``` |
||||
- For `wasm32-unknown-unknown` if the `js` feature is enabled the `Date.now()` ECMAScript API is used. |
||||
- For all other targets `SystemTime::now()` is used. |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,134 @@ |
||||
use crate::oxsdatatypes::{Decimal, Double, Float, Integer}; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::fmt; |
||||
use std::str::{FromStr, ParseBoolError}; |
||||
|
||||
/// [XML Schema `boolean` datatype](https://www.w3.org/TR/xmlschema11-2/#boolean)
///
/// Uses internally a [`bool`].
#[derive(
    Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
)]
#[repr(transparent)]
pub struct Boolean {
    // Wrapped primitive value.
    value: bool,
}
||||
|
||||
impl Boolean {
    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
    ///
    /// For booleans this coincides with equality.
    #[inline]
    #[must_use]
    pub fn is_identical_with(self, other: Self) -> bool {
        self == other
    }
}
||||
|
||||
impl From<bool> for Boolean {
    #[inline]
    fn from(value: bool) -> Self {
        Self { value }
    }
}

// Numeric -> boolean casts: zero maps to false, anything else to true
// (and NaN maps to false for the floating point types below).

impl From<Integer> for Boolean {
    #[inline]
    fn from(value: Integer) -> Self {
        (value != Integer::from(0)).into()
    }
}

impl From<Decimal> for Boolean {
    #[inline]
    fn from(value: Decimal) -> Self {
        (value != Decimal::from(0)).into()
    }
}

impl From<Float> for Boolean {
    #[inline]
    fn from(value: Float) -> Self {
        (value != Float::from(0.) && !value.is_nan()).into()
    }
}

impl From<Double> for Boolean {
    #[inline]
    fn from(value: Double) -> Self {
        (value != Double::from(0.) && !value.is_nan()).into()
    }
}

impl From<Boolean> for bool {
    #[inline]
    fn from(value: Boolean) -> Self {
        value.value
    }
}
||||
|
||||
impl FromStr for Boolean { |
||||
type Err = ParseBoolError; |
||||
|
||||
#[inline] |
||||
fn from_str(input: &str) -> Result<Self, Self::Err> { |
||||
Ok(match input { |
||||
"true" | "1" => true, |
||||
"false" | "0" => false, |
||||
_ => bool::from_str(input)?, |
||||
} |
||||
.into()) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Boolean {
    // Delegates to bool's Display, which prints "true"/"false" — the XSD
    // canonical lexical forms.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.value.fmt(f)
    }
}
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    // Round-trips all four lexical forms through parse + Display.
    #[test]
    fn from_str() -> Result<(), ParseBoolError> {
        assert_eq!(Boolean::from_str("true")?.to_string(), "true");
        assert_eq!(Boolean::from_str("1")?.to_string(), "true");
        assert_eq!(Boolean::from_str("false")?.to_string(), "false");
        assert_eq!(Boolean::from_str("0")?.to_string(), "false");
        Ok(())
    }

    #[test]
    fn from_integer() {
        assert_eq!(Boolean::from(false), Integer::from(0).into());
        assert_eq!(Boolean::from(true), Integer::from(1).into());
        assert_eq!(Boolean::from(true), Integer::from(2).into());
    }

    #[test]
    fn from_decimal() {
        assert_eq!(Boolean::from(false), Decimal::from(0).into());
        assert_eq!(Boolean::from(true), Decimal::from(1).into());
        assert_eq!(Boolean::from(true), Decimal::from(2).into());
    }

    // NaN must cast to false, infinity to true.
    #[test]
    fn from_float() {
        assert_eq!(Boolean::from(false), Float::from(0.).into());
        assert_eq!(Boolean::from(true), Float::from(1.).into());
        assert_eq!(Boolean::from(true), Float::from(2.).into());
        assert_eq!(Boolean::from(false), Float::from(f32::NAN).into());
        assert_eq!(Boolean::from(true), Float::from(f32::INFINITY).into());
    }

    #[test]
    fn from_double() {
        assert_eq!(Boolean::from(false), Double::from(0.).into());
        assert_eq!(Boolean::from(true), Double::from(1.).into());
        assert_eq!(Boolean::from(true), Double::from(2.).into());
        assert_eq!(Boolean::from(false), Double::from(f64::NAN).into());
        assert_eq!(Boolean::from(true), Double::from(f64::INFINITY).into());
    }
}
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,326 @@ |
||||
use crate::oxsdatatypes::{Boolean, Float, Integer}; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::cmp::Ordering; |
||||
use std::fmt; |
||||
use std::num::ParseFloatError; |
||||
use std::ops::{Add, Div, Mul, Neg, Sub}; |
||||
use std::str::FromStr; |
||||
|
||||
/// [XML Schema `double` datatype](https://www.w3.org/TR/xmlschema11-2/#double)
///
/// Uses internally a [`f64`].
///
/// <div class="warning">Serialization does not follow the canonical mapping.</div>
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
#[repr(transparent)]
pub struct Double {
    // Wrapped primitive value.
    value: f64,
}
||||
|
||||
impl Double {
    pub const INFINITY: Self = Self {
        value: f64::INFINITY,
    };
    pub const MAX: Self = Self { value: f64::MAX };
    pub const MIN: Self = Self { value: f64::MIN };
    pub const NAN: Self = Self { value: f64::NAN };
    pub const NEG_INFINITY: Self = Self {
        value: f64::NEG_INFINITY,
    };

    /// Builds a value from its big-endian byte representation.
    #[inline]
    #[must_use]
    pub fn from_be_bytes(bytes: [u8; 8]) -> Self {
        Self {
            value: f64::from_be_bytes(bytes),
        }
    }

    /// Returns the big-endian byte representation (inverse of [`Self::from_be_bytes`]).
    #[inline]
    #[must_use]
    pub fn to_be_bytes(self) -> [u8; 8] {
        self.value.to_be_bytes()
    }

    /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
    #[inline]
    #[must_use]
    pub fn abs(self) -> Self {
        self.value.abs().into()
    }

    /// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling)
    #[inline]
    #[must_use]
    pub fn ceil(self) -> Self {
        self.value.ceil().into()
    }

    /// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor)
    #[inline]
    #[must_use]
    pub fn floor(self) -> Self {
        self.value.floor().into()
    }

    /// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
    #[inline]
    #[must_use]
    pub fn round(self) -> Self {
        self.value.round().into()
    }

    /// Returns `true` if this value is NaN.
    #[inline]
    #[must_use]
    pub fn is_nan(self) -> bool {
        self.value.is_nan()
    }

    /// Returns `true` if this value is neither infinite nor NaN.
    #[inline]
    #[must_use]
    pub fn is_finite(self) -> bool {
        self.value.is_finite()
    }

    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
    ///
    /// Bitwise comparison: NaN is identical with NaN, and -0 is NOT
    /// identical with +0 (unlike `==`).
    #[inline]
    #[must_use]
    pub fn is_identical_with(self, other: Self) -> bool {
        self.value.to_bits() == other.value.to_bits()
    }
}
||||
|
||||
impl From<Double> for f64 {
    #[inline]
    fn from(value: Double) -> Self {
        value.value
    }
}

impl From<f64> for Double {
    #[inline]
    fn from(value: f64) -> Self {
        Self { value }
    }
}

// Widening conversions from integer primitives that fit exactly in a f64.

impl From<i8> for Double {
    #[inline]
    fn from(value: i8) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<i16> for Double {
    #[inline]
    fn from(value: i16) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<i32> for Double {
    #[inline]
    fn from(value: i32) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<u8> for Double {
    #[inline]
    fn from(value: u8) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<u16> for Double {
    #[inline]
    fn from(value: u16) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<u32> for Double {
    #[inline]
    fn from(value: u32) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<Float> for Double {
    #[inline]
    fn from(value: Float) -> Self {
        Self {
            value: value.into(),
        }
    }
}

impl From<Boolean> for Double {
    // XPath casting: false -> 0, true -> 1.
    #[inline]
    fn from(value: Boolean) -> Self {
        f64::from(bool::from(value)).into()
    }
}

impl From<Integer> for Double {
    // i64 -> f64 may lose precision above 2^53; accepted by design here.
    #[inline]
    #[allow(clippy::cast_precision_loss)]
    fn from(value: Integer) -> Self {
        (i64::from(value) as f64).into()
    }
}
||||
|
||||
impl FromStr for Double { |
||||
type Err = ParseFloatError; |
||||
|
||||
#[inline] |
||||
fn from_str(input: &str) -> Result<Self, Self::Err> { |
||||
Ok(f64::from_str(input)?.into()) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Double { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
if self.value == f64::INFINITY { |
||||
f.write_str("INF") |
||||
} else if self.value == f64::NEG_INFINITY { |
||||
f.write_str("-INF") |
||||
} else { |
||||
self.value.fmt(f) |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl PartialOrd for Double {
    // Delegates to f64's partial order: comparisons involving NaN yield None,
    // which is why Double cannot implement Ord.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        self.value.partial_cmp(&other.value)
    }
}
||||
|
||||
// Arithmetic delegates to f64 and therefore follows IEEE 754 semantics
// (no overflow panics; division by zero yields an infinity or NaN).

impl Neg for Double {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        (-self.value).into()
    }
}

impl Add for Double {
    type Output = Self;

    #[inline]
    fn add(self, rhs: Self) -> Self {
        (self.value + rhs.value).into()
    }
}

impl Sub for Double {
    type Output = Self;

    #[inline]
    fn sub(self, rhs: Self) -> Self {
        (self.value - rhs.value).into()
    }
}

impl Mul for Double {
    type Output = Self;

    #[inline]
    fn mul(self, rhs: Self) -> Self {
        (self.value * rhs.value).into()
    }
}

impl Div for Double {
    type Output = Self;

    #[inline]
    fn div(self, rhs: Self) -> Self {
        (self.value / rhs.value).into()
    }
}
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    // IEEE equality: NaN != NaN, -0 == +0.
    #[test]
    fn eq() {
        assert_eq!(Double::from(0_f64), Double::from(0_f64));
        assert_ne!(Double::NAN, Double::NAN);
        assert_eq!(Double::from(-0.), Double::from(0.));
    }

    // Partial order: NaN is incomparable with everything, including itself.
    #[test]
    fn cmp() {
        assert_eq!(
            Double::from(0.).partial_cmp(&Double::from(0.)),
            Some(Ordering::Equal)
        );
        assert_eq!(
            Double::INFINITY.partial_cmp(&Double::MAX),
            Some(Ordering::Greater)
        );
        assert_eq!(
            Double::NEG_INFINITY.partial_cmp(&Double::MIN),
            Some(Ordering::Less)
        );
        assert_eq!(Double::NAN.partial_cmp(&Double::from(0.)), None);
        assert_eq!(Double::NAN.partial_cmp(&Double::NAN), None);
        assert_eq!(
            Double::from(0.).partial_cmp(&Double::from(-0.)),
            Some(Ordering::Equal)
        );
    }

    // XSD identity differs from equality: NaN is identical with itself,
    // -0 is not identical with +0.
    #[test]
    fn is_identical_with() {
        assert!(Double::from(0.).is_identical_with(Double::from(0.)));
        assert!(Double::NAN.is_identical_with(Double::NAN));
        assert!(!Double::from(-0.).is_identical_with(Double::from(0.)));
    }

    // Parsing of specials, exponents and extreme values, checked via Display.
    #[test]
    fn from_str() -> Result<(), ParseFloatError> {
        assert_eq!(Double::from_str("NaN")?.to_string(), "NaN");
        assert_eq!(Double::from_str("INF")?.to_string(), "INF");
        assert_eq!(Double::from_str("+INF")?.to_string(), "INF");
        assert_eq!(Double::from_str("-INF")?.to_string(), "-INF");
        assert_eq!(Double::from_str("0.0E0")?.to_string(), "0");
        assert_eq!(Double::from_str("-0.0E0")?.to_string(), "-0");
        assert_eq!(Double::from_str("0.1e1")?.to_string(), "1");
        assert_eq!(Double::from_str("-0.1e1")?.to_string(), "-1");
        assert_eq!(Double::from_str("1.e1")?.to_string(), "10");
        assert_eq!(Double::from_str("-1.e1")?.to_string(), "-10");
        assert_eq!(Double::from_str("1")?.to_string(), "1");
        assert_eq!(Double::from_str("-1")?.to_string(), "-1");
        assert_eq!(Double::from_str("1.")?.to_string(), "1");
        assert_eq!(Double::from_str("-1.")?.to_string(), "-1");
        assert_eq!(
            Double::from_str(&f64::MIN.to_string()).unwrap(),
            Double::MIN
        );
        assert_eq!(
            Double::from_str(&f64::MAX.to_string()).unwrap(),
            Double::MAX
        );
        Ok(())
    }
}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,310 @@ |
||||
use crate::oxsdatatypes::{Boolean, Double, Integer}; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::cmp::Ordering; |
||||
use std::fmt; |
||||
use std::num::ParseFloatError; |
||||
use std::ops::{Add, Div, Mul, Neg, Sub}; |
||||
use std::str::FromStr; |
||||
|
||||
/// [XML Schema `float` datatype](https://www.w3.org/TR/xmlschema11-2/#float)
///
/// Uses internally a [`f32`].
///
/// <div class="warning">Serialization does not follow the canonical mapping.</div>
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
#[repr(transparent)]
pub struct Float {
    // Wrapped primitive value.
    value: f32,
}
||||
|
||||
impl Float { |
||||
pub const INFINITY: Self = Self { |
||||
value: f32::INFINITY, |
||||
}; |
||||
pub const MAX: Self = Self { value: f32::MAX }; |
||||
pub const MIN: Self = Self { value: f32::MIN }; |
||||
pub const NAN: Self = Self { value: f32::NAN }; |
||||
pub const NEG_INFINITY: Self = Self { |
||||
value: f32::NEG_INFINITY, |
||||
}; |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn from_be_bytes(bytes: [u8; 4]) -> Self { |
||||
Self { |
||||
value: f32::from_be_bytes(bytes), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn to_be_bytes(self) -> [u8; 4] { |
||||
self.value.to_be_bytes() |
||||
} |
||||
|
||||
/// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn abs(self) -> Self { |
||||
self.value.abs().into() |
||||
} |
||||
|
||||
/// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling)
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn ceil(self) -> Self { |
||||
self.value.ceil().into() |
||||
} |
||||
|
||||
/// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor)
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn floor(self) -> Self { |
||||
self.value.floor().into() |
||||
} |
||||
|
||||
/// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn round(self) -> Self { |
||||
self.value.round().into() |
||||
} |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn is_nan(self) -> bool { |
||||
self.value.is_nan() |
||||
} |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn is_finite(self) -> bool { |
||||
self.value.is_finite() |
||||
} |
||||
|
||||
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn is_identical_with(self, other: Self) -> bool { |
||||
self.value.to_bits() == other.value.to_bits() |
||||
} |
||||
} |
||||
|
||||
impl From<Float> for f32 { |
||||
#[inline] |
||||
fn from(value: Float) -> Self { |
||||
value.value |
||||
} |
||||
} |
||||
|
||||
impl From<Float> for f64 { |
||||
#[inline] |
||||
fn from(value: Float) -> Self { |
||||
value.value.into() |
||||
} |
||||
} |
||||
|
||||
impl From<f32> for Float { |
||||
#[inline] |
||||
fn from(value: f32) -> Self { |
||||
Self { value } |
||||
} |
||||
} |
||||
|
||||
impl From<i8> for Float { |
||||
#[inline] |
||||
fn from(value: i8) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<i16> for Float { |
||||
#[inline] |
||||
fn from(value: i16) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<u8> for Float { |
||||
#[inline] |
||||
fn from(value: u8) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<u16> for Float { |
||||
#[inline] |
||||
fn from(value: u16) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<Boolean> for Float { |
||||
#[inline] |
||||
fn from(value: Boolean) -> Self { |
||||
f32::from(bool::from(value)).into() |
||||
} |
||||
} |
||||
|
||||
impl From<Integer> for Float { |
||||
#[inline] |
||||
#[allow(clippy::cast_precision_loss)] |
||||
fn from(value: Integer) -> Self { |
||||
(i64::from(value) as f32).into() |
||||
} |
||||
} |
||||
|
||||
impl From<Double> for Float { |
||||
#[inline] |
||||
#[allow(clippy::cast_possible_truncation)] |
||||
fn from(value: Double) -> Self { |
||||
Self { |
||||
value: f64::from(value) as f32, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Float { |
||||
type Err = ParseFloatError; |
||||
|
||||
#[inline] |
||||
fn from_str(input: &str) -> Result<Self, Self::Err> { |
||||
Ok(f32::from_str(input)?.into()) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Float { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
if self.value == f32::INFINITY { |
||||
f.write_str("INF") |
||||
} else if self.value == f32::NEG_INFINITY { |
||||
f.write_str("-INF") |
||||
} else { |
||||
self.value.fmt(f) |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl PartialOrd for Float { |
||||
#[inline] |
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
||||
self.value.partial_cmp(&other.value) |
||||
} |
||||
} |
||||
|
||||
impl Neg for Float { |
||||
type Output = Self; |
||||
|
||||
#[inline] |
||||
fn neg(self) -> Self { |
||||
(-self.value).into() |
||||
} |
||||
} |
||||
|
||||
impl Add for Float { |
||||
type Output = Self; |
||||
|
||||
#[inline] |
||||
fn add(self, rhs: Self) -> Self { |
||||
(self.value + rhs.value).into() |
||||
} |
||||
} |
||||
|
||||
impl Sub for Float { |
||||
type Output = Self; |
||||
|
||||
#[inline] |
||||
fn sub(self, rhs: Self) -> Self { |
||||
(self.value - rhs.value).into() |
||||
} |
||||
} |
||||
|
||||
impl Mul for Float { |
||||
type Output = Self; |
||||
|
||||
#[inline] |
||||
fn mul(self, rhs: Self) -> Self { |
||||
(self.value * rhs.value).into() |
||||
} |
||||
} |
||||
|
||||
impl Div for Float { |
||||
type Output = Self; |
||||
|
||||
#[inline] |
||||
fn div(self, rhs: Self) -> Self { |
||||
(self.value / rhs.value).into() |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    #[test]
    fn eq() {
        // `==` follows IEEE 754: NaN != NaN and -0 == +0.
        assert_eq!(Float::from(0.), Float::from(0.));
        assert_ne!(Float::NAN, Float::NAN);
        assert_eq!(Float::from(-0.), Float::from(0.));
    }

    #[test]
    fn cmp() {
        assert_eq!(
            Float::from(0.).partial_cmp(&Float::from(0.)),
            Some(Ordering::Equal)
        );
        assert_eq!(
            Float::INFINITY.partial_cmp(&Float::MAX),
            Some(Ordering::Greater)
        );
        assert_eq!(
            Float::NEG_INFINITY.partial_cmp(&Float::MIN),
            Some(Ordering::Less)
        );
        // NaN is unordered with everything, including itself.
        assert_eq!(Float::NAN.partial_cmp(&Float::from(0.)), None);
        assert_eq!(Float::NAN.partial_cmp(&Float::NAN), None);
        assert_eq!(
            Float::from(0.).partial_cmp(&Float::from(-0.)),
            Some(Ordering::Equal)
        );
    }

    #[test]
    fn is_identical_with() {
        // Identity is bit-level: NaN is identical with itself, -0 is not with +0.
        assert!(Float::from(0.).is_identical_with(Float::from(0.)));
        assert!(Float::NAN.is_identical_with(Float::NAN));
        assert!(!Float::from(-0.).is_identical_with(Float::from(0.)));
    }

    #[test]
    fn from_str() -> Result<(), ParseFloatError> {
        // (lexical form, expected serialization) pairs.
        for (input, expected) in [
            ("NaN", "NaN"),
            ("INF", "INF"),
            ("+INF", "INF"),
            ("-INF", "-INF"),
            ("0.0E0", "0"),
            ("-0.0E0", "-0"),
            ("0.1e1", "1"),
            ("-0.1e1", "-1"),
            ("1.e1", "10"),
            ("-1.e1", "-10"),
            ("1", "1"),
            ("-1", "-1"),
            ("1.", "1"),
            ("-1.", "-1"),
        ] {
            assert_eq!(Float::from_str(input)?.to_string(), expected);
        }
        assert_eq!(Float::from_str(&f32::MIN.to_string())?, Float::MIN);
        assert_eq!(Float::from_str(&f32::MAX.to_string())?, Float::MAX);
        Ok(())
    }
}
@ -0,0 +1,400 @@ |
||||
use crate::oxsdatatypes::{Boolean, Decimal, Double, Float}; |
||||
use serde::{Deserialize, Serialize}; |
||||
use std::fmt; |
||||
use std::num::ParseIntError; |
||||
use std::str::FromStr; |
||||
|
||||
/// [XML Schema `integer` datatype](https://www.w3.org/TR/xmlschema11-2/#integer)
|
||||
///
|
||||
/// Uses internally a [`i64`].
|
||||
#[derive(
|
||||
Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize, |
||||
)] |
||||
#[repr(transparent)] |
||||
pub struct Integer { |
||||
value: i64, |
||||
} |
||||
|
||||
impl Integer { |
||||
pub const MAX: Self = Self { value: i64::MAX }; |
||||
pub const MIN: Self = Self { value: i64::MIN }; |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn from_be_bytes(bytes: [u8; 8]) -> Self { |
||||
Self { |
||||
value: i64::from_be_bytes(bytes), |
||||
} |
||||
} |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn to_be_bytes(self) -> [u8; 8] { |
||||
self.value.to_be_bytes() |
||||
} |
||||
|
||||
/// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add)
|
||||
///
|
||||
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_add(rhs.into().value)?, |
||||
}) |
||||
} |
||||
|
||||
/// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract)
|
||||
///
|
||||
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_sub(rhs.into().value)?, |
||||
}) |
||||
} |
||||
|
||||
/// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply)
|
||||
///
|
||||
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_mul(self, rhs: impl Into<Self>) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_mul(rhs.into().value)?, |
||||
}) |
||||
} |
||||
|
||||
/// [op:numeric-integer-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-integer-divide)
|
||||
///
|
||||
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_div(self, rhs: impl Into<Self>) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_div(rhs.into().value)?, |
||||
}) |
||||
} |
||||
|
||||
/// [op:numeric-mod](https://www.w3.org/TR/xpath-functions-31/#func-numeric-mod)
|
||||
///
|
||||
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_rem(self, rhs: impl Into<Self>) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_rem(rhs.into().value)?, |
||||
}) |
||||
} |
||||
|
||||
/// Euclidean remainder
|
||||
///
|
||||
/// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_rem_euclid(self, rhs: impl Into<Self>) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_rem_euclid(rhs.into().value)?, |
||||
}) |
||||
} |
||||
|
||||
/// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus)
|
||||
///
|
||||
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_neg(self) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_neg()?, |
||||
}) |
||||
} |
||||
|
||||
/// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
|
||||
///
|
||||
/// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn checked_abs(self) -> Option<Self> { |
||||
Some(Self { |
||||
value: self.value.checked_abs()?, |
||||
}) |
||||
} |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub const fn is_negative(self) -> bool { |
||||
self.value < 0 |
||||
} |
||||
|
||||
#[inline] |
||||
#[must_use] |
||||
pub const fn is_positive(self) -> bool { |
||||
self.value > 0 |
||||
} |
||||
|
||||
/// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
|
||||
#[inline] |
||||
#[must_use] |
||||
pub fn is_identical_with(self, other: Self) -> bool { |
||||
self == other |
||||
} |
||||
} |
||||
|
||||
impl From<bool> for Integer { |
||||
#[inline] |
||||
fn from(value: bool) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<i8> for Integer { |
||||
#[inline] |
||||
fn from(value: i8) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<i16> for Integer { |
||||
#[inline] |
||||
fn from(value: i16) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<i32> for Integer { |
||||
#[inline] |
||||
fn from(value: i32) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<i64> for Integer { |
||||
#[inline] |
||||
fn from(value: i64) -> Self { |
||||
Self { value } |
||||
} |
||||
} |
||||
|
||||
impl From<u8> for Integer { |
||||
#[inline] |
||||
fn from(value: u8) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<u16> for Integer { |
||||
#[inline] |
||||
fn from(value: u16) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<u32> for Integer { |
||||
#[inline] |
||||
fn from(value: u32) -> Self { |
||||
Self { |
||||
value: value.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<Boolean> for Integer { |
||||
#[inline] |
||||
fn from(value: Boolean) -> Self { |
||||
bool::from(value).into() |
||||
} |
||||
} |
||||
|
||||
impl From<Integer> for i64 { |
||||
#[inline] |
||||
fn from(value: Integer) -> Self { |
||||
value.value |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Integer { |
||||
type Err = ParseIntError; |
||||
|
||||
#[inline] |
||||
fn from_str(input: &str) -> Result<Self, Self::Err> { |
||||
Ok(i64::from_str(input)?.into()) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Integer { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
self.value.fmt(f) |
||||
} |
||||
} |
||||
|
||||
impl TryFrom<Float> for Integer { |
||||
type Error = TooLargeForIntegerError; |
||||
|
||||
#[inline] |
||||
fn try_from(value: Float) -> Result<Self, Self::Error> { |
||||
Decimal::try_from(value) |
||||
.map_err(|_| TooLargeForIntegerError)? |
||||
.try_into() |
||||
} |
||||
} |
||||
|
||||
impl TryFrom<Double> for Integer { |
||||
type Error = TooLargeForIntegerError; |
||||
|
||||
#[inline] |
||||
fn try_from(value: Double) -> Result<Self, Self::Error> { |
||||
Decimal::try_from(value) |
||||
.map_err(|_| TooLargeForIntegerError)? |
||||
.try_into() |
||||
} |
||||
} |
||||
|
||||
/// The input is too large to fit into an [`Integer`].
|
||||
///
|
||||
/// Matches XPath [`FOCA0003` error](https://www.w3.org/TR/xpath-functions-31/#ERRFOCA0003).
|
||||
#[derive(Debug, Clone, Copy, thiserror::Error)] |
||||
#[error("Value too large for xsd:integer internal representation")] |
||||
pub struct TooLargeForIntegerError; |
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    #[test]
    fn from_str() -> Result<(), ParseIntError> {
        // (lexical form, expected serialization) pairs.
        for (input, expected) in [("0", "0"), ("-0", "0"), ("123", "123"), ("-123", "-123")] {
            assert_eq!(Integer::from_str(input)?.to_string(), expected);
        }
        // Does not fit into an i64.
        Integer::from_str("123456789123456789123456789123456789123456789").unwrap_err();
        Ok(())
    }

    #[test]
    fn from_float() -> Result<(), ParseIntError> {
        // Conversion truncates towards zero.
        assert_eq!(
            Integer::try_from(Float::from(0.)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Float::from(-0.)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Float::from(-123.1)).ok(),
            Some(Integer::from_str("-123")?)
        );
        // Values without an integer counterpart are rejected.
        Integer::try_from(Float::from(f32::NAN)).unwrap_err();
        Integer::try_from(Float::from(f32::INFINITY)).unwrap_err();
        Integer::try_from(Float::from(f32::NEG_INFINITY)).unwrap_err();
        Integer::try_from(Float::from(f32::MIN)).unwrap_err();
        Integer::try_from(Float::from(f32::MAX)).unwrap_err();
        // f32 only keeps ~7 significant digits: allow rounding slack.
        assert!(
            Integer::try_from(Float::from(1_672_507_300_000.))
                .unwrap()
                .checked_sub(Integer::from_str("1672507300000")?)
                .unwrap()
                .checked_abs()
                .unwrap()
                < Integer::from(1_000_000)
        );
        Ok(())
    }

    #[test]
    fn from_double() -> Result<(), ParseIntError> {
        assert_eq!(
            Integer::try_from(Double::from(0.0)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Double::from(-0.0)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Double::from(-123.1)).ok(),
            Some(Integer::from_str("-123")?)
        );
        // f64 is more precise than f32, so the slack is much smaller here.
        assert!(
            Integer::try_from(Double::from(1_672_507_300_000.))
                .unwrap()
                .checked_sub(Integer::from_str("1672507300000").unwrap())
                .unwrap()
                .checked_abs()
                .unwrap()
                < Integer::from(10)
        );
        Integer::try_from(Double::from(f64::NAN)).unwrap_err();
        Integer::try_from(Double::from(f64::INFINITY)).unwrap_err();
        Integer::try_from(Double::from(f64::NEG_INFINITY)).unwrap_err();
        Integer::try_from(Double::from(f64::MIN)).unwrap_err();
        Integer::try_from(Double::from(f64::MAX)).unwrap_err();
        Ok(())
    }

    #[test]
    fn from_decimal() -> Result<(), ParseIntError> {
        assert_eq!(
            Integer::try_from(Decimal::from(0)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Decimal::from_str("-123.1").unwrap()).ok(),
            Some(Integer::from_str("-123")?)
        );
        // Decimal covers a wider range than i64.
        Integer::try_from(Decimal::MIN).unwrap_err();
        Integer::try_from(Decimal::MAX).unwrap_err();
        Ok(())
    }

    #[test]
    fn add() {
        assert_eq!(
            Integer::MIN.checked_add(1),
            Some(Integer::from(i64::MIN + 1))
        );
        // Overflow yields None rather than wrapping.
        assert_eq!(Integer::MAX.checked_add(1), None);
    }

    #[test]
    fn sub() {
        assert_eq!(Integer::MIN.checked_sub(1), None);
        assert_eq!(
            Integer::MAX.checked_sub(1),
            Some(Integer::from(i64::MAX - 1))
        );
    }

    #[test]
    fn mul() {
        assert_eq!(Integer::MIN.checked_mul(2), None);
        assert_eq!(Integer::MAX.checked_mul(2), None);
    }

    #[test]
    fn div() {
        // Division by zero is None, not a panic.
        assert_eq!(Integer::from(1).checked_div(0), None);
    }

    #[test]
    fn rem() {
        assert_eq!(Integer::from(10).checked_rem(3), Some(Integer::from(1)));
        assert_eq!(Integer::from(6).checked_rem(-2), Some(Integer::from(0)));
        assert_eq!(Integer::from(1).checked_rem(0), None);
    }
}
@ -0,0 +1,21 @@ |
||||
mod boolean; |
||||
mod date_time; |
||||
mod decimal; |
||||
mod double; |
||||
mod duration; |
||||
mod float; |
||||
mod integer; |
||||
|
||||
pub use self::boolean::Boolean; |
||||
pub use self::date_time::{ |
||||
Date, DateTime, DateTimeOverflowError, GDay, GMonth, GMonthDay, GYear, GYearMonth, |
||||
InvalidTimezoneError, ParseDateTimeError, Time, TimezoneOffset, |
||||
}; |
||||
pub use self::decimal::{Decimal, ParseDecimalError, TooLargeForDecimalError}; |
||||
pub use self::double::Double; |
||||
pub use self::duration::{ |
||||
DayTimeDuration, Duration, DurationOverflowError, OppositeSignInDurationComponentsError, |
||||
ParseDurationError, YearMonthDuration, |
||||
}; |
||||
pub use self::float::Float; |
||||
pub use self::integer::{Integer, TooLargeForIntegerError}; |
@ -0,0 +1,54 @@ |
||||
OxTTL |
||||
===== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxttl.svg)](https://crates.io/crates/oxttl) |
||||
[![Released API docs](https://docs.rs/oxttl/badge.svg)](https://docs.rs/oxttl) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxttl)](https://crates.io/crates/oxttl) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
Oxttl is a set of parsers and serializers for [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [N3](https://w3c.github.io/N3/spec/). |
||||
|
||||
Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for all languages but N3 ([Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star)).
||||
|
||||
It is designed as a low level parser compatible with both synchronous and asynchronous I/O. |
||||
|
||||
Usage example counting the number of people in a Turtle file: |
||||
```rust |
||||
use oxrdf::{NamedNodeRef, vocab::rdf}; |
||||
use oxttl::TurtleParser; |
||||
|
||||
let file = b"@base <http://example.com/> . |
||||
@prefix schema: <http://schema.org/> . |
||||
<foo> a schema:Person ; |
||||
schema:name \"Foo\" . |
||||
<bar> a schema:Person ; |
||||
schema:name \"Bar\" ."; |
||||
|
||||
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); |
||||
let mut count = 0; |
||||
for triple in TurtleParser::new().parse_read(file.as_ref()) { |
||||
let triple = triple.unwrap(); |
||||
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { |
||||
count += 1; |
||||
} |
||||
} |
||||
assert_eq!(2, count); |
||||
``` |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,977 @@ |
||||
use crate::oxrdf::NamedNode; |
||||
use crate::oxttl::toolkit::{TokenRecognizer, TokenRecognizerError}; |
||||
use memchr::{memchr, memchr2}; |
||||
use oxilangtag::LanguageTag; |
||||
use oxiri::Iri; |
||||
use std::borrow::Cow; |
||||
use std::cmp::min; |
||||
use std::collections::HashMap; |
||||
use std::ops::Range; |
||||
use std::str; |
||||
|
||||
/// A single token produced by the N3/Turtle-family lexer.
#[derive(Debug, PartialEq, Eq)]
pub enum N3Token<'a> {
    /// An IRI reference, resolved against the base IRI when one is configured.
    IriRef(String),
    /// A prefixed name such as `schema:Person`, split at the first `:`.
    PrefixedName {
        prefix: &'a str,
        local: Cow<'a, str>,
        /// True when the expanded IRI might still be invalid and needs checking.
        might_be_invalid_iri: bool,
    },
    /// A quick variable such as `?x`.
    Variable(Cow<'a, str>),
    /// A blank node label such as `_:b0`.
    BlankNodeLabel(&'a str),
    /// A string literal.
    String(String),
    /// An integer literal, kept as its lexical form.
    Integer(&'a str),
    /// A decimal literal, kept as its lexical form.
    Decimal(&'a str),
    /// A double literal, kept as its lexical form.
    Double(&'a str),
    /// A language tag token.
    LangTag(&'a str),
    /// Structural punctuation such as `.`, `;`, `<<` or `{|`.
    Punctuation(&'a str),
    /// A bare keyword (a name with no `:`).
    PlainKeyword(&'a str),
}
||||
|
||||
/// The concrete syntax the lexer accepts; stricter modes disable some tokens
/// (e.g. long strings in N-Triples, `<=`/`<-` outside N3).
#[derive(Eq, PartialEq)]
pub enum N3LexerMode {
    NTriples,
    Turtle,
    N3,
}
||||
|
||||
#[derive(Default)] |
||||
pub struct N3LexerOptions { |
||||
pub base_iri: Option<Iri<String>>, |
||||
} |
||||
|
||||
pub struct N3Lexer { |
||||
mode: N3LexerMode, |
||||
unchecked: bool, |
||||
} |
||||
|
||||
// TODO: there are a lot of 'None' (missing data) returned even if the stream is ending!!!
|
||||
// TODO: simplify by not giving is_end and fail with an "unexpected eof" is none is returned when is_end=true?
|
||||
|
||||
impl TokenRecognizer for N3Lexer {
    type Token<'a> = N3Token<'a>;
    type Options = N3LexerOptions;

    /// Tries to recognize a single token at the start of `data`.
    ///
    /// Returns `None` when more input is needed to decide, otherwise
    /// `Some((consumed_byte_count, token_or_error))`. `is_ending` signals
    /// that `data` is the complete remaining input, so ambiguous prefixes
    /// (like a trailing `.` or `<=`) can be committed instead of waiting
    /// for more bytes.
    fn recognize_next_token<'a>(
        &mut self,
        data: &'a [u8],
        is_ending: bool,
        options: &N3LexerOptions,
    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
        // Dispatch on the first byte; multi-byte tokens are delegated to
        // dedicated recognizers.
        match *data.first()? {
            b'<' => match *data.get(1)? {
                b'<' => Some((2, Ok(N3Token::Punctuation("<<")))),
                // In N3, "<=" is an operator but "<=...>" could also start an
                // IRI reference: prefer the IRI reading when it parses.
                b'=' if self.mode == N3LexerMode::N3 => {
                    if let Some((consumed, result)) = self.recognize_iri(data, options) {
                        Some(if let Ok(result) = result {
                            (consumed, Ok(result))
                        } else {
                            // Not a valid IRI: fall back to the operator.
                            (2, Ok(N3Token::Punctuation("<=")))
                        })
                    } else if is_ending {
                        Some((2, Ok(N3Token::Punctuation("<="))))
                    } else {
                        None
                    }
                }
                // Same ambiguity handling for the "<-" operator.
                b'-' if self.mode == N3LexerMode::N3 => {
                    if let Some((consumed, result)) = self.recognize_iri(data, options) {
                        Some(if let Ok(result) = result {
                            (consumed, Ok(result))
                        } else {
                            (2, Ok(N3Token::Punctuation("<-")))
                        })
                    } else if is_ending {
                        Some((2, Ok(N3Token::Punctuation("<-"))))
                    } else {
                        None
                    }
                }
                _ => self.recognize_iri(data, options),
            },
            b'>' => {
                if *data.get(1)? == b'>' {
                    Some((2, Ok(N3Token::Punctuation(">>"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation(">"))))
                }
            }
            b'_' => match data.get(1)? {
                // Blank node labels always start with "_:".
                b':' => Self::recognize_blank_node_label(data),
                c => Some((
                    1,
                    Err((0, format!("Unexpected character '{}'", char::from(*c))).into()),
                )),
            },
            b'"' => {
                // Triple-quoted (long) strings are not part of N-Triples.
                if self.mode != N3LexerMode::NTriples
                    && *data.get(1)? == b'"'
                    && *data.get(2)? == b'"'
                {
                    Self::recognize_long_string(data, b'"')
                } else {
                    Self::recognize_string(data, b'"')
                }
            }
            // Single-quoted strings are not part of N-Triples at all.
            b'\'' if self.mode != N3LexerMode::NTriples => {
                if *data.get(1)? == b'\'' && *data.get(2)? == b'\'' {
                    Self::recognize_long_string(data, b'\'')
                } else {
                    Self::recognize_string(data, b'\'')
                }
            }
            b'@' => self.recognize_lang_tag(data),
            b'.' => match data.get(1) {
                // ".5"-style numbers start with the dot itself.
                Some(b'0'..=b'9') => Self::recognize_number(data),
                Some(_) => Some((1, Ok(N3Token::Punctuation(".")))),
                // A lone trailing '.' is only a token once input has ended.
                None => is_ending.then_some((1, Ok(N3Token::Punctuation(".")))),
            },
            b'^' => {
                if *data.get(1)? == b'^' {
                    Some((2, Ok(N3Token::Punctuation("^^"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("^"))))
                }
            }
            b'(' => Some((1, Ok(N3Token::Punctuation("(")))),
            b')' => Some((1, Ok(N3Token::Punctuation(")")))),
            b'[' => Some((1, Ok(N3Token::Punctuation("[")))),
            b']' => Some((1, Ok(N3Token::Punctuation("]")))),
            b'{' => {
                // "{|" opens an RDF-star annotation block.
                if *data.get(1)? == b'|' {
                    Some((2, Ok(N3Token::Punctuation("{|"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("{"))))
                }
            }
            b'}' => Some((1, Ok(N3Token::Punctuation("}")))),
            b',' => Some((1, Ok(N3Token::Punctuation(",")))),
            b';' => Some((1, Ok(N3Token::Punctuation(";")))),
            b'!' => Some((1, Ok(N3Token::Punctuation("!")))),
            b'|' => {
                if *data.get(1)? == b'}' {
                    Some((2, Ok(N3Token::Punctuation("|}"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("|"))))
                }
            }
            b'=' => {
                if *data.get(1)? == b'>' {
                    Some((2, Ok(N3Token::Punctuation("=>"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("="))))
                }
            }
            b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data),
            b'?' => self.recognize_variable(data, is_ending),
            // Anything else is a prefixed name or a bare keyword.
            _ => self.recognize_pname_or_keyword(data, is_ending),
        }
    }
}
||||
|
||||
impl N3Lexer { |
||||
pub fn new(mode: N3LexerMode, unchecked: bool) -> Self { |
||||
Self { mode, unchecked } |
||||
} |
||||
|
||||
fn recognize_iri( |
||||
&self, |
||||
data: &[u8], |
||||
options: &N3LexerOptions, |
||||
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { |
||||
// [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */
|
||||
let mut string = Vec::new(); |
||||
let mut i = 1; |
||||
loop { |
||||
let end = memchr2(b'>', b'\\', &data[i..])?; |
||||
string.extend_from_slice(&data[i..i + end]); |
||||
i += end; |
||||
match data[i] { |
||||
b'>' => { |
||||
#[allow(clippy::range_plus_one)] |
||||
return Some((i + 1, self.parse_iri(string, 0..i + 1, options))); |
||||
} |
||||
b'\\' => { |
||||
let (additional, c) = Self::recognize_escape(&data[i..], i, false)?; |
||||
i += additional + 1; |
||||
match c { |
||||
Ok(c) => { |
||||
let mut buf = [0; 4]; |
||||
string.extend_from_slice(c.encode_utf8(&mut buf).as_bytes()); |
||||
} |
||||
Err(e) => return Some((i, Err(e))), |
||||
} |
||||
} |
||||
_ => unreachable!(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn parse_iri( |
||||
&self, |
||||
iri: Vec<u8>, |
||||
position: Range<usize>, |
||||
options: &N3LexerOptions, |
||||
) -> Result<N3Token<'static>, TokenRecognizerError> { |
||||
let iri = string_from_utf8(iri, position.clone())?; |
||||
Ok(N3Token::IriRef( |
||||
if let Some(base_iri) = options.base_iri.as_ref() { |
||||
if self.unchecked { |
||||
base_iri.resolve_unchecked(&iri) |
||||
} else { |
||||
base_iri |
||||
.resolve(&iri) |
||||
.map_err(|e| (position, e.to_string()))? |
||||
} |
||||
.into_inner() |
||||
} else if self.unchecked { |
||||
iri |
||||
} else { |
||||
Iri::parse(iri) |
||||
.map_err(|e| (position, e.to_string()))? |
||||
.into_inner() |
||||
}, |
||||
)) |
||||
} |
||||
|
||||
    /// Recognizes either a prefixed name (`prefix:local`) or, when no `:` is
    /// found, a plain keyword such as `a` or `true`.
    ///
    /// Returns `None` when more input is needed to decide.
    fn recognize_pname_or_keyword<'a>(
        &self,
        data: &'a [u8],
        is_ending: bool,
    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
        // [139s] PNAME_NS ::= PN_PREFIX? ':'
        // [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
        // [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
        // Scan the prefix part character by character, up to and including ':'.
        let mut i = 0;
        loop {
            if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
                match r {
                    Ok((c, consumed)) => {
                        if c == ':' {
                            i += consumed;
                            break;
                        } else if i == 0 {
                            // The first character has a stricter alphabet.
                            if !Self::is_possible_pn_chars_base(c) {
                                return Some((
                                    consumed,
                                    Err((
                                        0..consumed,
                                        format!(
                                            "'{c}' is not allowed at the beginning of a prefix name"
                                        ),
                                    )
                                        .into()),
                                ));
                            }
                            i += consumed;
                        } else if Self::is_possible_pn_chars(c) || c == '.' {
                            i += consumed;
                        } else {
                            // No ':' before an invalid char: it is a plain keyword.
                            // Trailing dots belong to the following punctuation,
                            // not to the name.
                            while data[..i].ends_with(b".") {
                                i -= 1;
                            }
                            return Some((
                                i,
                                str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword),
                            ));
                        }
                    }
                    Err(e) => return Some((e.location.end, Err(e))),
                }
            } else if is_ending {
                // Input is complete: commit what we have as a keyword.
                while data[..i].ends_with(b".") {
                    i -= 1;
                }
                return Some(if i == 0 {
                    (
                        1,
                        Err((0..1, format!("Unexpected byte {}", data[0])).into()),
                    )
                } else {
                    (
                        i,
                        str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword),
                    )
                });
            } else {
                // Mid-character or undecided: wait for more input.
                return None;
            }
        }
        // We saw "prefix:"; validate the prefix (data[..i-1] excludes the ':').
        let pn_prefix = match str_from_utf8(&data[..i - 1], 0..i - 1) {
            Ok(pn_prefix) => pn_prefix,
            Err(e) => return Some((i, Err(e))),
        };
        if pn_prefix.ends_with('.') {
            return Some((
                i,
                Err((
                    0..i,
                    format!(
                        "'{pn_prefix}' is not a valid prefix: prefixes are not allowed to end with '.'"),
                )
                    .into()),
            ));
        }

        // Then scan the (optional) local part after the ':'.
        let (consumed, pn_local_result) =
            self.recognize_optional_pn_local(&data[i..], is_ending)?;
        Some((
            consumed + i,
            pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName {
                prefix: pn_prefix,
                local,
                might_be_invalid_iri,
            }),
        ))
    }
||||
|
||||
fn recognize_variable<'a>( |
||||
&self, |
||||
data: &'a [u8], |
||||
is_ending: bool, |
||||
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { |
||||
// [36] QUICK_VAR_NAME ::= "?" PN_LOCAL
|
||||
let (consumed, result) = self.recognize_optional_pn_local(&data[1..], is_ending)?; |
||||
Some(( |
||||
consumed + 1, |
||||
result.and_then(|(name, _)| { |
||||
if name.is_empty() { |
||||
Err((0..consumed, "A variable name is not allowed to be empty").into()) |
||||
} else { |
||||
Ok(N3Token::Variable(name)) |
||||
} |
||||
}), |
||||
)) |
||||
} |
||||
|
||||
fn recognize_optional_pn_local<'a>( |
||||
&self, |
||||
data: &'a [u8], |
||||
is_ending: bool, |
||||
) -> Option<(usize, Result<(Cow<'a, str>, bool), TokenRecognizerError>)> { |
||||
// [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
|
||||
let mut i = 0; |
||||
let mut buffer = None; // Buffer if there are some escaped characters
|
||||
let mut position_that_is_already_in_buffer = 0; |
||||
let mut might_be_invalid_iri = false; |
||||
let mut ends_with_unescaped_dot = 0; |
||||
loop { |
||||
if let Some(r) = Self::recognize_unicode_char(&data[i..], i) { |
||||
match r { |
||||
Ok((c, consumed)) => { |
||||
if c == '%' { |
||||
i += 1; |
||||
let a = char::from(*data.get(i)?); |
||||
i += 1; |
||||
let b = char::from(*data.get(i)?); |
||||
if !a.is_ascii_hexdigit() || !b.is_ascii_hexdigit() { |
||||
return Some((i + 1, Err(( |
||||
i - 2..=i, format!("escapes in IRIs should be % followed by two hexadecimal characters, found '%{a}{b}'") |
||||
).into()))); |
||||
} |
||||
i += 1; |
||||
ends_with_unescaped_dot = 0; |
||||
} else if c == '\\' { |
||||
i += 1; |
||||
let a = char::from(*data.get(i)?); |
||||
if self.unchecked |
||||
|| matches!( |
||||
a, |
||||
'_' | '~' |
||||
| '.' |
||||
| '-' |
||||
| '!' |
||||
| '$' |
||||
| '&' |
||||
| '\'' |
||||
| '(' |
||||
| ')' |
||||
| '*' |
||||
| '+' |
||||
| ',' |
||||
| ';' |
||||
| '=' |
||||
) |
||||
{ |
||||
// ok to escape
|
||||
} else if matches!(a, '/' | '?' | '#' | '@' | '%') { |
||||
// ok to escape but requires IRI validation
|
||||
might_be_invalid_iri = true; |
||||
} else { |
||||
return Some((i + 1, Err(( |
||||
i..=i, format!("The character that are allowed to be escaped in IRIs are _~.-!$&'()*+,;=/?#@%, found '{a}'") |
||||
).into()))); |
||||
} |
||||
let buffer = buffer.get_or_insert_with(String::new); |
||||
// We add the missing bytes
|
||||
if i - position_that_is_already_in_buffer > 1 { |
||||
buffer.push_str( |
||||
match str_from_utf8( |
||||
&data[position_that_is_already_in_buffer..i - 1], |
||||
position_that_is_already_in_buffer..i - 1, |
||||
) { |
||||
Ok(data) => data, |
||||
Err(e) => return Some((i, Err(e))), |
||||
}, |
||||
) |
||||
} |
||||
buffer.push(a); |
||||
i += 1; |
||||
position_that_is_already_in_buffer = i; |
||||
ends_with_unescaped_dot = 0; |
||||
} else if i == 0 { |
||||
if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit()) |
||||
{ |
||||
return Some((0, Ok((Cow::Borrowed(""), false)))); |
||||
} |
||||
if !self.unchecked { |
||||
might_be_invalid_iri |= |
||||
Self::is_possible_pn_chars_base_but_not_valid_iri(c) |
||||
|| c == ':'; |
||||
} |
||||
i += consumed; |
||||
} else if Self::is_possible_pn_chars(c) || c == ':' { |
||||
if !self.unchecked { |
||||
might_be_invalid_iri |= |
||||
Self::is_possible_pn_chars_base_but_not_valid_iri(c) |
||||
|| c == ':'; |
||||
} |
||||
i += consumed; |
||||
ends_with_unescaped_dot = 0; |
||||
} else if c == '.' { |
||||
i += consumed; |
||||
ends_with_unescaped_dot += 1; |
||||
} else { |
||||
let buffer = if let Some(mut buffer) = buffer { |
||||
buffer.push_str( |
||||
match str_from_utf8( |
||||
&data[position_that_is_already_in_buffer..i], |
||||
position_that_is_already_in_buffer..i, |
||||
) { |
||||
Ok(data) => data, |
||||
Err(e) => return Some((i, Err(e))), |
||||
}, |
||||
); |
||||
// We do not include the last dots
|
||||
for _ in 0..ends_with_unescaped_dot { |
||||
buffer.pop(); |
||||
} |
||||
i -= ends_with_unescaped_dot; |
||||
Cow::Owned(buffer) |
||||
} else { |
||||
let mut data = match str_from_utf8(&data[..i], 0..i) { |
||||
Ok(data) => data, |
||||
Err(e) => return Some((i, Err(e))), |
||||
}; |
||||
// We do not include the last dots
|
||||
data = &data[..data.len() - ends_with_unescaped_dot]; |
||||
i -= ends_with_unescaped_dot; |
||||
Cow::Borrowed(data) |
||||
}; |
||||
return Some((i, Ok((buffer, might_be_invalid_iri)))); |
||||
} |
||||
} |
||||
Err(e) => return Some((e.location.end, Err(e))), |
||||
} |
||||
} else if is_ending { |
||||
let buffer = if let Some(mut buffer) = buffer { |
||||
// We do not include the last dot
|
||||
while buffer.ends_with('.') { |
||||
buffer.pop(); |
||||
i -= 1; |
||||
} |
||||
Cow::Owned(buffer) |
||||
} else { |
||||
let mut data = match str_from_utf8(&data[..i], 0..i) { |
||||
Ok(data) => data, |
||||
Err(e) => return Some((i, Err(e))), |
||||
}; |
||||
// We do not include the last dot
|
||||
while let Some(d) = data.strip_suffix('.') { |
||||
data = d; |
||||
i -= 1; |
||||
} |
||||
Cow::Borrowed(data) |
||||
}; |
||||
return Some((i, Ok((buffer, might_be_invalid_iri)))); |
||||
} else { |
||||
return None; |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn recognize_blank_node_label( |
||||
data: &[u8], |
||||
) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||
// [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
|
||||
let mut i = 2; |
||||
loop { |
||||
match Self::recognize_unicode_char(&data[i..], i)? { |
||||
Ok((c, consumed)) => { |
||||
if (i == 2 && (Self::is_possible_pn_chars_u(c) || c.is_ascii_digit())) |
||||
|| (i > 2 && Self::is_possible_pn_chars(c)) |
||||
{ |
||||
// Ok
|
||||
} else if i > 2 && c == '.' { |
||||
if data[i - 1] == b'.' { |
||||
i -= 1; |
||||
return Some(( |
||||
i, |
||||
str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), |
||||
)); |
||||
} |
||||
} else if i == 0 { |
||||
return Some(( |
||||
i, |
||||
Err((0..i, "A blank node ID should not be empty").into()), |
||||
)); |
||||
} else if data[i - 1] == b'.' { |
||||
i -= 1; |
||||
return Some(( |
||||
i, |
||||
str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), |
||||
)); |
||||
} else { |
||||
return Some(( |
||||
i, |
||||
str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), |
||||
)); |
||||
} |
||||
i += consumed; |
||||
} |
||||
Err(e) => return Some((e.location.end, Err(e))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn recognize_lang_tag<'a>( |
||||
&self, |
||||
data: &'a [u8], |
||||
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> { |
||||
// [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
|
||||
let mut is_last_block_empty = true; |
||||
for (i, c) in data[1..].iter().enumerate() { |
||||
if c.is_ascii_alphabetic() { |
||||
is_last_block_empty = false; |
||||
} else if i == 0 { |
||||
return Some(( |
||||
1, |
||||
Err((1..2, "A language code should always start with a letter").into()), |
||||
)); |
||||
} else if is_last_block_empty { |
||||
return Some((i, self.parse_lang_tag(&data[1..i], 1..i - 1))); |
||||
} else if *c == b'-' { |
||||
is_last_block_empty = true; |
||||
} else { |
||||
return Some((i + 1, self.parse_lang_tag(&data[1..=i], 1..i))); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
fn parse_lang_tag<'a>( |
||||
&self, |
||||
lang_tag: &'a [u8], |
||||
position: Range<usize>, |
||||
) -> Result<N3Token<'a>, TokenRecognizerError> { |
||||
let lang_tag = str_from_utf8(lang_tag, position.clone())?; |
||||
Ok(N3Token::LangTag(if self.unchecked { |
||||
lang_tag |
||||
} else { |
||||
LanguageTag::parse(lang_tag) |
||||
.map_err(|e| (position.clone(), e.to_string()))? |
||||
.into_inner() |
||||
})) |
||||
} |
||||
|
||||
fn recognize_string( |
||||
data: &[u8], |
||||
delimiter: u8, |
||||
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { |
||||
// [22] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
|
||||
// [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
|
||||
let mut string = String::new(); |
||||
let mut i = 1; |
||||
loop { |
||||
let end = memchr2(delimiter, b'\\', &data[i..])?; |
||||
match str_from_utf8(&data[i..i + end], i..i + end) { |
||||
Ok(s) => string.push_str(s), |
||||
Err(e) => return Some((end, Err(e))), |
||||
}; |
||||
i += end; |
||||
match data[i] { |
||||
c if c == delimiter => { |
||||
return Some((i + 1, Ok(N3Token::String(string)))); |
||||
} |
||||
b'\\' => { |
||||
let (additional, c) = Self::recognize_escape(&data[i..], i, true)?; |
||||
i += additional + 1; |
||||
match c { |
||||
Ok(c) => { |
||||
string.push(c); |
||||
} |
||||
Err(e) => { |
||||
// We read until the end of string char
|
||||
let end = memchr(delimiter, &data[i..])?; |
||||
return Some((i + end + 1, Err(e))); |
||||
} |
||||
} |
||||
} |
||||
_ => unreachable!(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn recognize_long_string( |
||||
data: &[u8], |
||||
delimiter: u8, |
||||
) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> { |
||||
// [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''"
|
||||
// [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""'
|
||||
let mut string = String::new(); |
||||
let mut i = 3; |
||||
loop { |
||||
let end = memchr2(delimiter, b'\\', &data[i..])?; |
||||
match str_from_utf8(&data[i..i + end], i..i + end) { |
||||
Ok(s) => string.push_str(s), |
||||
Err(e) => return Some((end, Err(e))), |
||||
}; |
||||
i += end; |
||||
match data[i] { |
||||
c if c == delimiter => { |
||||
if *data.get(i + 1)? == delimiter && *data.get(i + 2)? == delimiter { |
||||
return Some((i + 3, Ok(N3Token::String(string)))); |
||||
} |
||||
i += 1; |
||||
string.push(char::from(delimiter)); |
||||
} |
||||
b'\\' => { |
||||
let (additional, c) = Self::recognize_escape(&data[i..], i, true)?; |
||||
i += additional + 1; |
||||
match c { |
||||
Ok(c) => { |
||||
string.push(c); |
||||
} |
||||
Err(e) => return Some((i, Err(e))), |
||||
} |
||||
} |
||||
_ => unreachable!(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn recognize_number(data: &[u8]) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> { |
||||
// [19] INTEGER ::= [+-]? [0-9]+
|
||||
// [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
|
||||
// [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
|
||||
// [154s] EXPONENT ::= [eE] [+-]? [0-9]+
|
||||
let mut i = 0; |
||||
let c = *data.first()?; |
||||
if matches!(c, b'+' | b'-') { |
||||
i += 1; |
||||
} |
||||
// We read the digits before .
|
||||
let mut count_before: usize = 0; |
||||
loop { |
||||
let c = *data.get(i)?; |
||||
if c.is_ascii_digit() { |
||||
i += 1; |
||||
count_before += 1; |
||||
} else { |
||||
break; |
||||
} |
||||
} |
||||
|
||||
// We read the digits after .
|
||||
#[allow(clippy::if_then_some_else_none)] |
||||
let count_after = if *data.get(i)? == b'.' { |
||||
i += 1; |
||||
|
||||
let mut count_after = 0; |
||||
loop { |
||||
let c = *data.get(i)?; |
||||
if c.is_ascii_digit() { |
||||
i += 1; |
||||
count_after += 1; |
||||
} else { |
||||
break; |
||||
} |
||||
} |
||||
Some(count_after) |
||||
} else { |
||||
None |
||||
}; |
||||
|
||||
// End
|
||||
let c = *data.get(i)?; |
||||
if matches!(c, b'e' | b'E') { |
||||
i += 1; |
||||
|
||||
let c = *data.get(i)?; |
||||
if matches!(c, b'+' | b'-') { |
||||
i += 1; |
||||
} |
||||
|
||||
let mut found = false; |
||||
loop { |
||||
let c = *data.get(i)?; |
||||
if c.is_ascii_digit() { |
||||
i += 1; |
||||
found = true; |
||||
} else { |
||||
break; |
||||
} |
||||
} |
||||
Some(( |
||||
i, |
||||
if !found { |
||||
Err((0..i, "A double exponent cannot be empty").into()) |
||||
} else if count_before == 0 && count_after.unwrap_or(0) == 0 { |
||||
Err((0..i, "A double should not be empty").into()) |
||||
} else { |
||||
str_from_utf8(&data[..i], 0..i).map(N3Token::Double) |
||||
}, |
||||
)) |
||||
} else if let Some(count_after) = count_after { |
||||
if count_after == 0 { |
||||
// We do not consume the '.' after all
|
||||
i -= 1; |
||||
Some(( |
||||
i, |
||||
if count_before == 0 { |
||||
Err((0..i, "An integer should not be empty").into()) |
||||
} else { |
||||
str_from_utf8(&data[..i], 0..i).map(N3Token::Integer) |
||||
}, |
||||
)) |
||||
} else { |
||||
Some((i, str_from_utf8(&data[..i], 0..i).map(N3Token::Decimal))) |
||||
} |
||||
} else { |
||||
Some(( |
||||
i, |
||||
if count_before == 0 { |
||||
Err((0..i, "An integer should not be empty").into()) |
||||
} else { |
||||
str_from_utf8(&data[..i], 0..i).map(N3Token::Integer) |
||||
}, |
||||
)) |
||||
} |
||||
} |
||||
|
||||
fn recognize_escape( |
||||
data: &[u8], |
||||
position: usize, |
||||
with_echar: bool, |
||||
) -> Option<(usize, Result<char, TokenRecognizerError>)> { |
||||
// [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX
|
||||
// [159s] ECHAR ::= '\' [tbnrf"'\]
|
||||
match *data.get(1)? { |
||||
b'u' => match Self::recognize_hex_char(&data[2..], 4, 'u', position) { |
||||
Ok(c) => Some((5, Ok(c?))), |
||||
Err(e) => Some((5, Err(e))), |
||||
}, |
||||
b'U' => match Self::recognize_hex_char(&data[2..], 8, 'u', position) { |
||||
Ok(c) => Some((9, Ok(c?))), |
||||
Err(e) => Some((9, Err(e))), |
||||
}, |
||||
b't' if with_echar => Some((1, Ok('\t'))), |
||||
b'b' if with_echar => Some((1, Ok('\x08'))), |
||||
b'n' if with_echar => Some((1, Ok('\n'))), |
||||
b'r' if with_echar => Some((1, Ok('\r'))), |
||||
b'f' if with_echar => Some((1, Ok('\x0C'))), |
||||
b'"' if with_echar => Some((1, Ok('"'))), |
||||
b'\'' if with_echar => Some((1, Ok('\''))), |
||||
b'\\' if with_echar => Some((1, Ok('\\'))), |
||||
c => Some(( |
||||
1, |
||||
Err(( |
||||
position..position + 2, |
||||
format!("Unexpected escape character '\\{}'", char::from(c)), |
||||
) |
||||
.into()), |
||||
)), // TODO: read until end of string
|
||||
} |
||||
} |
||||
|
||||
fn recognize_hex_char( |
||||
data: &[u8], |
||||
len: usize, |
||||
escape_char: char, |
||||
position: usize, |
||||
) -> Result<Option<char>, TokenRecognizerError> { |
||||
if data.len() < len { |
||||
return Ok(None); |
||||
} |
||||
let val = str_from_utf8(&data[..len], position..position + len + 2)?; |
||||
let codepoint = u32::from_str_radix(val, 16).map_err(|e| { |
||||
( |
||||
position..position + len + 2, |
||||
format!( |
||||
"The escape sequence '\\{escape_char}{val}' is not a valid hexadecimal string: {e}" |
||||
), |
||||
) |
||||
})?; |
||||
let c = char::from_u32(codepoint).ok_or_else(|| { |
||||
( |
||||
position..position + len +2, |
||||
format!( |
||||
"The escape sequence '\\{escape_char}{val}' is encoding {codepoint:X} that is not a valid unicode character", |
||||
), |
||||
) |
||||
})?; |
||||
Ok(Some(c)) |
||||
} |
||||
|
||||
fn recognize_unicode_char( |
||||
data: &[u8], |
||||
position: usize, |
||||
) -> Option<Result<(char, usize), TokenRecognizerError>> { |
||||
let mut code_point: u32; |
||||
let bytes_needed: usize; |
||||
let mut lower_boundary = 0x80; |
||||
let mut upper_boundary = 0xBF; |
||||
|
||||
let byte = *data.first()?; |
||||
match byte { |
||||
0x00..=0x7F => return Some(Ok((char::from(byte), 1))), |
||||
0xC2..=0xDF => { |
||||
bytes_needed = 1; |
||||
code_point = u32::from(byte) & 0x1F; |
||||
} |
||||
0xE0..=0xEF => { |
||||
if byte == 0xE0 { |
||||
lower_boundary = 0xA0; |
||||
} |
||||
if byte == 0xED { |
||||
upper_boundary = 0x9F; |
||||
} |
||||
bytes_needed = 2; |
||||
code_point = u32::from(byte) & 0xF; |
||||
} |
||||
0xF0..=0xF4 => { |
||||
if byte == 0xF0 { |
||||
lower_boundary = 0x90; |
||||
} |
||||
if byte == 0xF4 { |
||||
upper_boundary = 0x8F; |
||||
} |
||||
bytes_needed = 3; |
||||
code_point = u32::from(byte) & 0x7; |
||||
} |
||||
_ => { |
||||
return Some(Err(( |
||||
position..=position, |
||||
"Invalid UTF-8 character encoding", |
||||
) |
||||
.into())) |
||||
} |
||||
} |
||||
|
||||
for i in 1..=bytes_needed { |
||||
let byte = *data.get(i)?; |
||||
if byte < lower_boundary || upper_boundary < byte { |
||||
return Some(Err(( |
||||
position..=position + i, |
||||
"Invalid UTF-8 character encoding", |
||||
) |
||||
.into())); |
||||
} |
||||
lower_boundary = 0x80; |
||||
upper_boundary = 0xBF; |
||||
code_point = (code_point << 6) | (u32::from(byte) & 0x3F); |
||||
} |
||||
|
||||
Some( |
||||
char::from_u32(code_point) |
||||
.map(|c| (c, bytes_needed + 1)) |
||||
.ok_or_else(|| { |
||||
( |
||||
position..=position + bytes_needed, |
||||
format!("The codepoint {code_point:X} is not a valid unicode character"), |
||||
) |
||||
.into() |
||||
}), |
||||
) |
||||
} |
||||
|
||||
// [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
|
||||
fn is_possible_pn_chars_base(c: char) -> bool { |
||||
matches!(c, |
||||
'A'..='Z' |
||||
| 'a'..='z' |
||||
| '\u{00C0}'..='\u{00D6}' |
||||
| '\u{00D8}'..='\u{00F6}' |
||||
| '\u{00F8}'..='\u{02FF}' |
||||
| '\u{0370}'..='\u{037D}' |
||||
| '\u{037F}'..='\u{1FFF}' |
||||
| '\u{200C}'..='\u{200D}' |
||||
| '\u{2070}'..='\u{218F}' |
||||
| '\u{2C00}'..='\u{2FEF}' |
||||
| '\u{3001}'..='\u{D7FF}' |
||||
| '\u{F900}'..='\u{FDCF}' |
||||
| '\u{FDF0}'..='\u{FFFD}' |
||||
| '\u{10000}'..='\u{EFFFF}') |
||||
} |
||||
|
||||
// [158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':'
|
||||
pub(super) fn is_possible_pn_chars_u(c: char) -> bool { |
||||
Self::is_possible_pn_chars_base(c) || c == '_' |
||||
} |
||||
|
||||
// [160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||
pub(crate) fn is_possible_pn_chars(c: char) -> bool { |
||||
Self::is_possible_pn_chars_u(c) |
||||
|| matches!(c, |
||||
'-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
||||
} |
||||
|
||||
fn is_possible_pn_chars_base_but_not_valid_iri(c: char) -> bool { |
||||
matches!(c, '\u{FFF0}'..='\u{FFFD}') |
||||
|| u32::from(c) % u32::from('\u{FFFE}') == 0 |
||||
|| u32::from(c) % u32::from('\u{FFFF}') == 0 |
||||
} |
||||
} |
||||
|
||||
pub fn resolve_local_name( |
||||
prefix: &str, |
||||
local: &str, |
||||
might_be_invalid_iri: bool, |
||||
prefixes: &HashMap<String, Iri<String>>, |
||||
) -> Result<NamedNode, String> { |
||||
if let Some(start) = prefixes.get(prefix) { |
||||
let iri = format!("{start}{local}"); |
||||
if might_be_invalid_iri || start.path().is_empty() { |
||||
// We validate again. We always validate if the local part might be the IRI authority.
|
||||
if let Err(e) = Iri::parse(iri.as_str()) { |
||||
return Err(format!( |
||||
"The prefixed name {prefix}:{local} builds IRI {iri} that is invalid: {e}" |
||||
)); |
||||
} |
||||
} |
||||
Ok(NamedNode::new_unchecked(iri)) |
||||
} else { |
||||
Err(format!("The prefix {prefix}: has not been declared")) |
||||
} |
||||
} |
||||
|
||||
fn str_from_utf8(data: &[u8], range: Range<usize>) -> Result<&str, TokenRecognizerError> { |
||||
str::from_utf8(data).map_err(|e| { |
||||
( |
||||
range.start + e.valid_up_to()..min(range.end, range.start + e.valid_up_to() + 4), |
||||
format!("Invalid UTF-8: {e}"), |
||||
) |
||||
.into() |
||||
}) |
||||
} |
||||
|
||||
fn string_from_utf8(data: Vec<u8>, range: Range<usize>) -> Result<String, TokenRecognizerError> { |
||||
String::from_utf8(data).map_err(|e| { |
||||
( |
||||
range.start + e.utf8_error().valid_up_to() |
||||
..min(range.end, range.start + e.utf8_error().valid_up_to() + 4), |
||||
format!("Invalid UTF-8: {e}"), |
||||
) |
||||
.into() |
||||
}) |
||||
} |
@ -0,0 +1,314 @@ |
||||
//! Shared parser implementation for N-Triples and N-Quads.
|
||||
|
||||
#[cfg(feature = "rdf-star")] |
||||
use crate::oxrdf::Triple; |
||||
use crate::oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term}; |
||||
use crate::oxttl::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token}; |
||||
use crate::oxttl::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError}; |
||||
use crate::oxttl::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE}; |
||||
|
||||
pub struct NQuadsRecognizer { |
||||
stack: Vec<NQuadsState>, |
||||
subjects: Vec<Subject>, |
||||
predicates: Vec<NamedNode>, |
||||
objects: Vec<Term>, |
||||
} |
||||
pub struct NQuadsRecognizerContext { |
||||
with_graph_name: bool, |
||||
#[cfg(feature = "rdf-star")] |
||||
with_quoted_triples: bool, |
||||
lexer_options: N3LexerOptions, |
||||
} |
||||
|
||||
enum NQuadsState { |
||||
ExpectSubject, |
||||
ExpectPredicate, |
||||
ExpectedObject, |
||||
ExpectPossibleGraphOrEndOfQuotedTriple, |
||||
ExpectDot, |
||||
ExpectLiteralAnnotationOrGraphNameOrDot { |
||||
value: String, |
||||
}, |
||||
ExpectLiteralDatatype { |
||||
value: String, |
||||
}, |
||||
#[cfg(feature = "rdf-star")] |
||||
AfterQuotedSubject, |
||||
#[cfg(feature = "rdf-star")] |
||||
AfterQuotedObject, |
||||
} |
||||
|
||||
impl RuleRecognizer for NQuadsRecognizer { |
||||
type TokenRecognizer = N3Lexer; |
||||
type Output = Quad; |
||||
type Context = NQuadsRecognizerContext; |
||||
|
||||
fn error_recovery_state(mut self) -> Self { |
||||
self.stack.clear(); |
||||
self.subjects.clear(); |
||||
self.predicates.clear(); |
||||
self.objects.clear(); |
||||
self |
||||
} |
||||
|
||||
fn recognize_next( |
||||
mut self, |
||||
token: N3Token<'_>, |
||||
context: &mut NQuadsRecognizerContext, |
||||
results: &mut Vec<Quad>, |
||||
errors: &mut Vec<RuleRecognizerError>, |
||||
) -> Self { |
||||
if let Some(state) = self.stack.pop() { |
||||
match state { |
||||
NQuadsState::ExpectSubject => match token { |
||||
N3Token::IriRef(s) => { |
||||
self.subjects |
||||
.push(NamedNode::new_unchecked(s).into()); |
||||
self.stack.push(NQuadsState::ExpectPredicate); |
||||
self |
||||
} |
||||
N3Token::BlankNodeLabel(s) => { |
||||
self.subjects.push(BlankNode::new_unchecked(s).into()); |
||||
self.stack.push(NQuadsState::ExpectPredicate); |
||||
self |
||||
} |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Token::Punctuation("<<") if context.with_quoted_triples => { |
||||
self.stack.push(NQuadsState::AfterQuotedSubject); |
||||
self.stack.push(NQuadsState::ExpectSubject); |
||||
self |
||||
} |
||||
_ => self.error( |
||||
errors, |
||||
"The subject of a triple should be an IRI or a blank node, TOKEN found", |
||||
), |
||||
}, |
||||
NQuadsState::ExpectPredicate => match token { |
||||
N3Token::IriRef(p) => { |
||||
self.predicates |
||||
.push(NamedNode::new_unchecked(p)); |
||||
self.stack.push(NQuadsState::ExpectedObject); |
||||
self |
||||
} |
||||
_ => self.error( |
||||
errors, |
||||
"The predicate of a triple should be an IRI, TOKEN found", |
||||
), |
||||
}, |
||||
NQuadsState::ExpectedObject => match token { |
||||
N3Token::IriRef(o) => { |
||||
self.objects |
||||
.push(NamedNode::new_unchecked(o).into()); |
||||
self.stack |
||||
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); |
||||
self |
||||
} |
||||
N3Token::BlankNodeLabel(o) => { |
||||
self.objects.push(BlankNode::new_unchecked(o).into()); |
||||
self.stack |
||||
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); |
||||
self |
||||
} |
||||
N3Token::String(value) => { |
||||
self.stack |
||||
.push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }); |
||||
self |
||||
} |
||||
#[cfg(feature = "rdf-star")] |
||||
N3Token::Punctuation("<<") if context.with_quoted_triples => { |
||||
self.stack.push(NQuadsState::AfterQuotedObject); |
||||
self.stack.push(NQuadsState::ExpectSubject); |
||||
self |
||||
} |
||||
_ => self.error( |
||||
errors, |
||||
"The object of a triple should be an IRI, a blank node or a literal, TOKEN found", |
||||
), |
||||
}, |
||||
NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token { |
||||
N3Token::LangTag(lang_tag) => { |
||||
self.objects.push( |
||||
Literal::new_language_tagged_literal_unchecked( |
||||
value, |
||||
lang_tag.to_ascii_lowercase(), |
||||
) |
||||
.into(), |
||||
); |
||||
self.stack |
||||
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); |
||||
self |
||||
} |
||||
N3Token::Punctuation("^^") => { |
||||
self.stack |
||||
.push(NQuadsState::ExpectLiteralDatatype { value }); |
||||
self |
||||
} |
||||
_ => { |
||||
self.objects.push(Literal::new_simple_literal(value).into()); |
||||
self.stack |
||||
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); |
||||
self.recognize_next(token, context, results, errors) |
||||
} |
||||
}, |
||||
NQuadsState::ExpectLiteralDatatype { value } => match token { |
||||
N3Token::IriRef(d) => { |
||||
self.objects.push( |
||||
Literal::new_typed_literal( |
||||
value, |
||||
NamedNode::new_unchecked(d) |
||||
) |
||||
.into(), |
||||
); |
||||
self.stack |
||||
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); |
||||
self |
||||
} |
||||
_ => self.error(errors, "A literal datatype must be an IRI, found TOKEN"), |
||||
}, |
||||
NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => { |
||||
if self.stack.is_empty() { |
||||
match token { |
||||
N3Token::IriRef(g) if context.with_graph_name => { |
||||
self.emit_quad( |
||||
results, |
||||
NamedNode::new_unchecked(g).into(), |
||||
); |
||||
self.stack.push(NQuadsState::ExpectDot); |
||||
self |
||||
} |
||||
N3Token::BlankNodeLabel(g) if context.with_graph_name => { |
||||
self.emit_quad(results, BlankNode::new_unchecked(g).into()); |
||||
self.stack.push(NQuadsState::ExpectDot); |
||||
self |
||||
} |
||||
_ => { |
||||
self.emit_quad(results, GraphName::DefaultGraph); |
||||
self.stack.push(NQuadsState::ExpectDot); |
||||
self.recognize_next(token, context, results, errors) |
||||
} |
||||
} |
||||
} else if token == N3Token::Punctuation(">>") { |
||||
self |
||||
} else { |
||||
self.error(errors, "Expecting the end of a quoted triple '>>'") |
||||
} |
||||
} |
||||
NQuadsState::ExpectDot => if let N3Token::Punctuation(".") = token { |
||||
self.stack.push(NQuadsState::ExpectSubject); |
||||
self |
||||
} else { |
||||
errors.push("Quads should be followed by a dot".into()); |
||||
self.stack.push(NQuadsState::ExpectSubject); |
||||
self.recognize_next(token, context, results, errors) |
||||
}, |
||||
#[cfg(feature = "rdf-star")] |
||||
NQuadsState::AfterQuotedSubject => { |
||||
let triple = Triple { |
||||
subject: self.subjects.pop().unwrap(), |
||||
predicate: self.predicates.pop().unwrap(), |
||||
object: self.objects.pop().unwrap(), |
||||
}; |
||||
self.subjects.push(triple.into()); |
||||
self.stack.push(NQuadsState::ExpectPredicate); |
||||
self.recognize_next(token,context, results, errors) |
||||
} |
||||
#[cfg(feature = "rdf-star")] |
||||
NQuadsState::AfterQuotedObject => { |
||||
let triple = Triple { |
||||
subject: self.subjects.pop().unwrap(), |
||||
predicate: self.predicates.pop().unwrap(), |
||||
object: self.objects.pop().unwrap(), |
||||
}; |
||||
self.objects.push(triple.into()); |
||||
self.stack |
||||
.push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); |
||||
self.recognize_next(token, context, results, errors) |
||||
} |
||||
} |
||||
} else if token == N3Token::Punctuation(".") { |
||||
self.stack.push(NQuadsState::ExpectSubject); |
||||
self |
||||
} else { |
||||
self |
||||
} |
||||
} |
||||
|
||||
fn recognize_end( |
||||
mut self, |
||||
_context: &mut NQuadsRecognizerContext, |
||||
results: &mut Vec<Quad>, |
||||
errors: &mut Vec<RuleRecognizerError>, |
||||
) { |
||||
match &*self.stack { |
||||
[NQuadsState::ExpectSubject] | [] => (), |
||||
[NQuadsState::ExpectDot] => errors.push("Triples should be followed by a dot".into()), |
||||
[NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => { |
||||
self.emit_quad(results, GraphName::DefaultGraph); |
||||
errors.push("Triples should be followed by a dot".into()) |
||||
} |
||||
[NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }] => { |
||||
self.objects.push(Literal::new_simple_literal(value).into()); |
||||
self.emit_quad(results, GraphName::DefaultGraph); |
||||
errors.push("Triples should be followed by a dot".into()) |
||||
} |
||||
_ => errors.push("Unexpected end".into()), // TODO
|
||||
} |
||||
} |
||||
|
||||
fn lexer_options(context: &NQuadsRecognizerContext) -> &N3LexerOptions { |
||||
&context.lexer_options |
||||
} |
||||
} |
||||
|
||||
impl NQuadsRecognizer { |
||||
pub fn new_parser( |
||||
with_graph_name: bool, |
||||
#[cfg(feature = "rdf-star")] with_quoted_triples: bool, |
||||
unchecked: bool, |
||||
) -> Parser<Self> { |
||||
Parser::new( |
||||
Lexer::new( |
||||
N3Lexer::new(N3LexerMode::NTriples, unchecked), |
||||
MIN_BUFFER_SIZE, |
||||
MAX_BUFFER_SIZE, |
||||
true, |
||||
Some(b"#"), |
||||
), |
||||
Self { |
||||
stack: vec![NQuadsState::ExpectSubject], |
||||
subjects: Vec::new(), |
||||
predicates: Vec::new(), |
||||
objects: Vec::new(), |
||||
}, |
||||
NQuadsRecognizerContext { |
||||
with_graph_name, |
||||
#[cfg(feature = "rdf-star")] |
||||
with_quoted_triples, |
||||
lexer_options: N3LexerOptions::default(), |
||||
}, |
||||
) |
||||
} |
||||
|
||||
#[must_use] |
||||
fn error( |
||||
mut self, |
||||
errors: &mut Vec<RuleRecognizerError>, |
||||
msg: impl Into<RuleRecognizerError>, |
||||
) -> Self { |
||||
errors.push(msg.into()); |
||||
self.stack.clear(); |
||||
self.subjects.clear(); |
||||
self.predicates.clear(); |
||||
self.objects.clear(); |
||||
self |
||||
} |
||||
|
||||
fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) { |
||||
results.push(Quad { |
||||
subject: self.subjects.pop().unwrap(), |
||||
predicate: self.predicates.pop().unwrap(), |
||||
object: self.objects.pop().unwrap(), |
||||
graph_name, |
||||
}) |
||||
} |
||||
} |
@ -0,0 +1,19 @@ |
||||
mod lexer; |
||||
mod line_formats; |
||||
pub mod n3; |
||||
pub mod nquads; |
||||
pub mod ntriples; |
||||
mod terse; |
||||
mod toolkit; |
||||
pub mod trig; |
||||
pub mod turtle; |
||||
|
||||
pub use crate::oxttl::n3::N3Parser; |
||||
pub use crate::oxttl::nquads::{NQuadsParser, NQuadsSerializer}; |
||||
pub use crate::oxttl::ntriples::{NTriplesParser, NTriplesSerializer}; |
||||
pub use crate::oxttl::toolkit::{TextPosition, TurtleParseError, TurtleSyntaxError}; |
||||
pub use crate::oxttl::trig::{TriGParser, TriGSerializer}; |
||||
pub use crate::oxttl::turtle::{TurtleParser, TurtleSerializer}; |
||||
|
||||
pub(crate) const MIN_BUFFER_SIZE: usize = 4096; |
||||
pub(crate) const MAX_BUFFER_SIZE: usize = 4096 * 4096; |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,564 @@ |
||||
//! A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser implemented by [`NQuadsParser`]
|
||||
//! and a serializer implemented by [`NQuadsSerializer`].
|
||||
|
||||
use crate::oxrdf::{Quad, QuadRef}; |
||||
use crate::oxttl::line_formats::NQuadsRecognizer; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::toolkit::FromTokioAsyncReadIterator; |
||||
use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; |
||||
use std::io::{self, Read, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; |
||||
|
||||
/// A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser.
|
||||
///
|
||||
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature and the [`NQuadsParser::with_quoted_triples`] option.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for quad in NQuadsParser::new().parse_read(file.as_ref()) {
|
||||
/// let quad = quad?;
|
||||
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)] |
||||
#[must_use] |
||||
pub struct NQuadsParser { |
||||
unchecked: bool, |
||||
#[cfg(feature = "rdf-star")] |
||||
with_quoted_triples: bool, |
||||
} |
||||
|
||||
impl NQuadsParser { |
||||
/// Builds a new [`NQuadsParser`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self::default() |
||||
} |
||||
|
||||
/// Assumes the file is valid to make parsing faster.
|
||||
///
|
||||
/// It will skip some validations.
|
||||
///
|
||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
||||
#[inline] |
||||
pub fn unchecked(mut self) -> Self { |
||||
self.unchecked = true; |
||||
self |
||||
} |
||||
|
||||
/// Enables [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
|
||||
#[cfg(feature = "rdf-star")] |
||||
#[inline] |
||||
pub fn with_quoted_triples(mut self) -> Self { |
||||
self.with_quoted_triples = true; |
||||
self |
||||
} |
||||
|
||||
/// Parses a N-Quads file from a [`Read`] implementation.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for quad in NQuadsParser::new().parse_read(file.as_ref()) {
|
||||
/// let quad = quad?;
|
||||
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>(self, read: R) -> FromReadNQuadsReader<R> { |
||||
FromReadNQuadsReader { |
||||
inner: self.parse().parser.parse_read(read), |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Quads file from a [`AsyncRead`] implementation.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref());
|
||||
/// while let Some(triple) = parser.next().await {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( |
||||
self, |
||||
read: R, |
||||
) -> FromTokioAsyncReadNQuadsReader<R> { |
||||
FromTokioAsyncReadNQuadsReader { |
||||
inner: self.parse().parser.parse_tokio_async_read(read), |
||||
} |
||||
} |
||||
|
||||
/// Allows to parse a N-Quads file by using a low-level API.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// let file: [&[u8]; 4] = [
|
||||
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||
/// ];
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NQuadsParser::new().parse();
|
||||
/// let mut file_chunks = file.iter();
|
||||
/// while !parser.is_end() {
|
||||
/// // We feed more data to the parser
|
||||
/// if let Some(chunk) = file_chunks.next() {
|
||||
/// parser.extend_from_slice(chunk);
|
||||
/// } else {
|
||||
/// parser.end(); // It's finished
|
||||
/// }
|
||||
/// // We read as many quads from the parser as possible
|
||||
/// while let Some(quad) = parser.read_next() {
|
||||
/// let quad = quad?;
|
||||
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn parse(self) -> LowLevelNQuadsReader { |
||||
LowLevelNQuadsReader { |
||||
parser: NQuadsRecognizer::new_parser( |
||||
true, |
||||
#[cfg(feature = "rdf-star")] |
||||
self.with_quoted_triples, |
||||
self.unchecked, |
||||
), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Quads file from a [`Read`] implementation. Can be built using [`NQuadsParser::parse_read`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for quad in NQuadsParser::new().parse_read(file.as_ref()) {
|
||||
/// let quad = quad?;
|
||||
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct FromReadNQuadsReader<R: Read> { |
||||
inner: FromReadIterator<R, NQuadsRecognizer>, |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadNQuadsReader<R> { |
||||
type Item = Result<Quad, TurtleParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
self.inner.next() |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Quads file from a [`AsyncRead`] implementation. Can be built using [`NQuadsParser::parse_tokio_async_read`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref());
|
||||
/// while let Some(triple) = parser.next().await {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
#[must_use] |
||||
pub struct FromTokioAsyncReadNQuadsReader<R: AsyncRead + Unpin> { |
||||
inner: FromTokioAsyncReadIterator<R, NQuadsRecognizer>, |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<R: AsyncRead + Unpin> FromTokioAsyncReadNQuadsReader<R> { |
||||
/// Reads the next triple or returns `None` if the file is finished.
|
||||
pub async fn next(&mut self) -> Option<Result<Quad, TurtleParseError>> { |
||||
Some(self.inner.next().await?.map(Into::into)) |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Quads file by using a low-level API. Can be built using [`NQuadsParser::parse`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NQuadsParser;
|
||||
///
|
||||
/// let file: [&[u8]; 4] = [
|
||||
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||
/// ];
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NQuadsParser::new().parse();
|
||||
/// let mut file_chunks = file.iter();
|
||||
/// while !parser.is_end() {
|
||||
/// // We feed more data to the parser
|
||||
/// if let Some(chunk) = file_chunks.next() {
|
||||
/// parser.extend_from_slice(chunk);
|
||||
/// } else {
|
||||
/// parser.end(); // It's finished
|
||||
/// }
|
||||
/// // We read as many quads from the parser as possible
|
||||
/// while let Some(quad) = parser.read_next() {
|
||||
/// let quad = quad?;
|
||||
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct LowLevelNQuadsReader { |
||||
parser: Parser<NQuadsRecognizer>, |
||||
} |
||||
|
||||
impl LowLevelNQuadsReader { |
||||
/// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
|
||||
pub fn extend_from_slice(&mut self, other: &[u8]) { |
||||
self.parser.extend_from_slice(other) |
||||
} |
||||
|
||||
/// Tell the parser that the file is finished.
|
||||
///
|
||||
/// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
|
||||
pub fn end(&mut self) { |
||||
self.parser.end() |
||||
} |
||||
|
||||
/// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
|
||||
pub fn is_end(&self) -> bool { |
||||
self.parser.is_end() |
||||
} |
||||
|
||||
/// Attempt to parse a new quad from the already provided data.
|
||||
///
|
||||
/// Returns [`None`] if the parsing is finished or more data is required.
|
||||
/// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
|
||||
pub fn read_next(&mut self) -> Option<Result<Quad, TurtleSyntaxError>> { |
||||
self.parser.read_next() |
||||
} |
||||
} |
||||
|
||||
/// A [N-Quads](https://www.w3.org/TR/n-quads/) serializer.
|
||||
///
|
||||
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// NamedNodeRef::new("http://example.com")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)] |
||||
#[must_use] |
||||
pub struct NQuadsSerializer; |
||||
|
||||
impl NQuadsSerializer { |
||||
/// Builds a new [`NQuadsSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self |
||||
} |
||||
|
||||
/// Writes a N-Quads file to a [`Write`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// NamedNodeRef::new("http://example.com")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteNQuadsWriter<W> { |
||||
ToWriteNQuadsWriter { |
||||
write, |
||||
writer: self.serialize(), |
||||
} |
||||
} |
||||
|
||||
/// Writes a N-Quads file to a [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = NQuadsSerializer::new().serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new_unchecked("http://example.com#me"),
|
||||
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
/// NamedNodeRef::new_unchecked("http://schema.org/Person"),
|
||||
/// NamedNodeRef::new_unchecked("http://example.com"),
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteNQuadsWriter<W> { |
||||
ToTokioAsyncWriteNQuadsWriter { |
||||
write, |
||||
writer: self.serialize(), |
||||
buffer: Vec::new(), |
||||
} |
||||
} |
||||
|
||||
/// Builds a low-level N-Quads writer.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// let mut buf = Vec::new();
|
||||
/// let mut writer = NQuadsSerializer::new().serialize();
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// NamedNodeRef::new("http://example.com")?,
|
||||
/// ), &mut buf)?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// buf.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn serialize(self) -> LowLevelNQuadsWriter { |
||||
LowLevelNQuadsWriter |
||||
} |
||||
} |
||||
|
||||
/// Writes a N-Quads file to a [`Write`] implementation. Can be built using [`NQuadsSerializer::serialize_to_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// NamedNodeRef::new("http://example.com")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteNQuadsWriter<W: Write> { |
||||
write: W, |
||||
writer: LowLevelNQuadsWriter, |
||||
} |
||||
|
||||
impl<W: Write> ToWriteNQuadsWriter<W> { |
||||
/// Writes an extra quad.
|
||||
pub fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
self.writer.write_quad(q, &mut self.write) |
||||
} |
||||
|
||||
/// Ends the write process and returns the underlying [`Write`].
|
||||
pub fn finish(self) -> W { |
||||
self.write |
||||
} |
||||
} |
||||
|
||||
/// Writes a N-Quads file to a [`AsyncWrite`] implementation. Can be built using [`NQuadsSerializer::serialize_to_tokio_async_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = NQuadsSerializer::new().serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new_unchecked("http://example.com#me"),
|
||||
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
/// NamedNodeRef::new_unchecked("http://schema.org/Person"),
|
||||
/// NamedNodeRef::new_unchecked("http://example.com"),
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
#[must_use] |
||||
pub struct ToTokioAsyncWriteNQuadsWriter<W: AsyncWrite + Unpin> { |
||||
write: W, |
||||
writer: LowLevelNQuadsWriter, |
||||
buffer: Vec<u8>, |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteNQuadsWriter<W> { |
||||
/// Writes an extra quad.
|
||||
pub async fn write_quad<'a>(&mut self, q: impl Into<QuadRef<'a>>) -> io::Result<()> { |
||||
self.writer.write_quad(q, &mut self.buffer)?; |
||||
self.write.write_all(&self.buffer).await?; |
||||
self.buffer.clear(); |
||||
Ok(()) |
||||
} |
||||
|
||||
/// Ends the write process and returns the underlying [`Write`].
|
||||
pub fn finish(self) -> W { |
||||
self.write |
||||
} |
||||
} |
||||
|
||||
/// Writes a N-Quads file by using a low-level API. Can be built using [`NQuadsSerializer::serialize`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, QuadRef};
|
||||
/// use oxttl::NQuadsSerializer;
|
||||
///
|
||||
/// let mut buf = Vec::new();
|
||||
/// let mut writer = NQuadsSerializer::new().serialize();
|
||||
/// writer.write_quad(QuadRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// NamedNodeRef::new("http://example.com")?,
|
||||
/// ), &mut buf)?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://example.com> .\n",
|
||||
/// buf.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct LowLevelNQuadsWriter; |
||||
|
||||
impl LowLevelNQuadsWriter { |
||||
/// Writes an extra quad.
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn write_quad<'a>( |
||||
&mut self, |
||||
q: impl Into<QuadRef<'a>>, |
||||
mut write: impl Write, |
||||
) -> io::Result<()> { |
||||
writeln!(write, "{} .", q.into()) |
||||
} |
||||
} |
@ -0,0 +1,580 @@ |
||||
//! A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser implemented by [`NTriplesParser`]
|
||||
//! and a serializer implemented by [`NTriplesSerializer`].
|
||||
|
||||
use crate::oxrdf::{Triple, TripleRef}; |
||||
use crate::oxttl::line_formats::NQuadsRecognizer; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::toolkit::FromTokioAsyncReadIterator; |
||||
use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; |
||||
use std::io::{self, Read, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; |
||||
|
||||
/// A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser.
|
||||
///
|
||||
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature and the [`NTriplesParser::with_quoted_triples`] option.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for triple in NTriplesParser::new().parse_read(file.as_ref()) {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)] |
||||
#[must_use] |
||||
pub struct NTriplesParser { |
||||
unchecked: bool, |
||||
#[cfg(feature = "rdf-star")] |
||||
with_quoted_triples: bool, |
||||
} |
||||
|
||||
impl NTriplesParser { |
||||
/// Builds a new [`NTriplesParser`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self::default() |
||||
} |
||||
|
||||
/// Assumes the file is valid to make parsing faster.
|
||||
///
|
||||
/// It will skip some validations.
|
||||
///
|
||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser. ///
|
||||
#[inline] |
||||
pub fn unchecked(mut self) -> Self { |
||||
self.unchecked = true; |
||||
self |
||||
} |
||||
|
||||
/// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star).
|
||||
#[cfg(feature = "rdf-star")] |
||||
#[inline] |
||||
pub fn with_quoted_triples(mut self) -> Self { |
||||
self.with_quoted_triples = true; |
||||
self |
||||
} |
||||
|
||||
/// Parses a N-Triples file from a [`Read`] implementation.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for triple in NTriplesParser::new().parse_read(file.as_ref()) {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>(self, read: R) -> FromReadNTriplesReader<R> { |
||||
FromReadNTriplesReader { |
||||
inner: self.parse().parser.parse_read(read), |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Triples file from a [`AsyncRead`] implementation.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref());
|
||||
/// while let Some(triple) = parser.next().await {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( |
||||
self, |
||||
read: R, |
||||
) -> FromTokioAsyncReadNTriplesReader<R> { |
||||
FromTokioAsyncReadNTriplesReader { |
||||
inner: self.parse().parser.parse_tokio_async_read(read), |
||||
} |
||||
} |
||||
|
||||
/// Allows to parse a N-Triples file by using a low-level API.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// let file: [&[u8]; 4] = [
|
||||
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||
/// ];
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NTriplesParser::new().parse();
|
||||
/// let mut file_chunks = file.iter();
|
||||
/// while !parser.is_end() {
|
||||
/// // We feed more data to the parser
|
||||
/// if let Some(chunk) = file_chunks.next() {
|
||||
/// parser.extend_from_slice(chunk);
|
||||
/// } else {
|
||||
/// parser.end(); // It's finished
|
||||
/// }
|
||||
/// // We read as many triples from the parser as possible
|
||||
/// while let Some(triple) = parser.read_next() {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn parse(self) -> LowLevelNTriplesReader { |
||||
LowLevelNTriplesReader { |
||||
parser: NQuadsRecognizer::new_parser( |
||||
false, |
||||
#[cfg(feature = "rdf-star")] |
||||
self.with_quoted_triples, |
||||
self.unchecked, |
||||
), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Triples file from a [`Read`] implementation. Can be built using [`NTriplesParser::parse_read`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for triple in NTriplesParser::new().parse_read(file.as_ref()) {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct FromReadNTriplesReader<R: Read> { |
||||
inner: FromReadIterator<R, NQuadsRecognizer>, |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadNTriplesReader<R> { |
||||
type Item = Result<Triple, TurtleParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(self.inner.next()?.map(Into::into)) |
||||
} |
||||
} |
||||
|
||||
/// Parses a N-Triples file from a [`AsyncRead`] implementation. Can be built using [`NTriplesParser::parse_tokio_async_read`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
|
||||
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
|
||||
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NTriplesParser::new().parse_tokio_async_read(file.as_ref());
|
||||
/// while let Some(triple) = parser.next().await {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadNTriplesReader<R: AsyncRead + Unpin> {
    // Async counterpart of `FromReadNTriplesReader`: same recognizer,
    // but data is pulled from an `AsyncRead`.
    inner: FromTokioAsyncReadIterator<R, NQuadsRecognizer>,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadNTriplesReader<R> {
    /// Reads the next triple or returns `None` if the file is finished.
    pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
        let next = self.inner.next().await?;
        Some(next.map(Into::into))
    }
}
||||
|
||||
/// Parses a N-Triples file by using a low-level API. Can be built using [`NTriplesParser::parse`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, vocab::rdf};
|
||||
/// use oxttl::NTriplesParser;
|
||||
///
|
||||
/// let file: [&[u8]; 4] = [
|
||||
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
|
||||
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
|
||||
/// ];
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = NTriplesParser::new().parse();
|
||||
/// let mut file_chunks = file.iter();
|
||||
/// while !parser.is_end() {
|
||||
/// // We feed more data to the parser
|
||||
/// if let Some(chunk) = file_chunks.next() {
|
||||
/// parser.extend_from_slice(chunk);
|
||||
/// } else {
|
||||
/// parser.end(); // It's finished
|
||||
/// }
|
||||
/// // We read as many triples from the parser as possible
|
||||
/// while let Some(triple) = parser.read_next() {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct LowLevelNTriplesReader {
    // Push-based parser state: fed via `extend_from_slice`,
    // drained via `read_next`.
    parser: Parser<NQuadsRecognizer>,
}
||||
|
||||
impl LowLevelNTriplesReader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other)
    }

    /// Tell the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempt to parse a new triple from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> {
        // The recognizer yields quads; `Into` converts them to triples.
        Some(self.parser.read_next()?.map(Into::into))
    }
}
||||
|
||||
/// A [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) serializer.
|
||||
///
|
||||
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) is available behind the `rdf-star` feature.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)]
#[must_use]
// Unit struct: canonical N-Triples serialization takes no configuration.
pub struct NTriplesSerializer;
||||
|
||||
impl NTriplesSerializer { |
||||
/// Builds a new [`NTriplesSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self |
||||
} |
||||
|
||||
/// Writes a N-Triples file to a [`Write`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteNTriplesWriter<W> { |
||||
ToWriteNTriplesWriter { |
||||
write, |
||||
writer: self.serialize(), |
||||
} |
||||
} |
||||
|
||||
/// Writes a N-Triples file to a [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new_unchecked("http://example.com#me"),
|
||||
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
/// NamedNodeRef::new_unchecked("http://schema.org/Person"),
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteNTriplesWriter<W> { |
||||
ToTokioAsyncWriteNTriplesWriter { |
||||
write, |
||||
writer: self.serialize(), |
||||
buffer: Vec::new(), |
||||
} |
||||
} |
||||
|
||||
/// Builds a low-level N-Triples writer.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// let mut buf = Vec::new();
|
||||
/// let mut writer = NTriplesSerializer::new().serialize();
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ), &mut buf)?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// buf.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn serialize(self) -> LowLevelNTriplesWriter { |
||||
LowLevelNTriplesWriter |
||||
} |
||||
} |
||||
|
||||
/// Writes a N-Triples file to a [`Write`] implementation. Can be built using [`NTriplesSerializer::serialize_to_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// let mut writer = NTriplesSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use]
pub struct ToWriteNTriplesWriter<W: Write> {
    // Destination stream; returned as-is by `finish`.
    write: W,
    // Stateless low-level serializer doing the actual formatting.
    writer: LowLevelNTriplesWriter,
}
||||
|
||||
impl<W: Write> ToWriteNTriplesWriter<W> {
    /// Writes an extra triple.
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Serializes straight into the underlying writer; no buffering here.
        self.writer.write_triple(t, &mut self.write)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> W {
        self.write
    }
}
||||
|
||||
/// Writes a N-Triples file to a [`AsyncWrite`] implementation. Can be built using [`NTriplesSerializer::serialize_to_tokio_async_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let mut writer = NTriplesSerializer::new().serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new_unchecked("http://example.com#me"),
|
||||
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
/// NamedNodeRef::new_unchecked("http://schema.org/Person")
|
||||
/// )).await?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// writer.finish().as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteNTriplesWriter<W: AsyncWrite + Unpin> {
    // Destination asynchronous stream; returned as-is by `finish`.
    write: W,
    // Stateless low-level serializer doing the actual formatting.
    writer: LowLevelNTriplesWriter,
    // Scratch buffer: triples are serialized here synchronously, then
    // flushed with one async write and the allocation is reused.
    buffer: Vec<u8>,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteNTriplesWriter<W> {
    /// Writes an extra triple.
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Serialize synchronously into the internal buffer, flush it
        // asynchronously, then reuse the buffer's allocation.
        self.writer.write_triple(t, &mut self.buffer)?;
        self.write.write_all(&self.buffer).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    pub fn finish(self) -> W {
        self.write
    }
}
||||
|
||||
/// Writes a N-Triples file by using a low-level API. Can be built using [`NTriplesSerializer::serialize`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::NTriplesSerializer;
|
||||
///
|
||||
/// let mut buf = Vec::new();
|
||||
/// let mut writer = NTriplesSerializer::new().serialize();
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ), &mut buf)?;
|
||||
/// assert_eq!(
|
||||
/// b"<http://example.com#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
|
||||
/// buf.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
// Unit struct: every triple is formatted independently, so no state is needed.
pub struct LowLevelNTriplesWriter;
||||
|
||||
impl LowLevelNTriplesWriter { |
||||
/// Writes an extra triple.
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn write_triple<'a>( |
||||
&mut self, |
||||
t: impl Into<TripleRef<'a>>, |
||||
mut write: impl Write, |
||||
) -> io::Result<()> { |
||||
writeln!(write, "{} .", t.into()) |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::oxrdf::{Literal, NamedNode};

    // `unchecked()` must skip term validation: the relative IRIs and the
    // language tag (which strict parsing would presumably reject, per the
    // test name) are accepted verbatim.
    #[test]
    fn unchecked_parsing() {
        let triples = NTriplesParser::new()
            .unchecked()
            .parse_read(r#"<foo> <bar> "baz"@toolonglangtag ."#.as_bytes())
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(
            triples,
            [Triple::new(
                NamedNode::new_unchecked("foo"),
                NamedNode::new_unchecked("bar"),
                Literal::new_language_tagged_literal_unchecked("baz", "toolonglangtag"),
            )]
        )
    }
}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,97 @@ |
||||
use std::ops::Range; |
||||
use std::{fmt, io}; |
||||
|
||||
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// Line number, starting from 0.
    pub line: u64,
    /// Column number in code points, starting from 0.
    pub column: u64,
    /// Global byte offset from the start of the file, starting from 0.
    pub offset: u64,
}
||||
|
||||
/// An error in the syntax of the parsed file.
|
||||
///
|
||||
/// It is composed of a message and a byte range in the input.
|
||||
#[derive(Debug, thiserror::Error)]
pub struct TurtleSyntaxError {
    // Start/end of the offending span in the input text.
    pub(super) location: Range<TextPosition>,
    // Human-readable description, rendered by the `Display` impl.
    pub(super) message: String,
}
||||
|
||||
impl TurtleSyntaxError {
    /// The location of the error inside of the file.
    #[inline]
    pub fn location(&self) -> Range<TextPosition> {
        // `Range` is not `Copy`, hence the explicit clone.
        self.location.clone()
    }

    /// The error message.
    #[inline]
    pub fn message(&self) -> &str {
        &self.message
    }
}
||||
|
||||
impl fmt::Display for TurtleSyntaxError { |
||||
#[inline] |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
if self.location.start.offset + 1 >= self.location.end.offset { |
||||
write!( |
||||
f, |
||||
"Parser error at line {} column {}: {}", |
||||
self.location.start.line + 1, |
||||
self.location.start.column + 1, |
||||
self.message |
||||
) |
||||
} else if self.location.start.line == self.location.end.line { |
||||
write!( |
||||
f, |
||||
"Parser error between at line {} between columns {} and column {}: {}", |
||||
self.location.start.line + 1, |
||||
self.location.start.column + 1, |
||||
self.location.end.column + 1, |
||||
self.message |
||||
) |
||||
} else { |
||||
write!( |
||||
f, |
||||
"Parser error between line {} column {} and line {} column {}: {}", |
||||
self.location.start.line + 1, |
||||
self.location.start.column + 1, |
||||
self.location.end.line + 1, |
||||
self.location.end.column + 1, |
||||
self.message |
||||
) |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<TurtleSyntaxError> for io::Error {
    #[inline]
    fn from(error: TurtleSyntaxError) -> Self {
        // InvalidData: the stream was readable but its content is malformed.
        Self::new(io::ErrorKind::InvalidData, error)
    }
}
||||
|
||||
/// A parsing error.
|
||||
///
|
||||
/// It is the union of [`TurtleSyntaxError`] and [`io::Error`].
|
||||
#[derive(Debug, thiserror::Error)]
pub enum TurtleParseError {
    /// I/O error during parsing (file not found...).
    // `transparent` delegates Display and source() to the wrapped error.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An error in the file syntax.
    #[error(transparent)]
    Syntax(#[from] TurtleSyntaxError),
}
||||
|
||||
impl From<TurtleParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: TurtleParseError) -> Self { |
||||
match error { |
||||
TurtleParseError::Syntax(e) => e.into(), |
||||
TurtleParseError::Io(e) => e, |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,432 @@ |
||||
use crate::oxttl::toolkit::error::{TextPosition, TurtleSyntaxError}; |
||||
use memchr::{memchr2, memchr2_iter}; |
||||
use std::borrow::Cow; |
||||
use std::cmp::min; |
||||
use std::io::{self, Read}; |
||||
use std::ops::{Range, RangeInclusive}; |
||||
use std::str; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::{AsyncRead, AsyncReadExt}; |
||||
|
||||
/// A low-level tokenizer: recognizes the next token at the start of a byte slice.
pub trait TokenRecognizer {
    /// The token type produced; may borrow from the input buffer.
    type Token<'a>
    where
        Self: 'a;
    /// Tokenizer configuration; `Default` gives the base behavior.
    type Options: Default;

    /// Tries to recognize one token at the beginning of `data`.
    ///
    /// Returns the number of consumed bytes together with the token (or a
    /// located error), or `None` when more input is required; `is_ending`
    /// tells whether more input can still arrive.
    fn recognize_next_token<'a>(
        &mut self,
        data: &'a [u8],
        is_ending: bool,
        config: &Self::Options,
    ) -> Option<(usize, Result<Self::Token<'a>, TokenRecognizerError>)>;
}
||||
|
||||
/// An error emitted by a [`TokenRecognizer`].
pub struct TokenRecognizerError {
    // Byte range of the problem, relative to the slice passed to
    // `recognize_next_token` (the lexer translates it to text positions).
    pub location: Range<usize>,
    // Human-readable description of the problem.
    pub message: String,
}
||||
|
||||
// Convenience conversion so recognizers can write `(range, "msg").into()`.
impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
    fn from((location, message): (Range<usize>, S)) -> Self {
        Self {
            location,
            message: message.into(),
        }
    }
}
||||
|
||||
#[allow(clippy::range_plus_one)]
impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
    fn from((location, message): (RangeInclusive<usize>, S)) -> Self {
        // Normalize the inclusive range into an exclusive one.
        (*location.start()..*location.end() + 1, message).into()
    }
}
||||
|
||||
impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
    fn from((location, message): (usize, S)) -> Self {
        // A single byte offset becomes a one-byte error range.
        (location..=location, message).into()
    }
}
||||
|
||||
pub struct Lexer<R: TokenRecognizer> {
    // The token recognizer driven over the buffered data.
    parser: R,
    // Sliding input window; already-consumed lines are dropped by `shrink_data`.
    data: Vec<u8>,
    // Current cursor position in `data` and in the whole input.
    position: Position,
    previous_position: Position, // Lexer position before the last emitted token
    // True once end of input was signalled (`end()` or a 0-byte read).
    is_ending: bool,
    // Lower/upper bounds for the internal read buffer size.
    min_buffer_size: usize,
    max_buffer_size: usize,
    // Whether line jumps are plain whitespace (true) or significant (false).
    is_line_jump_whitespace: bool,
    // Byte sequence starting a line comment (e.g. b"#"), if any.
    line_comment_start: Option<&'static [u8]>,
}
||||
|
||||
#[derive(Clone, Copy)]
struct Position {
    // Byte offset in `data` of the start of the current line.
    line_start_buffer_offset: usize,
    // Byte offset of the cursor in `data` (which is a sliding window).
    buffer_offset: usize,
    // Absolute byte offset since the beginning of the input.
    global_offset: u64,
    // Absolute line number since the beginning of the input (0-based).
    global_line: u64,
}
||||
|
||||
impl<R: TokenRecognizer> Lexer<R> { |
||||
pub fn new( |
||||
parser: R, |
||||
min_buffer_size: usize, |
||||
max_buffer_size: usize, |
||||
is_line_jump_whitespace: bool, |
||||
line_comment_start: Option<&'static [u8]>, |
||||
) -> Self { |
||||
Self { |
||||
parser, |
||||
data: Vec::new(), |
||||
position: Position { |
||||
line_start_buffer_offset: 0, |
||||
buffer_offset: 0, |
||||
global_offset: 0, |
||||
global_line: 0, |
||||
}, |
||||
previous_position: Position { |
||||
line_start_buffer_offset: 0, |
||||
buffer_offset: 0, |
||||
global_offset: 0, |
||||
global_line: 0, |
||||
}, |
||||
is_ending: false, |
||||
min_buffer_size, |
||||
max_buffer_size, |
||||
is_line_jump_whitespace, |
||||
line_comment_start, |
||||
} |
||||
} |
||||
|
||||
pub fn extend_from_slice(&mut self, other: &[u8]) { |
||||
self.shrink_data(); |
||||
self.data.extend_from_slice(other); |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn end(&mut self) { |
||||
self.is_ending = true; |
||||
} |
||||
|
||||
pub fn extend_from_read(&mut self, read: &mut impl Read) -> io::Result<()> { |
||||
self.shrink_data(); |
||||
if self.data.len() == self.max_buffer_size { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::OutOfMemory, |
||||
format!( |
||||
"Reached the buffer maximal size of {}", |
||||
self.max_buffer_size |
||||
), |
||||
)); |
||||
} |
||||
let min_end = min(self.data.len() + self.min_buffer_size, self.max_buffer_size); |
||||
let new_start = self.data.len(); |
||||
self.data.resize(min_end, 0); |
||||
if self.data.len() < self.data.capacity() { |
||||
// We keep extending to have as much space as available without reallocation
|
||||
self.data.resize(self.data.capacity(), 0); |
||||
} |
||||
let read = read.read(&mut self.data[new_start..])?; |
||||
self.data.truncate(new_start + read); |
||||
self.is_ending = read == 0; |
||||
Ok(()) |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub async fn extend_from_tokio_async_read( |
||||
&mut self, |
||||
read: &mut (impl AsyncRead + Unpin), |
||||
) -> io::Result<()> { |
||||
self.shrink_data(); |
||||
if self.data.len() == self.max_buffer_size { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::OutOfMemory, |
||||
format!( |
||||
"Reached the buffer maximal size of {}", |
||||
self.max_buffer_size |
||||
), |
||||
)); |
||||
} |
||||
let min_end = min(self.data.len() + self.min_buffer_size, self.max_buffer_size); |
||||
let new_start = self.data.len(); |
||||
self.data.resize(min_end, 0); |
||||
if self.data.len() < self.data.capacity() { |
||||
// We keep extending to have as much space as available without reallocation
|
||||
self.data.resize(self.data.capacity(), 0); |
||||
} |
||||
let read = read.read(&mut self.data[new_start..]).await?; |
||||
self.data.truncate(new_start + read); |
||||
self.is_ending = read == 0; |
||||
Ok(()) |
||||
} |
||||
|
||||
#[allow(clippy::unwrap_in_result)] |
||||
pub fn read_next( |
||||
&mut self, |
||||
options: &R::Options, |
||||
) -> Option<Result<R::Token<'_>, TurtleSyntaxError>> { |
||||
self.skip_whitespaces_and_comments()?; |
||||
self.previous_position = self.position; |
||||
let Some((consumed, result)) = self.parser.recognize_next_token( |
||||
&self.data[self.position.buffer_offset..], |
||||
self.is_ending, |
||||
options, |
||||
) else { |
||||
return if self.is_ending { |
||||
if self.position.buffer_offset == self.data.len() { |
||||
None // We have finished
|
||||
} else { |
||||
let (new_line_jumps, new_line_start) = |
||||
Self::find_number_of_line_jumps_and_start_of_last_line( |
||||
&self.data[self.position.buffer_offset..], |
||||
); |
||||
if new_line_jumps > 0 { |
||||
self.position.line_start_buffer_offset = |
||||
self.position.buffer_offset + new_line_start; |
||||
} |
||||
self.position.global_offset += |
||||
u64::try_from(self.data.len() - self.position.buffer_offset).unwrap(); |
||||
self.position.buffer_offset = self.data.len(); |
||||
self.position.global_line += new_line_jumps; |
||||
let new_position = TextPosition { |
||||
line: self.position.global_line, |
||||
column: Self::column_from_bytes( |
||||
&self.data[self.position.line_start_buffer_offset..], |
||||
), |
||||
offset: self.position.global_offset, |
||||
}; |
||||
let error = TurtleSyntaxError { |
||||
location: new_position..new_position, |
||||
message: "Unexpected end of file".into(), |
||||
}; |
||||
self.position.buffer_offset = self.data.len(); // We consume everything
|
||||
Some(Err(error)) |
||||
} |
||||
} else { |
||||
None |
||||
}; |
||||
}; |
||||
debug_assert!( |
||||
consumed > 0, |
||||
"The lexer must consume at least one byte each time" |
||||
); |
||||
debug_assert!( |
||||
self.position.buffer_offset + consumed <= self.data.len(), |
||||
"The lexer tried to consumed {consumed} bytes but only {} bytes are readable", |
||||
self.data.len() - self.position.buffer_offset |
||||
); |
||||
let (new_line_jumps, new_line_start) = |
||||
Self::find_number_of_line_jumps_and_start_of_last_line( |
||||
&self.data[self.position.buffer_offset..self.position.buffer_offset + consumed], |
||||
); |
||||
if new_line_jumps > 0 { |
||||
self.position.line_start_buffer_offset = self.position.buffer_offset + new_line_start; |
||||
} |
||||
self.position.buffer_offset += consumed; |
||||
self.position.global_offset += u64::try_from(consumed).unwrap(); |
||||
self.position.global_line += new_line_jumps; |
||||
Some(result.map_err(|e| TurtleSyntaxError { |
||||
location: self.location_from_buffer_offset_range(e.location), |
||||
message: e.message, |
||||
})) |
||||
} |
||||
|
||||
pub fn location_from_buffer_offset_range( |
||||
&self, |
||||
offset_range: Range<usize>, |
||||
) -> Range<TextPosition> { |
||||
let start_offset = self.previous_position.buffer_offset + offset_range.start; |
||||
let (start_extra_line_jumps, start_line_start) = |
||||
Self::find_number_of_line_jumps_and_start_of_last_line( |
||||
&self.data[self.previous_position.buffer_offset..start_offset], |
||||
); |
||||
let start_line_start = if start_extra_line_jumps > 0 { |
||||
start_line_start + self.previous_position.buffer_offset |
||||
} else { |
||||
self.previous_position.line_start_buffer_offset |
||||
}; |
||||
let end_offset = self.previous_position.buffer_offset + offset_range.end; |
||||
let (end_extra_line_jumps, end_line_start) = |
||||
Self::find_number_of_line_jumps_and_start_of_last_line( |
||||
&self.data[self.previous_position.buffer_offset..end_offset], |
||||
); |
||||
let end_line_start = if end_extra_line_jumps > 0 { |
||||
end_line_start + self.previous_position.buffer_offset |
||||
} else { |
||||
self.previous_position.line_start_buffer_offset |
||||
}; |
||||
TextPosition { |
||||
line: self.previous_position.global_line + start_extra_line_jumps, |
||||
column: Self::column_from_bytes(&self.data[start_line_start..start_offset]), |
||||
offset: self.previous_position.global_offset |
||||
+ u64::try_from(offset_range.start).unwrap(), |
||||
}..TextPosition { |
||||
line: self.previous_position.global_line + end_extra_line_jumps, |
||||
column: Self::column_from_bytes(&self.data[end_line_start..end_offset]), |
||||
offset: self.previous_position.global_offset + u64::try_from(offset_range.end).unwrap(), |
||||
} |
||||
} |
||||
|
||||
pub fn last_token_location(&self) -> Range<TextPosition> { |
||||
TextPosition { |
||||
line: self.previous_position.global_line, |
||||
column: Self::column_from_bytes( |
||||
&self.data[self.previous_position.line_start_buffer_offset |
||||
..self.previous_position.buffer_offset], |
||||
), |
||||
offset: self.previous_position.global_offset, |
||||
}..TextPosition { |
||||
line: self.position.global_line, |
||||
column: Self::column_from_bytes( |
||||
&self.data[self.position.line_start_buffer_offset..self.position.buffer_offset], |
||||
), |
||||
offset: self.position.global_offset, |
||||
} |
||||
} |
||||
|
||||
pub fn last_token_source(&self) -> Cow<'_, str> { |
||||
String::from_utf8_lossy( |
||||
&self.data[self.previous_position.buffer_offset..self.position.buffer_offset], |
||||
) |
||||
} |
||||
|
||||
pub fn is_end(&self) -> bool { |
||||
self.is_ending && self.data.len() == self.position.buffer_offset |
||||
} |
||||
|
||||
#[allow(clippy::unwrap_in_result)] |
||||
fn skip_whitespaces_and_comments(&mut self) -> Option<()> { |
||||
loop { |
||||
self.skip_whitespaces()?; |
||||
|
||||
let buf = &self.data[self.position.buffer_offset..]; |
||||
if let Some(line_comment_start) = self.line_comment_start { |
||||
if buf.starts_with(line_comment_start) { |
||||
// Comment
|
||||
if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) { |
||||
let mut end_position = line_comment_start.len() + end; |
||||
if buf.get(end_position).copied() == Some(b'\r') { |
||||
// We look for \n for Windows line end style
|
||||
if let Some(c) = buf.get(end_position + 1) { |
||||
if *c == b'\n' { |
||||
end_position += 1; |
||||
} |
||||
} else if !self.is_ending { |
||||
return None; // We need to read more
|
||||
} |
||||
} |
||||
let comment_size = end_position + 1; |
||||
self.position.buffer_offset += comment_size; |
||||
self.position.line_start_buffer_offset = self.position.buffer_offset; |
||||
self.position.global_offset += u64::try_from(comment_size).unwrap(); |
||||
self.position.global_line += 1; |
||||
continue; |
||||
} |
||||
if self.is_ending { |
||||
self.position.buffer_offset = self.data.len(); // EOF
|
||||
return Some(()); |
||||
} |
||||
return None; // We need more data
|
||||
} |
||||
} |
||||
return Some(()); |
||||
} |
||||
} |
||||
|
||||
fn skip_whitespaces(&mut self) -> Option<()> { |
||||
if self.is_line_jump_whitespace { |
||||
let mut i = self.position.buffer_offset; |
||||
while let Some(c) = self.data.get(i) { |
||||
match c { |
||||
b' ' | b'\t' => { |
||||
self.position.buffer_offset += 1; |
||||
self.position.global_offset += 1; |
||||
} |
||||
b'\r' => { |
||||
// We look for \n for Windows line end style
|
||||
let mut increment: u8 = 1; |
||||
if let Some(c) = self.data.get(i + 1) { |
||||
if *c == b'\n' { |
||||
increment += 1; |
||||
i += 1; |
||||
} |
||||
} else if !self.is_ending { |
||||
return None; // We need to read more
|
||||
} |
||||
self.position.buffer_offset += usize::from(increment); |
||||
self.position.line_start_buffer_offset = self.position.buffer_offset; |
||||
self.position.global_offset += u64::from(increment); |
||||
self.position.global_line += 1; |
||||
} |
||||
b'\n' => { |
||||
self.position.buffer_offset += 1; |
||||
self.position.line_start_buffer_offset = self.position.buffer_offset; |
||||
self.position.global_offset += 1; |
||||
self.position.global_line += 1; |
||||
} |
||||
_ => return Some(()), |
||||
} |
||||
i += 1; |
||||
// TODO: SIMD
|
||||
} |
||||
} else { |
||||
for c in &self.data[self.position.buffer_offset..] { |
||||
if matches!(c, b' ' | b'\t') { |
||||
self.position.buffer_offset += 1; |
||||
self.position.global_offset += 1; |
||||
} else { |
||||
return Some(()); |
||||
} |
||||
// TODO: SIMD
|
||||
} |
||||
} |
||||
Some(()) |
||||
} |
||||
|
||||
fn shrink_data(&mut self) { |
||||
if self.position.line_start_buffer_offset > 0 { |
||||
self.data |
||||
.copy_within(self.position.line_start_buffer_offset.., 0); |
||||
self.data |
||||
.truncate(self.data.len() - self.position.line_start_buffer_offset); |
||||
self.position.buffer_offset -= self.position.line_start_buffer_offset; |
||||
self.position.line_start_buffer_offset = 0; |
||||
self.previous_position = self.position; |
||||
} |
||||
} |
||||
|
||||
fn find_number_of_line_jumps_and_start_of_last_line(bytes: &[u8]) -> (u64, usize) { |
||||
let mut num_of_jumps = 0; |
||||
let mut last_jump_pos = 0; |
||||
let mut previous_cr = 0; |
||||
for pos in memchr2_iter(b'\r', b'\n', bytes) { |
||||
if bytes[pos] == b'\r' { |
||||
previous_cr = pos; |
||||
num_of_jumps += 1; |
||||
last_jump_pos = pos + 1; |
||||
} else { |
||||
if previous_cr < pos - 1 { |
||||
// We count \r\n as a single line jump
|
||||
num_of_jumps += 1; |
||||
} |
||||
last_jump_pos = pos + 1; |
||||
} |
||||
} |
||||
(num_of_jumps, last_jump_pos) |
||||
} |
||||
|
||||
/// Returns the number of Unicode characters in `bytes`, i.e. the column width
/// of the span.
///
/// Only the longest valid UTF-8 prefix contributes to the count: trailing
/// bytes that do not form valid UTF-8 are ignored.
fn column_from_bytes(bytes: &[u8]) -> u64 {
    let valid_prefix = match str::from_utf8(bytes) {
        Ok(s) => s,
        // `valid_up_to` marks the longest prefix that is valid UTF-8, so
        // re-decoding that prefix cannot fail; fall back to "" defensively.
        Err(e) => str::from_utf8(&bytes[..e.valid_up_to()]).unwrap_or(""),
    };
    u64::try_from(valid_prefix.chars().count()).unwrap()
}
||||
} |
@ -0,0 +1,13 @@ |
||||
//! oxttl parsing toolkit.
|
||||
//!
|
||||
//! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk.
|
||||
|
||||
mod error; |
||||
mod lexer; |
||||
mod parser; |
||||
|
||||
pub use self::error::{TextPosition, TurtleParseError, TurtleSyntaxError}; |
||||
pub use self::lexer::{Lexer, TokenRecognizer, TokenRecognizerError}; |
||||
#[cfg(feature = "async-tokio")] |
||||
pub use self::parser::FromTokioAsyncReadIterator; |
||||
pub use self::parser::{FromReadIterator, Parser, RuleRecognizer, RuleRecognizerError}; |
@ -0,0 +1,183 @@ |
||||
use crate::oxttl::toolkit::error::{TurtleParseError, TurtleSyntaxError}; |
||||
use crate::oxttl::toolkit::lexer::{Lexer, TokenRecognizer}; |
||||
use std::io::Read; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncRead; |
||||
|
||||
/// A push-based grammar recognizer driven by [`Parser`].
///
/// Implementors are state machines: each `recognize_next` call consumes `self`
/// and returns the successor state.
pub trait RuleRecognizer: Sized {
    /// The token recognizer this grammar consumes tokens from.
    type TokenRecognizer: TokenRecognizer;
    /// The values produced by the grammar (e.g. triples or quads).
    type Output;
    /// Mutable state shared between the parser and the recognizer.
    type Context;

    /// Returns the state to resume from after a syntax error.
    fn error_recovery_state(self) -> Self;

    /// Consumes one token and returns the next recognizer state, appending any
    /// finished outputs to `results` and any syntax problems to `errors`.
    fn recognize_next(
        self,
        token: <Self::TokenRecognizer as TokenRecognizer>::Token<'_>,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self;

    /// Called once when the token stream is exhausted, to flush any pending
    /// outputs and errors.
    fn recognize_end(
        self,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    );

    /// The lexer options to use, derived from the current context.
    fn lexer_options(
        context: &Self::Context,
    ) -> &<Self::TokenRecognizer as TokenRecognizer>::Options;
}
||||
|
||||
/// An error raised by a [`RuleRecognizer`] while recognizing a grammar rule.
pub struct RuleRecognizerError {
    /// Human-readable description; the literal placeholder `TOKEN` is replaced
    /// with the offending token's source text by [`Parser::read_next`].
    pub message: String,
}
||||
|
||||
impl<S: Into<String>> From<S> for RuleRecognizerError { |
||||
fn from(message: S) -> Self { |
||||
Self { |
||||
message: message.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Generic push parser: couples a [`Lexer`] with a [`RuleRecognizer`] state machine.
// `context` is deliberately public so callers can inspect parsing state
// (e.g. prefixes, base IRI), hence the lint allow.
#[allow(clippy::partial_pub_fields)]
pub struct Parser<RR: RuleRecognizer> {
    lexer: Lexer<RR::TokenRecognizer>,
    // Current recognizer state; `None` once the end of input has been processed.
    state: Option<RR>,
    pub context: RR::Context,
    // Outputs recognized but not yet handed to the caller (drained with `pop`).
    results: Vec<RR::Output>,
    // Pending syntax errors; reported before any buffered result.
    errors: Vec<RuleRecognizerError>,
}
||||
|
||||
impl<RR: RuleRecognizer> Parser<RR> {
    /// Builds a parser from a lexer, the recognizer start state and its context.
    pub fn new(lexer: Lexer<RR::TokenRecognizer>, recognizer: RR, context: RR::Context) -> Self {
        Self {
            lexer,
            state: Some(recognizer),
            context,
            results: vec![],
            errors: vec![],
        }
    }

    /// Adds bytes to the lexer buffer.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.lexer.extend_from_slice(other)
    }

    /// Signals that the whole input has been provided.
    #[inline]
    pub fn end(&mut self) {
        self.lexer.end()
    }

    /// Whether parsing is finished: the recognizer has been consumed and no
    /// buffered result or error remains to be reported.
    #[inline]
    pub fn is_end(&self) -> bool {
        self.state.is_none() && self.results.is_empty() && self.errors.is_empty()
    }

    /// Returns the next output or syntax error, or `None` when more input is
    /// needed (or parsing is over).
    ///
    /// Buffered errors are reported before buffered results; the placeholder
    /// `TOKEN` in error messages is replaced with the last token's source text.
    pub fn read_next(&mut self) -> Option<Result<RR::Output, TurtleSyntaxError>> {
        loop {
            // 1. Flush buffered errors first...
            if let Some(error) = self.errors.pop() {
                return Some(Err(TurtleSyntaxError {
                    location: self.lexer.last_token_location(),
                    message: error
                        .message
                        .replace("TOKEN", &self.lexer.last_token_source()),
                }));
            }
            // 2. ...then buffered results.
            if let Some(result) = self.results.pop() {
                return Some(Ok(result));
            }
            // 3. Otherwise advance the lexer by one token and feed it to the
            //    recognizer, which moves to its next state.
            if let Some(result) = self.lexer.read_next(RR::lexer_options(&self.context)) {
                match result {
                    Ok(token) => {
                        self.state = self.state.take().map(|state| {
                            state.recognize_next(
                                token,
                                &mut self.context,
                                &mut self.results,
                                &mut self.errors,
                            )
                        });
                        continue;
                    }
                    Err(e) => {
                        // Lexing failed: move the recognizer to its recovery
                        // state so parsing can resume after the bad token.
                        self.state = self.state.take().map(RR::error_recovery_state);
                        return Some(Err(e));
                    }
                }
            }
            // 4. No token available: either finish the grammar (end of input,
            //    `?` returns None if already finished) or ask for more bytes.
            if self.lexer.is_end() {
                self.state.take()?.recognize_end(
                    &mut self.context,
                    &mut self.results,
                    &mut self.errors,
                )
            } else {
                return None;
            }
        }
    }

    /// Wraps this parser into an [`Iterator`] pulling bytes from `read`.
    pub fn parse_read<R: Read>(self, read: R) -> FromReadIterator<R, RR> {
        FromReadIterator { read, parser: self }
    }

    /// Wraps this parser into an async reader pulling bytes from `read`.
    #[cfg(feature = "async-tokio")]
    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
        self,
        read: R,
    ) -> FromTokioAsyncReadIterator<R, RR> {
        FromTokioAsyncReadIterator { read, parser: self }
    }
}
||||
|
||||
/// Iterator adapter created by [`Parser::parse_read`]: pulls more bytes from a
/// [`Read`] implementation whenever the parser runs out of buffered data.
// `parser` is public so callers can inspect the parsing context, hence the allow.
#[allow(clippy::partial_pub_fields)]
pub struct FromReadIterator<R: Read, RR: RuleRecognizer> {
    read: R,
    pub parser: Parser<RR>,
}
||||
|
||||
impl<R: Read, RR: RuleRecognizer> Iterator for FromReadIterator<R, RR> { |
||||
type Item = Result<RR::Output, TurtleParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
while !self.parser.is_end() { |
||||
if let Some(result) = self.parser.read_next() { |
||||
return Some(result.map_err(TurtleParseError::Syntax)); |
||||
} |
||||
if let Err(e) = self.parser.lexer.extend_from_read(&mut self.read) { |
||||
return Some(Err(e.into())); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
} |
||||
|
||||
/// Async counterpart of [`FromReadIterator`], created by
/// [`Parser::parse_tokio_async_read`].
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadIterator<R: AsyncRead + Unpin, RR: RuleRecognizer> {
    pub read: R,
    pub parser: Parser<RR>,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin, RR: RuleRecognizer> FromTokioAsyncReadIterator<R, RR> {
    /// Pulls the next parsed value, asynchronously reading more bytes from the
    /// underlying reader whenever the parser is starved.
    pub async fn next(&mut self) -> Option<Result<RR::Output, TurtleParseError>> {
        loop {
            if self.parser.is_end() {
                return None;
            }
            if let Some(result) = self.parser.read_next() {
                return Some(result.map_err(TurtleParseError::Syntax));
            }
            // The parser needs more input: await another chunk.
            let filled = self
                .parser
                .lexer
                .extend_from_tokio_async_read(&mut self.read)
                .await;
            if let Err(e) = filled {
                return Some(Err(e.into()));
            }
        }
    }
}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,878 @@ |
||||
//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`]
|
||||
//! and a serializer implemented by [`TurtleSerializer`].
|
||||
|
||||
use crate::oxrdf::{GraphNameRef, Triple, TripleRef}; |
||||
use crate::oxttl::terse::TriGRecognizer; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::toolkit::FromTokioAsyncReadIterator; |
||||
use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::oxttl::trig::ToTokioAsyncWriteTriGWriter; |
||||
use crate::oxttl::trig::{LowLevelTriGWriter, ToWriteTriGWriter, TriGSerializer}; |
||||
use oxiri::{Iri, IriParseError}; |
||||
use std::collections::hash_map::Iter; |
||||
use std::collections::HashMap; |
||||
use std::io::{self, Read, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::{AsyncRead, AsyncWrite}; |
||||
|
||||
/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser.
|
||||
///
|
||||
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" .
|
||||
/// <bar> a schema:Person ;
|
||||
/// schema:name "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for triple in TurtleParser::new().parse_read(file.as_ref()) {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)]
#[must_use]
pub struct TurtleParser {
    // When true, some validations are skipped (see `unchecked`).
    unchecked: bool,
    // Base IRI used to resolve relative IRIs, if one was provided.
    base: Option<Iri<String>>,
    // Prefixes pre-declared through `with_prefix`.
    prefixes: HashMap<String, Iri<String>>,
    // Whether Turtle-star quoted triples are accepted.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
||||
|
||||
impl TurtleParser { |
||||
/// Builds a new [`TurtleParser`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self::default() |
||||
} |
||||
|
||||
/// Assumes the file is valid to make parsing faster.
|
||||
///
|
||||
/// It will skip some validations.
|
||||
///
|
||||
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
|
||||
#[inline] |
||||
pub fn unchecked(mut self) -> Self { |
||||
self.unchecked = true; |
||||
self |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> { |
||||
self.base = Some(Iri::parse(base_iri.into())?); |
||||
Ok(self) |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.prefixes |
||||
.insert(prefix_name.into(), Iri::parse(prefix_iri.into())?); |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Enables [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star).
|
||||
#[cfg(feature = "rdf-star")] |
||||
#[inline] |
||||
pub fn with_quoted_triples(mut self) -> Self { |
||||
self.with_quoted_triples = true; |
||||
self |
||||
} |
||||
|
||||
/// Parses a Turtle file from a [`Read`] implementation.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" .
|
||||
/// <bar> a schema:Person ;
|
||||
/// schema:name "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for triple in TurtleParser::new().parse_read(file.as_ref()) {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>(self, read: R) -> FromReadTurtleReader<R> { |
||||
FromReadTurtleReader { |
||||
inner: self.parse().parser.parse_read(read), |
||||
} |
||||
} |
||||
|
||||
/// Parses a Turtle file from a [`AsyncRead`] implementation.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" .
|
||||
/// <bar> a schema:Person ;
|
||||
/// schema:name "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
|
||||
/// while let Some(triple) = parser.next().await {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>( |
||||
self, |
||||
read: R, |
||||
) -> FromTokioAsyncReadTurtleReader<R> { |
||||
FromTokioAsyncReadTurtleReader { |
||||
inner: self.parse().parser.parse_tokio_async_read(read), |
||||
} |
||||
} |
||||
|
||||
/// Allows to parse a Turtle file by using a low-level API.
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file: [&[u8]; 5] = [
|
||||
/// b"@base <http://example.com/>",
|
||||
/// b". @prefix schema: <http://schema.org/> .",
|
||||
/// b"<foo> a schema:Person",
|
||||
/// b" ; schema:name \"Foo\" . <bar>",
|
||||
/// b" a schema:Person ; schema:name \"Bar\" .",
|
||||
/// ];
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = TurtleParser::new().parse();
|
||||
/// let mut file_chunks = file.iter();
|
||||
/// while !parser.is_end() {
|
||||
/// // We feed more data to the parser
|
||||
/// if let Some(chunk) = file_chunks.next() {
|
||||
/// parser.extend_from_slice(chunk);
|
||||
/// } else {
|
||||
/// parser.end(); // It's finished
|
||||
/// }
|
||||
/// // We read as many triples from the parser as possible
|
||||
/// while let Some(triple) = parser.read_next() {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn parse(self) -> LowLevelTurtleReader { |
||||
LowLevelTurtleReader { |
||||
parser: TriGRecognizer::new_parser( |
||||
false, |
||||
#[cfg(feature = "rdf-star")] |
||||
self.with_quoted_triples, |
||||
self.unchecked, |
||||
self.base, |
||||
self.prefixes, |
||||
), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Parses a Turtle file from a [`Read`] implementation. Can be built using [`TurtleParser::parse_read`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" .
|
||||
/// <bar> a schema:Person ;
|
||||
/// schema:name "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// for triple in TurtleParser::new().parse_read(file.as_ref()) {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use]
pub struct FromReadTurtleReader<R: Read> {
    // Underlying recognizer-driven iterator; its outputs are converted to
    // `Triple`s by the `Iterator` impl below.
    inner: FromReadIterator<R, TriGRecognizer>,
}
||||
|
||||
impl<R: Read> FromReadTurtleReader<R> { |
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// ```
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = TurtleParser::new().parse_read(file.as_ref());
|
||||
/// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> TurtlePrefixesIter<'_> { |
||||
TurtlePrefixesIter { |
||||
inner: self.inner.parser.context.prefixes(), |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// ```
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = TurtleParser::new().parse_read(file.as_ref());
|
||||
/// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
|
||||
///
|
||||
/// reader.next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
self.inner |
||||
.parser |
||||
.context |
||||
.lexer_options |
||||
.base_iri |
||||
.as_ref() |
||||
.map(Iri::as_str) |
||||
} |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadTurtleReader<R> { |
||||
type Item = Result<Triple, TurtleParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some(self.inner.next()?.map(Into::into)) |
||||
} |
||||
} |
||||
|
||||
/// Parses a Turtle file from a [`AsyncRead`] implementation. Can be built using [`TurtleParser::parse_tokio_async_read`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" .
|
||||
/// <bar> a schema:Person ;
|
||||
/// schema:name "Bar" ."#;
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
|
||||
/// while let Some(triple) = parser.next().await {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadTurtleReader<R: AsyncRead + Unpin> {
    // Underlying recognizer-driven async iterator; its outputs are converted
    // to `Triple`s by `next`.
    inner: FromTokioAsyncReadIterator<R, TriGRecognizer>,
}
||||
|
||||
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadTurtleReader<R> {
    /// Reads the next triple, or returns `None` once the file is fully parsed.
    pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
        let result = self.inner.next().await?;
        Some(result.map(Into::into))
    }

    /// The (prefix name, prefix IRI) pairs known at the current step of the
    /// parsing.
    ///
    /// Empty before any `@prefix` directive has been read and updated as the
    /// parsing progresses; when a prefix is redeclared, only its latest value
    /// is reported.
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        let inner = self.inner.parser.context.prefixes();
        TurtlePrefixesIter { inner }
    }

    /// The base IRI known at the current step of the parsing, if any.
    pub fn base_iri(&self) -> Option<&str> {
        let context = &self.inner.parser.context;
        context.lexer_options.base_iri.as_ref().map(Iri::as_str)
    }
}
||||
|
||||
/// Parses a Turtle file by using a low-level API. Can be built using [`TurtleParser::parse`].
|
||||
///
|
||||
/// Count the number of people:
|
||||
/// ```
|
||||
/// use oxrdf::vocab::rdf;
|
||||
/// use oxrdf::NamedNodeRef;
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file: [&[u8]; 5] = [
|
||||
/// b"@base <http://example.com/>",
|
||||
/// b". @prefix schema: <http://schema.org/> .",
|
||||
/// b"<foo> a schema:Person",
|
||||
/// b" ; schema:name \"Foo\" . <bar>",
|
||||
/// b" a schema:Person ; schema:name \"Bar\" .",
|
||||
/// ];
|
||||
///
|
||||
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
|
||||
/// let mut count = 0;
|
||||
/// let mut parser = TurtleParser::new().parse();
|
||||
/// let mut file_chunks = file.iter();
|
||||
/// while !parser.is_end() {
|
||||
/// // We feed more data to the parser
|
||||
/// if let Some(chunk) = file_chunks.next() {
|
||||
/// parser.extend_from_slice(chunk);
|
||||
/// } else {
|
||||
/// parser.end(); // It's finished
|
||||
/// }
|
||||
/// // We read as many triples from the parser as possible
|
||||
/// while let Some(triple) = parser.read_next() {
|
||||
/// let triple = triple?;
|
||||
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
|
||||
/// count += 1;
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// assert_eq!(2, count);
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct LowLevelTurtleReader {
    // Push parser over the TriG grammar (Turtle shares its recognizer;
    // see `TurtleParser::parse`).
    parser: Parser<TriGRecognizer>,
}
||||
|
||||
impl LowLevelTurtleReader { |
||||
/// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
|
||||
pub fn extend_from_slice(&mut self, other: &[u8]) { |
||||
self.parser.extend_from_slice(other) |
||||
} |
||||
|
||||
/// Tell the parser that the file is finished.
|
||||
///
|
||||
/// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
|
||||
pub fn end(&mut self) { |
||||
self.parser.end() |
||||
} |
||||
|
||||
/// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
|
||||
pub fn is_end(&self) -> bool { |
||||
self.parser.is_end() |
||||
} |
||||
|
||||
/// Attempt to parse a new triple from the already provided data.
|
||||
///
|
||||
/// Returns [`None`] if the parsing is finished or more data is required.
|
||||
/// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
|
||||
pub fn read_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> { |
||||
Some(self.parser.read_next()?.map(Into::into)) |
||||
} |
||||
|
||||
/// The list of IRI prefixes considered at the current step of the parsing.
|
||||
///
|
||||
/// This method returns (prefix name, prefix value) tuples.
|
||||
/// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
|
||||
/// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
|
||||
///
|
||||
/// ```
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = TurtleParser::new().parse();
|
||||
/// reader.extend_from_slice(file);
|
||||
/// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
|
||||
///
|
||||
/// reader.read_next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(
|
||||
/// reader.prefixes().collect::<Vec<_>>(),
|
||||
/// [("schema", "http://schema.org/")]
|
||||
/// ); // There are now prefixes
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn prefixes(&self) -> TurtlePrefixesIter<'_> { |
||||
TurtlePrefixesIter { |
||||
inner: self.parser.context.prefixes(), |
||||
} |
||||
} |
||||
|
||||
/// The base IRI considered at the current step of the parsing.
|
||||
///
|
||||
/// ```
|
||||
/// use oxttl::TurtleParser;
|
||||
///
|
||||
/// let file = br#"@base <http://example.com/> .
|
||||
/// @prefix schema: <http://schema.org/> .
|
||||
/// <foo> a schema:Person ;
|
||||
/// schema:name "Foo" ."#;
|
||||
///
|
||||
/// let mut reader = TurtleParser::new().parse();
|
||||
/// reader.extend_from_slice(file);
|
||||
/// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
|
||||
///
|
||||
/// reader.read_next().unwrap()?; // We read the first triple
|
||||
/// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn base_iri(&self) -> Option<&str> { |
||||
self.parser |
||||
.context |
||||
.lexer_options |
||||
.base_iri |
||||
.as_ref() |
||||
.map(Iri::as_str) |
||||
} |
||||
} |
||||
|
||||
/// Iterator on the file prefixes.
|
||||
///
|
||||
/// See [`LowLevelTurtleReader::prefixes`].
|
||||
pub struct TurtlePrefixesIter<'a> {
    // Borrowing iterator over the parser's (prefix name -> IRI) map.
    inner: Iter<'a, String, Iri<String>>,
}
||||
|
||||
impl<'a> Iterator for TurtlePrefixesIter<'a> { |
||||
type Item = (&'a str, &'a str); |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
let (key, value) = self.inner.next()?; |
||||
Some((key.as_str(), value.as_str())) |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
self.inner.size_hint() |
||||
} |
||||
} |
||||
|
||||
/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
|
||||
///
|
||||
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)]
#[must_use]
pub struct TurtleSerializer {
    // Turtle output is produced by delegating to the TriG serializer.
    inner: TriGSerializer,
}
||||
|
||||
impl TurtleSerializer { |
||||
/// Builds a new [`TurtleSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self::default() |
||||
} |
||||
|
||||
#[inline] |
||||
pub fn with_prefix( |
||||
mut self, |
||||
prefix_name: impl Into<String>, |
||||
prefix_iri: impl Into<String>, |
||||
) -> Result<Self, IriParseError> { |
||||
self.inner = self.inner.with_prefix(prefix_name, prefix_iri)?; |
||||
Ok(self) |
||||
} |
||||
|
||||
/// Writes a Turtle file to a [`Write`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteTurtleWriter<W> { |
||||
ToWriteTurtleWriter { |
||||
inner: self.inner.serialize_to_write(write), |
||||
} |
||||
} |
||||
|
||||
/// Writes a Turtle file to a [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer
|
||||
/// .write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new_unchecked("http://example.com#me"),
|
||||
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
/// NamedNodeRef::new_unchecked("http://schema.org/Person"),
|
||||
/// ))
|
||||
/// .await?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// writer.finish().await?.as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
self, |
||||
write: W, |
||||
) -> ToTokioAsyncWriteTurtleWriter<W> { |
||||
ToTokioAsyncWriteTurtleWriter { |
||||
inner: self.inner.serialize_to_tokio_async_write(write), |
||||
} |
||||
} |
||||
|
||||
/// Builds a low-level Turtle writer.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// let mut buf = Vec::new();
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize();
|
||||
/// writer.write_triple(
|
||||
/// TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ),
|
||||
/// &mut buf,
|
||||
/// )?;
|
||||
/// writer.finish(&mut buf)?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// buf.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub fn serialize(self) -> LowLevelTurtleWriter {
    LowLevelTurtleWriter {
        // Low-level variant: the caller supplies the `Write` on every call.
        inner: self.inner.serialize(),
    }
}
||||
} |
||||
|
||||
/// Writes a Turtle file to a [`Write`] implementation. Can be built using [`TurtleSerializer::serialize_to_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[must_use]
pub struct ToWriteTurtleWriter<W: Write> {
    // Turtle is serialized through the TriG writer (default graph only).
    inner: ToWriteTriGWriter<W>,
}

impl<W: Write> ToWriteTurtleWriter<W> {
    /// Writes an extra triple.
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // A Turtle triple is a TriG quad in the default graph.
        self.inner
            .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph))
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> io::Result<W> {
        self.inner.finish()
    }
}
||||
|
||||
/// Writes a Turtle file to a [`AsyncWrite`] implementation. Can be built using [`TurtleSerializer::serialize_to_tokio_async_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize_to_tokio_async_write(Vec::new());
|
||||
/// writer
|
||||
/// .write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new_unchecked("http://example.com#me"),
|
||||
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
/// NamedNodeRef::new_unchecked("http://schema.org/Person"),
|
||||
/// ))
|
||||
/// .await?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// writer.finish().await?.as_slice()
|
||||
/// );
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteTurtleWriter<W: AsyncWrite + Unpin> {
    // Turtle is serialized through the TriG writer (default graph only).
    inner: ToTokioAsyncWriteTriGWriter<W>,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTurtleWriter<W> {
    /// Writes an extra triple.
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // A Turtle triple is a TriG quad in the default graph.
        self.inner
            .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph))
            .await
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    pub async fn finish(self) -> io::Result<W> {
        self.inner.finish().await
    }
}
||||
|
||||
/// Writes a Turtle file by using a low-level API. Can be built using [`TurtleSerializer::serialize`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxttl::TurtleSerializer;
|
||||
///
|
||||
/// let mut buf = Vec::new();
|
||||
/// let mut writer = TurtleSerializer::new()
|
||||
/// .with_prefix("schema", "http://schema.org/")?
|
||||
/// .serialize();
|
||||
/// writer.write_triple(
|
||||
/// TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ),
|
||||
/// &mut buf,
|
||||
/// )?;
|
||||
/// writer.finish(&mut buf)?;
|
||||
/// assert_eq!(
|
||||
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
|
||||
/// buf.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct LowLevelTurtleWriter {
    // Turtle is serialized through the TriG writer (default graph only).
    inner: LowLevelTriGWriter,
}

impl LowLevelTurtleWriter {
    /// Writes an extra triple.
    pub fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        write: impl Write,
    ) -> io::Result<()> {
        // A Turtle triple is a TriG quad in the default graph.
        self.inner
            .write_quad(t.into().in_graph(GraphNameRef::DefaultGraph), write)
    }

    /// Finishes to write the file.
    pub fn finish(&mut self, write: impl Write) -> io::Result<()> {
        self.inner.finish(write)
    }
}
||||
|
||||
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use crate::oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};

    /// Checks predicate/object grouping (`;` and `,` separators),
    /// language-tagged literals and blank nodes in the Turtle output.
    #[test]
    fn test_write() -> io::Result<()> {
        let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new());
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            NamedNodeRef::new_unchecked("http://example.com/o"),
        ))?;
        // Same subject and predicate: should be grouped with `,`.
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            LiteralRef::new_simple_literal("foo"),
        ))?;
        // Same subject, new predicate: should be grouped with `;`.
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
        ))?;
        writer.write_triple(TripleRef::new(
            BlankNodeRef::new_unchecked("b"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            BlankNodeRef::new_unchecked("b2"),
        ))?;
        assert_eq!(String::from_utf8(writer.finish()?).unwrap(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n");
        Ok(())
    }
}
@ -0,0 +1,72 @@ |
||||
Sparesults |
||||
========== |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/sparesults.svg)](https://crates.io/crates/sparesults) |
||||
[![Released API docs](https://docs.rs/sparesults/badge.svg)](https://docs.rs/sparesults) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/sparesults)](https://crates.io/crates/sparesults) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
Sparesults is a set of parsers and serializers for [SPARQL](https://www.w3.org/TR/sparql11-overview/) query results formats. |
||||
|
||||
It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/). |
||||
|
||||
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#query-result-formats) is also available behind the `rdf-star` feature. |
||||
|
||||
This crate is intended to be a building block for SPARQL client and server implementations in Rust like [Oxigraph](https://oxigraph.org).
||||
|
||||
The entry points of this library are the two [`QueryResultsParser`] and [`QueryResultsSerializer`] structs. |
||||
|
||||
Usage example converting a JSON result file into a TSV result file: |
||||
```rust |
||||
use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer}; |
||||
use std::io::Result; |
||||
|
||||
fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> { |
||||
let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json); |
||||
let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv); |
||||
// We start to read the JSON file and see which kind of results it is |
||||
match json_parser.parse_read(json_file)? { |
||||
FromReadQueryResultsReader::Boolean(value) => { |
||||
// it's a boolean result, we copy it in TSV to the output buffer |
||||
tsv_serializer.serialize_boolean_to_write(Vec::new(), value) |
||||
}, |
||||
FromReadQueryResultsReader::Solutions(solutions_reader) => { |
||||
// it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file |
||||
let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?; |
||||
for solution in solutions_reader { |
||||
serialize_solutions_to_write.write(&solution?)?; |
||||
} |
||||
serialize_solutions_to_write.finish() |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Let's test with a boolean |
||||
assert_eq!( |
||||
convert_json_to_tsv(b"{\"boolean\":true}".as_slice()).unwrap(), |
||||
b"true" |
||||
); |
||||
|
||||
// And with a set of solutions |
||||
assert_eq!( |
||||
convert_json_to_tsv(b"{\"head\":{\"vars\":[\"foo\",\"bar\"]},\"results\":{\"bindings\":[{\"foo\":{\"type\":\"literal\",\"value\":\"test\"}}]}}".as_slice()).unwrap(), |
||||
b"?foo\t?bar\n\"test\"\t\n" |
||||
); |
||||
``` |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,948 @@ |
||||
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
|
||||
|
||||
use crate::oxrdf::vocab::xsd; |
||||
use crate::oxrdf::*; |
||||
use crate::sparesults::error::{ |
||||
QueryResultsParseError, QueryResultsSyntaxError, SyntaxErrorKind, TextPosition, |
||||
}; |
||||
use memchr::memchr; |
||||
use std::io::{self, Read, Write}; |
||||
use std::str::{self, FromStr}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; |
||||
|
||||
const MAX_BUFFER_SIZE: usize = 4096 * 4096; |
||||
|
||||
/// Serializes a SPARQL boolean query result ("true" or "false") to `write`
/// and hands the writer back to the caller on success.
pub fn write_boolean_csv_result<W: Write>(mut write: W, value: bool) -> io::Result<W> {
    let text: &[u8] = match value {
        true => b"true",
        false => b"false",
    };
    write.write_all(text)?;
    Ok(write)
}
||||
|
||||
/// Serializes a SPARQL boolean query result ("true" or "false") to an async
/// writer and hands the writer back on success.
#[cfg(feature = "async-tokio")]
pub async fn tokio_async_write_boolean_csv_result<W: AsyncWrite + Unpin>(
    mut write: W,
    value: bool,
) -> io::Result<W> {
    write
        .write_all(if value { b"true" } else { b"false" })
        .await?;
    Ok(write)
}
||||
|
||||
/// Streaming CSV solutions writer over a [`Write`] implementation.
///
/// Rows are rendered into an internal `String` buffer by
/// [`InnerCsvSolutionsWriter`], then flushed with a single `write_all` per call.
pub struct ToWriteCsvSolutionsWriter<W: Write> {
    inner: InnerCsvSolutionsWriter,
    write: W,
    // Reused across rows to avoid per-row allocations.
    buffer: String,
}

impl<W: Write> ToWriteCsvSolutionsWriter<W> {
    /// Writes the CSV header line (the variable names) and returns the writer.
    pub fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerCsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes())?;
        buffer.clear();
        Ok(Self {
            inner,
            write,
            buffer,
        })
    }

    /// Writes a single solution row.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        self.write.write_all(self.buffer.as_bytes())?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying [`Write`].
    pub fn finish(self) -> W {
        self.write
    }
}
||||
|
||||
/// Streaming CSV solutions writer over a Tokio [`AsyncWrite`] implementation.
///
/// Mirrors [`ToWriteCsvSolutionsWriter`]: rows are rendered to an internal
/// buffer and flushed with a single `write_all` per call.
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteCsvSolutionsWriter<W: AsyncWrite + Unpin> {
    inner: InnerCsvSolutionsWriter,
    write: W,
    // Reused across rows to avoid per-row allocations.
    buffer: String,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteCsvSolutionsWriter<W> {
    /// Writes the CSV header line (the variable names) and returns the writer.
    pub async fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerCsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes()).await?;
        buffer.clear();
        Ok(Self {
            inner,
            write,
            buffer,
        })
    }

    /// Writes a single solution row.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        self.write.write_all(self.buffer.as_bytes()).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying [`AsyncWrite`].
    pub fn finish(self) -> W {
        self.write
    }
}
||||
|
||||
/// Format-level CSV writer: renders the header and rows into a `String`,
/// independent of any I/O sink.
struct InnerCsvSolutionsWriter {
    variables: Vec<Variable>,
}

impl InnerCsvSolutionsWriter {
    /// Renders the comma-separated header of variable names followed by CRLF.
    fn start(output: &mut String, variables: Vec<Variable>) -> Self {
        let mut start_vars = true;
        for variable in &variables {
            if start_vars {
                start_vars = false;
            } else {
                output.push(',');
            }
            output.push_str(variable.as_str());
        }
        output.push_str("\r\n");
        Self { variables }
    }

    /// Renders one solution row terminated by CRLF: values are emitted in
    /// header order, unbound variables produce empty cells, and bindings for
    /// variables not in the header are ignored.
    fn write<'a>(
        &self,
        output: &mut String,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) {
        // Reorder the bindings to match the header columns.
        let mut values = vec![None; self.variables.len()];
        for (variable, value) in solution {
            if let Some(position) = self.variables.iter().position(|v| *v == variable) {
                values[position] = Some(value);
            }
        }
        let mut start_binding = true;
        for value in values {
            if start_binding {
                start_binding = false;
            } else {
                output.push(',');
            }
            if let Some(value) = value {
                write_csv_term(output, value);
            }
        }
        output.push_str("\r\n");
    }
}
||||
|
||||
/// Appends the CSV rendering of `term` to `output`: IRIs are written bare,
/// blank nodes with a `_:` prefix, and literals as their (escaped) lexical
/// value only — datatype and language tag are dropped.
fn write_csv_term<'a>(output: &mut String, term: impl Into<TermRef<'a>>) {
    match term.into() {
        TermRef::NamedNode(uri) => output.push_str(uri.as_str()),
        TermRef::BlankNode(bnode) => {
            output.push_str("_:");
            output.push_str(bnode.as_str())
        }
        TermRef::Literal(literal) => write_escaped_csv_string(output, literal.value()),
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => {
            // RDF-star embedded triples are flattened to "s p o".
            write_csv_term(output, &triple.subject);
            output.push(' ');
            write_csv_term(output, &triple.predicate);
            output.push(' ');
            write_csv_term(output, &triple.object)
        }
    }
}
||||
|
||||
/// Appends `s` to `output` using CSV quoting rules: the value is wrapped in
/// double quotes (with inner quotes doubled) only when it contains a quote,
/// comma, CR or LF; otherwise it is copied verbatim.
fn write_escaped_csv_string(output: &mut String, s: &str) {
    let needs_quoting = s.bytes().any(|b| matches!(b, b'"' | b',' | b'\n' | b'\r'));
    if !needs_quoting {
        output.push_str(s);
        return;
    }
    output.push('"');
    for c in s.chars() {
        match c {
            // A double quote is escaped by doubling it.
            '"' => output.push_str("\"\""),
            _ => output.push(c),
        }
    }
    output.push('"');
}
||||
|
||||
/// Streaming TSV solutions writer over a [`Write`] implementation.
///
/// Rows are rendered into an internal `String` buffer by
/// [`InnerTsvSolutionsWriter`], then flushed with a single `write_all` per call.
pub struct ToWriteTsvSolutionsWriter<W: Write> {
    inner: InnerTsvSolutionsWriter,
    write: W,
    // Reused across rows to avoid per-row allocations.
    buffer: String,
}

impl<W: Write> ToWriteTsvSolutionsWriter<W> {
    /// Writes the TSV header line (`?`-prefixed variable names) and returns the writer.
    pub fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerTsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes())?;
        buffer.clear();
        Ok(Self {
            inner,
            write,
            buffer,
        })
    }

    /// Writes a single solution row.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        self.write.write_all(self.buffer.as_bytes())?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying [`Write`].
    pub fn finish(self) -> W {
        self.write
    }
}
||||
|
||||
/// Streaming TSV solutions writer over a Tokio [`AsyncWrite`] implementation.
///
/// Mirrors [`ToWriteTsvSolutionsWriter`]: rows are rendered to an internal
/// buffer and flushed with a single `write_all` per call.
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteTsvSolutionsWriter<W: AsyncWrite + Unpin> {
    inner: InnerTsvSolutionsWriter,
    write: W,
    // Reused across rows to avoid per-row allocations.
    buffer: String,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTsvSolutionsWriter<W> {
    /// Writes the TSV header line (`?`-prefixed variable names) and returns the writer.
    pub async fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerTsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes()).await?;
        buffer.clear();
        Ok(Self {
            inner,
            write,
            buffer,
        })
    }

    /// Writes a single solution row.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        self.write.write_all(self.buffer.as_bytes()).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying [`AsyncWrite`].
    pub fn finish(self) -> W {
        self.write
    }
}
||||
|
||||
/// Format-level TSV writer: renders the header and rows into a `String`,
/// independent of any I/O sink.
struct InnerTsvSolutionsWriter {
    variables: Vec<Variable>,
}

impl InnerTsvSolutionsWriter {
    /// Renders the tab-separated header of `?`-prefixed variable names
    /// followed by a newline.
    fn start(output: &mut String, variables: Vec<Variable>) -> Self {
        let mut start_vars = true;
        for variable in &variables {
            if start_vars {
                start_vars = false;
            } else {
                output.push('\t');
            }
            output.push('?');
            output.push_str(variable.as_str());
        }
        output.push('\n');
        Self { variables }
    }

    /// Renders one newline-terminated solution row: values are emitted in
    /// header order, unbound variables produce empty cells, and bindings for
    /// variables not in the header are ignored.
    fn write<'a>(
        &self,
        output: &mut String,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) {
        // Reorder the bindings to match the header columns.
        let mut values = vec![None; self.variables.len()];
        for (variable, value) in solution {
            if let Some(position) = self.variables.iter().position(|v| *v == variable) {
                values[position] = Some(value);
            }
        }
        let mut start_binding = true;
        for value in values {
            if start_binding {
                start_binding = false;
            } else {
                output.push('\t');
            }
            if let Some(value) = value {
                write_tsv_term(output, value);
            }
        }
        output.push('\n');
    }
}
||||
|
||||
/// Appends the TSV rendering of `term` to `output` in Turtle-compatible
/// syntax: IRIs in angle brackets, blank nodes with `_:`, literals quoted with
/// backslash escapes (plus `@lang` or `^^<datatype>` suffixes as needed).
fn write_tsv_term<'a>(output: &mut String, term: impl Into<TermRef<'a>>) {
    match term.into() {
        TermRef::NamedNode(node) => {
            output.push('<');
            output.push_str(node.as_str());
            output.push('>');
        }
        TermRef::BlankNode(node) => {
            output.push_str("_:");
            output.push_str(node.as_str());
        }
        TermRef::Literal(literal) => {
            let value = literal.value();
            if let Some(language) = literal.language() {
                write_tsv_quoted_str(output, value);
                output.push('@');
                output.push_str(language);
            } else {
                match literal.datatype() {
                    // Use the bare Turtle form only when the lexical value is
                    // itself a valid Turtle literal of that datatype (e.g.
                    // "01" is a valid xsd:integer lexical form but not a bare
                    // Turtle integer, so it keeps the quoted form).
                    xsd::BOOLEAN if is_turtle_boolean(value) => output.push_str(value),
                    xsd::INTEGER if is_turtle_integer(value) => output.push_str(value),
                    xsd::DECIMAL if is_turtle_decimal(value) => output.push_str(value),
                    xsd::DOUBLE if is_turtle_double(value) => output.push_str(value),
                    // xsd:string literals need no datatype suffix.
                    xsd::STRING => write_tsv_quoted_str(output, value),
                    datatype => {
                        write_tsv_quoted_str(output, value);
                        output.push_str("^^");
                        write_tsv_term(output, datatype);
                    }
                }
            }
        }
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => {
            // RDF-star embedded triples use the `<< s p o >>` syntax.
            output.push_str("<< ");
            write_tsv_term(output, &triple.subject);
            output.push(' ');
            write_tsv_term(output, &triple.predicate);
            output.push(' ');
            write_tsv_term(output, &triple.object);
            output.push_str(" >>");
        }
    }
}
||||
|
||||
/// Appends `string` to `output` as a double-quoted TSV literal, escaping
/// tab, newline, carriage return, double quote and backslash with backslashes.
fn write_tsv_quoted_str(output: &mut String, string: &str) {
    output.push('"');
    for c in string.chars() {
        let escaped = match c {
            '\t' => "\\t",
            '\n' => "\\n",
            '\r' => "\\r",
            '"' => "\\\"",
            '\\' => "\\\\",
            _ => {
                output.push(c);
                continue;
            }
        };
        output.push_str(escaped);
    }
    output.push('"');
}
||||
|
||||
/// Returns whether `value` is a bare Turtle boolean literal (`true`/`false`,
/// case-sensitive).
fn is_turtle_boolean(value: &str) -> bool {
    value == "true" || value == "false"
}
||||
|
||||
/// Returns whether `value` matches the Turtle INTEGER production.
fn is_turtle_integer(value: &str) -> bool {
    // [19] INTEGER ::= [+-]? [0-9]+
    let bytes = value.as_bytes();
    let digits = match bytes {
        // Skip a single optional sign.
        [b'+' | b'-', rest @ ..] => rest,
        _ => bytes,
    };
    !digits.is_empty() && digits.iter().all(u8::is_ascii_digit)
}
||||
|
||||
/// Returns whether `value` matches the Turtle DECIMAL production.
fn is_turtle_decimal(value: &str) -> bool {
    // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
    let mut bytes = value.as_bytes();
    // Skip a single optional sign.
    if let [b'+' | b'-', rest @ ..] = bytes {
        bytes = rest;
    }
    // Split at the first '.': everything before must be digits (possibly
    // none), everything after must be at least one digit.
    let Some(dot) = bytes.iter().position(|b| *b == b'.') else {
        return false;
    };
    let (before, after_with_dot) = bytes.split_at(dot);
    let after = &after_with_dot[1..];
    before.iter().all(u8::is_ascii_digit)
        && !after.is_empty()
        && after.iter().all(u8::is_ascii_digit)
}
||||
|
||||
/// Returns whether `value` matches the Turtle DOUBLE production.
fn is_turtle_double(value: &str) -> bool {
    // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
    // [154s] EXPONENT ::= [eE] [+-]? [0-9]+
    // Consumes one optional leading '+'/'-'.
    fn skip_sign(bytes: &[u8]) -> &[u8] {
        match bytes {
            [b'+' | b'-', rest @ ..] => rest,
            _ => bytes,
        }
    }
    // Consumes a run of ASCII digits; reports whether any were seen.
    fn skip_digits(mut bytes: &[u8]) -> (&[u8], bool) {
        let mut seen = false;
        while let [b'0'..=b'9', rest @ ..] = bytes {
            bytes = rest;
            seen = true;
        }
        (bytes, seen)
    }
    let bytes = skip_sign(value.as_bytes());
    let (bytes, with_before) = skip_digits(bytes);
    let (bytes, with_after) = if let [b'.', rest @ ..] = bytes {
        skip_digits(rest)
    } else {
        (bytes, false)
    };
    // The exponent marker is mandatory for a DOUBLE.
    let bytes = match bytes {
        [b'e' | b'E', rest @ ..] => rest,
        _ => return false,
    };
    let bytes = skip_sign(bytes);
    // At least one mantissa digit and one exponent digit are required.
    (with_before || with_after) && !bytes.is_empty() && bytes.iter().all(u8::is_ascii_digit)
}
||||
|
||||
/// Result of parsing the first line of a TSV results file from a [`Read`]:
/// either a streaming set of solutions or a boolean.
pub enum FromReadTsvQueryResultsReader<R: Read> {
    Solutions {
        variables: Vec<Variable>,
        solutions: FromReadTsvSolutionsReader<R>,
    },
    Boolean(bool),
}

impl<R: Read> FromReadTsvQueryResultsReader<R> {
    /// Reads the first line and decides between the boolean and solutions cases.
    pub fn read(mut read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = LineReader::new();
        let mut buffer = Vec::new();
        let line = reader.next_line(&mut buffer, &mut read)?;
        Ok(match inner_read_first_line(reader, line)? {
            TsvInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Self::Solutions {
                variables,
                // The reader keeps ownership of the source and of the line
                // buffer so later rows reuse the same allocation.
                solutions: FromReadTsvSolutionsReader {
                    read,
                    inner: solutions,
                    buffer,
                },
            },
            TsvInnerQueryResults::Boolean(value) => Self::Boolean(value),
        })
    }
}
||||
|
||||
/// Streaming reader over the solution rows of a TSV results file.
pub struct FromReadTsvSolutionsReader<R: Read> {
    read: R,
    inner: TsvInnerSolutionsReader,
    // Line buffer reused across rows.
    buffer: Vec<u8>,
}

impl<R: Read> FromReadTsvSolutionsReader<R> {
    /// Reads and parses the next row, or returns `None` at end of file.
    pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        let line = self
            .inner
            .reader
            .next_line(&mut self.buffer, &mut self.read)?;
        self.inner.read_next(line)
    }
}
||||
|
||||
/// Result of parsing the first line of a TSV results file from an
/// [`AsyncRead`]: either a streaming set of solutions or a boolean.
#[cfg(feature = "async-tokio")]
pub enum FromTokioAsyncReadTsvQueryResultsReader<R: AsyncRead + Unpin> {
    Solutions {
        variables: Vec<Variable>,
        solutions: FromTokioAsyncReadTsvSolutionsReader<R>,
    },
    Boolean(bool),
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadTsvQueryResultsReader<R> {
    /// Reads the first line and decides between the boolean and solutions cases.
    pub async fn read(mut read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = LineReader::new();
        let mut buffer = Vec::new();
        let line = reader.next_line_tokio_async(&mut buffer, &mut read).await?;
        Ok(match inner_read_first_line(reader, line)? {
            TsvInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Self::Solutions {
                variables,
                // The reader keeps ownership of the source and of the line
                // buffer so later rows reuse the same allocation.
                solutions: FromTokioAsyncReadTsvSolutionsReader {
                    read,
                    inner: solutions,
                    buffer,
                },
            },
            TsvInnerQueryResults::Boolean(value) => Self::Boolean(value),
        })
    }
}
||||
|
||||
/// Streaming async reader over the solution rows of a TSV results file.
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadTsvSolutionsReader<R: AsyncRead + Unpin> {
    read: R,
    inner: TsvInnerSolutionsReader,
    // Line buffer reused across rows.
    buffer: Vec<u8>,
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadTsvSolutionsReader<R> {
    /// Reads and parses the next row, or returns `None` at end of file.
    pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        let line = self
            .inner
            .reader
            .next_line_tokio_async(&mut self.buffer, &mut self.read)
            .await?;
        self.inner.read_next(line)
    }
}
||||
|
||||
/// I/O-independent outcome of parsing the first TSV line, shared by the sync
/// and async front-ends.
enum TsvInnerQueryResults {
    Solutions {
        variables: Vec<Variable>,
        solutions: TsvInnerSolutionsReader,
    },
    Boolean(bool),
}
||||
|
||||
/// Parses the first line of a TSV result file: either a boolean
/// (`true`/`false`, case-insensitive) or the tab-separated header of
/// `?`/`$`-prefixed variable names (an empty line means zero variables).
fn inner_read_first_line(
    reader: LineReader,
    line: &str,
) -> Result<TsvInnerQueryResults, QueryResultsParseError> {
    let line = line.trim_matches(|c| matches!(c, ' ' | '\r' | '\n'));
    // A boolean result file contains only "true" or "false".
    if line.eq_ignore_ascii_case("true") {
        return Ok(TsvInnerQueryResults::Boolean(true));
    }
    if line.eq_ignore_ascii_case("false") {
        return Ok(TsvInnerQueryResults::Boolean(false));
    }
    let mut variables = Vec::new();
    if !line.is_empty() {
        for v in line.split('\t') {
            let v = v.trim();
            if v.is_empty() {
                return Err(QueryResultsSyntaxError::msg("Empty column on the first row. The first row should be a list of variables like ?foo or $bar").into());
            }
            let variable = Variable::from_str(v).map_err(|e| {
                QueryResultsSyntaxError::msg(format!("Invalid variable declaration '{v}': {e}"))
            })?;
            // Duplicate variable names in the header are a syntax error.
            if variables.contains(&variable) {
                return Err(QueryResultsSyntaxError::msg(format!(
                    "The variable {variable} is declared twice"
                ))
                .into());
            }
            variables.push(variable);
        }
    }
    let column_len = variables.len();
    Ok(TsvInnerQueryResults::Solutions {
        variables,
        solutions: TsvInnerSolutionsReader { reader, column_len },
    })
}
||||
|
||||
struct TsvInnerSolutionsReader { |
||||
reader: LineReader, |
||||
column_len: usize, |
||||
} |
||||
|
||||
impl TsvInnerSolutionsReader { |
||||
#[allow(clippy::unwrap_in_result)] |
||||
pub fn read_next( |
||||
&self, |
||||
line: &str, |
||||
) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { |
||||
if line.is_empty() { |
||||
return Ok(None); // EOF
|
||||
} |
||||
let elements = line |
||||
.split('\t') |
||||
.enumerate() |
||||
.map(|(i, v)| { |
||||
let v = v.trim(); |
||||
if v.is_empty() { |
||||
Ok(None) |
||||
} else { |
||||
Ok(Some(Term::from_str(v).map_err(|e| { |
||||
let start_position_char = line |
||||
.split('\t') |
||||
.take(i) |
||||
.map(|c| c.chars().count() + 1) |
||||
.sum::<usize>(); |
||||
let start_position_bytes = |
||||
line.split('\t').take(i).map(|c| c.len() + 1).sum::<usize>(); |
||||
QueryResultsSyntaxError(SyntaxErrorKind::Term { |
||||
error: e, |
||||
term: v.into(), |
||||
location: TextPosition { |
||||
line: self.reader.line_count - 1, |
||||
column: start_position_char.try_into().unwrap(), |
||||
offset: self.reader.last_line_start |
||||
+ u64::try_from(start_position_bytes).unwrap(), |
||||
}..TextPosition { |
||||
line: self.reader.line_count - 1, |
||||
column: (start_position_char + v.chars().count()) |
||||
.try_into() |
||||
.unwrap(), |
||||
offset: self.reader.last_line_start |
||||
+ u64::try_from(start_position_bytes + v.len()).unwrap(), |
||||
}, |
||||
}) |
||||
})?)) |
||||
} |
||||
}) |
||||
.collect::<Result<Vec<_>, QueryResultsParseError>>()?; |
||||
if elements.len() == self.column_len { |
||||
Ok(Some(elements)) |
||||
} else if self.column_len == 0 && elements == [None] { |
||||
Ok(Some(Vec::new())) // Zero columns case
|
||||
} else { |
||||
Err(QueryResultsSyntaxError::located_message( |
||||
format!( |
||||
"This TSV files has {} columns but we found a row on line {} with {} columns: {}", |
||||
self.column_len, |
||||
self.reader.line_count - 1, |
||||
elements.len(), |
||||
line |
||||
), |
||||
TextPosition { |
||||
line: self.reader.line_count - 1, |
||||
column: 0, |
||||
offset: self.reader.last_line_start, |
||||
}..TextPosition { |
||||
line: self.reader.line_count - 1, |
||||
column: line.chars().count().try_into().unwrap(), |
||||
offset: self.reader.last_line_end, |
||||
}, |
||||
) |
||||
.into()) |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Incremental line splitter over a byte stream that also tracks positions
/// (line count and byte offsets) for error reporting.
struct LineReader {
    // Start of the not-yet-consumed region in the caller-provided buffer.
    buffer_start: usize,
    // End of the valid data in the caller-provided buffer.
    buffer_end: usize,
    // Number of lines returned so far.
    line_count: u64,
    // Byte offset (in the whole stream) where the last returned line starts.
    last_line_start: u64,
    // Byte offset just past the end of the last returned line.
    last_line_end: u64,
}
||||
|
||||
impl LineReader { |
||||
/// Creates a reader positioned at the very start of the stream.
fn new() -> Self {
    Self {
        buffer_start: 0,
        buffer_end: 0,
        line_count: 0,
        last_line_start: 0,
        last_line_end: 0,
    }
}
||||
|
||||
/// Returns the next line (including its trailing `'\n'` when present),
/// reading more bytes into `buffer` as needed.
///
/// At end of file the remaining unterminated bytes are returned as the last
/// line (possibly empty). Fails with `OutOfMemory` if a single line would
/// exceed `MAX_BUFFER_SIZE` and with `InvalidData` on invalid UTF-8.
#[allow(clippy::unwrap_in_result)]
fn next_line<'a>(
    &mut self,
    buffer: &'a mut Vec<u8>,
    read: &mut impl Read,
) -> io::Result<&'a str> {
    let line_end = loop {
        // A complete line is already buffered: it ends just after the '\n'.
        if let Some(eol) = memchr(b'\n', &buffer[self.buffer_start..self.buffer_end]) {
            break self.buffer_start + eol + 1;
        }
        // Compact: move the pending bytes to the front of the buffer so the
        // free space check below is against the real amount of data held.
        if self.buffer_start > 0 {
            buffer.copy_within(self.buffer_start..self.buffer_end, 0);
            self.buffer_end -= self.buffer_start;
            self.buffer_start = 0;
        }
        // Ensure at least 1024 free bytes, bounded by MAX_BUFFER_SIZE.
        if self.buffer_end + 1024 > buffer.len() {
            if self.buffer_end + 1024 > MAX_BUFFER_SIZE {
                return Err(io::Error::new(
                    io::ErrorKind::OutOfMemory,
                    format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"),
                ));
            }
            buffer.resize(self.buffer_end + 1024, b'\0');
        }
        let read = read.read(&mut buffer[self.buffer_end..])?;
        if read == 0 {
            // EOF: return whatever is pending as the (unterminated) last line.
            break self.buffer_end;
        }
        self.buffer_end += read;
    };
    let result = str::from_utf8(&buffer[self.buffer_start..line_end]).map_err(|e| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Invalid UTF-8 in the TSV file: {e}"),
        )
    });
    // Update the position bookkeeping used for error locations.
    self.line_count += 1;
    self.last_line_start = self.last_line_end;
    self.last_line_end += u64::try_from(line_end - self.buffer_start).unwrap();
    self.buffer_start = line_end;
    result
}
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
#[allow(clippy::unwrap_in_result)] |
||||
async fn next_line_tokio_async<'a>( |
||||
&mut self, |
||||
buffer: &'a mut Vec<u8>, |
||||
read: &mut (impl AsyncRead + Unpin), |
||||
) -> io::Result<&'a str> { |
||||
let line_end = loop { |
||||
if let Some(eol) = memchr(b'\n', &buffer[self.buffer_start..self.buffer_end]) { |
||||
break self.buffer_start + eol + 1; |
||||
} |
||||
if self.buffer_start > 0 { |
||||
buffer.copy_within(self.buffer_start..self.buffer_end, 0); |
||||
self.buffer_end -= self.buffer_start; |
||||
self.buffer_start = 0; |
||||
} |
||||
if self.buffer_end + 1024 > buffer.len() { |
||||
if self.buffer_end + 1024 > MAX_BUFFER_SIZE { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::OutOfMemory, |
||||
format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"), |
||||
)); |
||||
} |
||||
buffer.resize(self.buffer_end + 1024, b'\0'); |
||||
} |
||||
let read = read.read(&mut buffer[self.buffer_end..]).await?; |
||||
if read == 0 { |
||||
break self.buffer_end; |
||||
} |
||||
self.buffer_end += read; |
||||
}; |
||||
let result = str::from_utf8(&buffer[self.buffer_start..line_end]).map_err(|e| { |
||||
io::Error::new( |
||||
io::ErrorKind::InvalidData, |
||||
format!("Invalid UTF-8 in the TSV file: {e}"), |
||||
) |
||||
}); |
||||
self.line_count += 1; |
||||
self.last_line_start = self.last_line_end; |
||||
self.last_line_end += u64::try_from(line_end - self.buffer_start).unwrap(); |
||||
self.buffer_start = line_end; |
||||
result |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)] |
||||
#[allow(clippy::panic_in_result_fn)] |
||||
mod tests { |
||||
use super::*; |
||||
use std::error::Error; |
||||
|
||||
fn build_example() -> (Vec<Variable>, Vec<Vec<Option<Term>>>) { |
||||
( |
||||
vec![ |
||||
Variable::new_unchecked("x"), |
||||
Variable::new_unchecked("literal"), |
||||
], |
||||
vec![ |
||||
vec![ |
||||
Some(NamedNode::new_unchecked("http://example/x").into()), |
||||
Some(Literal::new_simple_literal("String").into()), |
||||
], |
||||
vec![ |
||||
Some(NamedNode::new_unchecked("http://example/x").into()), |
||||
Some(Literal::new_simple_literal("String-with-dquote\"").into()), |
||||
], |
||||
vec![ |
||||
Some(BlankNode::new_unchecked("b0").into()), |
||||
Some(Literal::new_simple_literal("Blank node").into()), |
||||
], |
||||
vec![ |
||||
None, |
||||
Some(Literal::new_simple_literal("Missing 'x'").into()), |
||||
], |
||||
vec![None, None], |
||||
vec![ |
||||
Some(NamedNode::new_unchecked("http://example/x").into()), |
||||
None, |
||||
], |
||||
vec![ |
||||
Some(BlankNode::new_unchecked("b1").into()), |
||||
Some( |
||||
Literal::new_language_tagged_literal_unchecked("String-with-lang", "en") |
||||
.into(), |
||||
), |
||||
], |
||||
vec![ |
||||
Some(BlankNode::new_unchecked("b1").into()), |
||||
Some(Literal::new_typed_literal("123", xsd::INTEGER).into()), |
||||
], |
||||
vec![ |
||||
None, |
||||
Some(Literal::new_simple_literal("escape,\t\r\n").into()), |
||||
], |
||||
], |
||||
) |
||||
} |
||||
|
||||
#[test] |
||||
fn test_csv_serialization() { |
||||
let (variables, solutions) = build_example(); |
||||
let mut buffer = String::new(); |
||||
let writer = InnerCsvSolutionsWriter::start(&mut buffer, variables.clone()); |
||||
for solution in solutions { |
||||
writer.write( |
||||
&mut buffer, |
||||
variables |
||||
.iter() |
||||
.zip(&solution) |
||||
.filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))), |
||||
); |
||||
} |
||||
assert_eq!(buffer, "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\"\r\n"); |
||||
} |
||||
|
||||
#[test] |
||||
fn test_tsv_roundtrip() -> Result<(), Box<dyn Error>> { |
||||
let (variables, solutions) = build_example(); |
||||
|
||||
// Write
|
||||
let mut buffer = String::new(); |
||||
let writer = InnerTsvSolutionsWriter::start(&mut buffer, variables.clone()); |
||||
for solution in &solutions { |
||||
writer.write( |
||||
&mut buffer, |
||||
variables |
||||
.iter() |
||||
.zip(solution) |
||||
.filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))), |
||||
); |
||||
} |
||||
assert_eq!(buffer, "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\"\n"); |
||||
|
||||
// Read
|
||||
if let FromReadTsvQueryResultsReader::Solutions { |
||||
solutions: mut solutions_iter, |
||||
variables: actual_variables, |
||||
} = FromReadTsvQueryResultsReader::read(buffer.as_bytes())? |
||||
{ |
||||
assert_eq!(actual_variables.as_slice(), variables.as_slice()); |
||||
let mut rows = Vec::new(); |
||||
while let Some(row) = solutions_iter.read_next()? { |
||||
rows.push(row); |
||||
} |
||||
assert_eq!(rows, solutions); |
||||
} else { |
||||
unreachable!() |
||||
} |
||||
|
||||
Ok(()) |
||||
} |
||||
|
||||
#[test] |
||||
fn test_bad_tsv() { |
||||
let mut bad_tsvs = vec![ |
||||
"?", |
||||
"?p", |
||||
"?p?o", |
||||
"?p\n<", |
||||
"?p\n_", |
||||
"?p\n_:", |
||||
"?p\n\"", |
||||
"?p\n<<", |
||||
"?p\n1\t2\n", |
||||
"?p\n\n", |
||||
]; |
||||
let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000)); |
||||
bad_tsvs.push(&a_lot_of_strings); |
||||
for bad_tsv in bad_tsvs { |
||||
if let Ok(FromReadTsvQueryResultsReader::Solutions { mut solutions, .. }) = |
||||
FromReadTsvQueryResultsReader::read(bad_tsv.as_bytes()) |
||||
{ |
||||
while let Ok(Some(_)) = solutions.read_next() {} |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[test] |
||||
fn test_no_columns_csv_serialization() { |
||||
let mut buffer = String::new(); |
||||
let writer = InnerCsvSolutionsWriter::start(&mut buffer, Vec::new()); |
||||
writer.write(&mut buffer, []); |
||||
assert_eq!(buffer, "\r\n\r\n"); |
||||
} |
||||
|
||||
#[test] |
||||
fn test_no_columns_tsv_serialization() { |
||||
let mut buffer = String::new(); |
||||
let writer = InnerTsvSolutionsWriter::start(&mut buffer, Vec::new()); |
||||
writer.write(&mut buffer, []); |
||||
assert_eq!(buffer, "\n\n"); |
||||
} |
||||
|
||||
#[test] |
||||
fn test_no_columns_tsv_parsing() -> io::Result<()> { |
||||
if let FromReadTsvQueryResultsReader::Solutions { |
||||
mut solutions, |
||||
variables, |
||||
} = FromReadTsvQueryResultsReader::read(b"\n\n".as_slice())? |
||||
{ |
||||
assert_eq!(variables, Vec::<Variable>::new()); |
||||
assert_eq!(solutions.read_next()?, Some(Vec::new())); |
||||
assert_eq!(solutions.read_next()?, None); |
||||
} else { |
||||
unreachable!() |
||||
} |
||||
Ok(()) |
||||
} |
||||
|
||||
#[test] |
||||
fn test_no_results_csv_serialization() { |
||||
let mut buffer = String::new(); |
||||
InnerCsvSolutionsWriter::start(&mut buffer, vec![Variable::new_unchecked("a")]); |
||||
assert_eq!(buffer, "a\r\n"); |
||||
} |
||||
|
||||
#[test] |
||||
fn test_no_results_tsv_serialization() { |
||||
let mut buffer = String::new(); |
||||
InnerTsvSolutionsWriter::start(&mut buffer, vec![Variable::new_unchecked("a")]); |
||||
assert_eq!(buffer, "?a\n"); |
||||
} |
||||
|
||||
#[test] |
||||
fn test_no_results_tsv_parsing() -> io::Result<()> { |
||||
if let FromReadTsvQueryResultsReader::Solutions { |
||||
mut solutions, |
||||
variables, |
||||
} = FromReadTsvQueryResultsReader::read(b"?a\n".as_slice())? |
||||
{ |
||||
assert_eq!(variables, vec![Variable::new_unchecked("a")]); |
||||
assert_eq!(solutions.read_next()?, None); |
||||
} else { |
||||
unreachable!() |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
@ -0,0 +1,157 @@ |
||||
use crate::oxrdf::TermParseError; |
||||
use std::io; |
||||
use std::ops::Range; |
||||
use std::sync::Arc; |
||||
|
||||
/// Error returned during SPARQL result formats format parsing.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub enum QueryResultsParseError { |
||||
/// I/O error during parsing (file not found...).
|
||||
#[error(transparent)] |
||||
Io(#[from] io::Error), |
||||
/// An error in the file syntax.
|
||||
#[error(transparent)] |
||||
Syntax(#[from] QueryResultsSyntaxError), |
||||
} |
||||
|
||||
impl From<QueryResultsParseError> for io::Error { |
||||
#[inline] |
||||
fn from(error: QueryResultsParseError) -> Self { |
||||
match error { |
||||
QueryResultsParseError::Io(error) => error, |
||||
QueryResultsParseError::Syntax(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<json_event_parser::ParseError> for QueryResultsParseError { |
||||
fn from(error: json_event_parser::ParseError) -> Self { |
||||
match error { |
||||
json_event_parser::ParseError::Syntax(error) => { |
||||
QueryResultsSyntaxError::from(error).into() |
||||
} |
||||
json_event_parser::ParseError::Io(error) => error.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::Error> for QueryResultsParseError { |
||||
#[inline] |
||||
fn from(error: quick_xml::Error) -> Self { |
||||
match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e))) |
||||
} |
||||
_ => Self::Syntax(QueryResultsSyntaxError(SyntaxErrorKind::Xml(error))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::escape::EscapeError> for QueryResultsParseError { |
||||
#[inline] |
||||
fn from(error: quick_xml::escape::EscapeError) -> Self { |
||||
quick_xml::Error::from(error).into() |
||||
} |
||||
} |
||||
/// An error in the syntax of the parsed file.
|
||||
#[derive(Debug, thiserror::Error)] |
||||
#[error(transparent)] |
||||
pub struct QueryResultsSyntaxError(#[from] pub(crate) SyntaxErrorKind); |
||||
|
||||
#[derive(Debug, thiserror::Error)] |
||||
pub(crate) enum SyntaxErrorKind { |
||||
#[error(transparent)] |
||||
Json(#[from] json_event_parser::SyntaxError), |
||||
#[error(transparent)] |
||||
Xml(#[from] quick_xml::Error), |
||||
#[error("Error {error} on '{term}' in line {}", location.start.line + 1)] |
||||
Term { |
||||
#[source] |
||||
error: TermParseError, |
||||
term: String, |
||||
location: Range<TextPosition>, |
||||
}, |
||||
#[error("{msg}")] |
||||
Msg { |
||||
msg: String, |
||||
location: Option<Range<TextPosition>>, |
||||
}, |
||||
} |
||||
|
||||
impl QueryResultsSyntaxError { |
||||
/// Builds an error from a printable error message.
|
||||
#[inline] |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self(SyntaxErrorKind::Msg { |
||||
msg: msg.into(), |
||||
location: None, |
||||
}) |
||||
} |
||||
|
||||
/// Builds an error from a printable error message and a location
|
||||
#[inline] |
||||
pub(crate) fn located_message(msg: impl Into<String>, location: Range<TextPosition>) -> Self { |
||||
Self(SyntaxErrorKind::Msg { |
||||
msg: msg.into(), |
||||
location: Some(location), |
||||
}) |
||||
} |
||||
|
||||
/// The location of the error inside of the file.
|
||||
#[inline] |
||||
pub fn location(&self) -> Option<Range<TextPosition>> { |
||||
match &self.0 { |
||||
SyntaxErrorKind::Json(e) => { |
||||
let location = e.location(); |
||||
Some( |
||||
TextPosition { |
||||
line: location.start.line, |
||||
column: location.start.column, |
||||
offset: location.start.offset, |
||||
}..TextPosition { |
||||
line: location.end.line, |
||||
column: location.end.column, |
||||
offset: location.end.offset, |
||||
}, |
||||
) |
||||
} |
||||
SyntaxErrorKind::Term { location, .. } => Some(location.clone()), |
||||
SyntaxErrorKind::Msg { location, .. } => location.clone(), |
||||
SyntaxErrorKind::Xml(_) => None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<QueryResultsSyntaxError> for io::Error { |
||||
#[inline] |
||||
fn from(error: QueryResultsSyntaxError) -> Self { |
||||
match error.0 { |
||||
SyntaxErrorKind::Json(error) => Self::new(io::ErrorKind::InvalidData, error), |
||||
SyntaxErrorKind::Xml(error) => match error { |
||||
quick_xml::Error::Io(error) => { |
||||
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e)) |
||||
} |
||||
quick_xml::Error::UnexpectedEof(error) => { |
||||
Self::new(io::ErrorKind::UnexpectedEof, error) |
||||
} |
||||
_ => Self::new(io::ErrorKind::InvalidData, error), |
||||
}, |
||||
SyntaxErrorKind::Term { .. } => Self::new(io::ErrorKind::InvalidData, error), |
||||
SyntaxErrorKind::Msg { msg, .. } => Self::new(io::ErrorKind::InvalidData, msg), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<json_event_parser::SyntaxError> for QueryResultsSyntaxError { |
||||
fn from(error: json_event_parser::SyntaxError) -> Self { |
||||
Self(SyntaxErrorKind::Json(error)) |
||||
} |
||||
} |
||||
|
||||
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Copy)] |
||||
pub struct TextPosition { |
||||
pub line: u64, |
||||
pub column: u64, |
||||
pub offset: u64, |
||||
} |
@ -0,0 +1,176 @@ |
||||
use std::fmt; |
||||
|
||||
/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] |
||||
#[non_exhaustive] |
||||
pub enum QueryResultsFormat { |
||||
/// [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/)
|
||||
Xml, |
||||
/// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
|
||||
Json, |
||||
/// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
|
||||
Csv, |
||||
/// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
|
||||
Tsv, |
||||
} |
||||
|
||||
impl QueryResultsFormat { |
||||
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
|
||||
///
|
||||
/// ```
|
||||
/// use sparesults::QueryResultsFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// QueryResultsFormat::Json.iri(),
|
||||
/// "http://www.w3.org/ns/formats/SPARQL_Results_JSON"
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn iri(self) -> &'static str { |
||||
match self { |
||||
Self::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML", |
||||
Self::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON", |
||||
Self::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV", |
||||
Self::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
|
||||
///
|
||||
/// ```
|
||||
/// use sparesults::QueryResultsFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// QueryResultsFormat::Json.media_type(),
|
||||
/// "application/sparql-results+json"
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn media_type(self) -> &'static str { |
||||
match self { |
||||
Self::Xml => "application/sparql-results+xml", |
||||
Self::Json => "application/sparql-results+json", |
||||
Self::Csv => "text/csv; charset=utf-8", |
||||
Self::Tsv => "text/tab-separated-values; charset=utf-8", |
||||
} |
||||
} |
||||
|
||||
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
|
||||
///
|
||||
/// ```
|
||||
/// use sparesults::QueryResultsFormat;
|
||||
///
|
||||
/// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj")
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn file_extension(self) -> &'static str { |
||||
match self { |
||||
Self::Xml => "srx", |
||||
Self::Json => "srj", |
||||
Self::Csv => "csv", |
||||
Self::Tsv => "tsv", |
||||
} |
||||
} |
||||
|
||||
/// The format name.
|
||||
///
|
||||
/// ```
|
||||
/// use sparesults::QueryResultsFormat;
|
||||
///
|
||||
/// assert_eq!(QueryResultsFormat::Json.name(), "SPARQL Results in JSON")
|
||||
/// ```
|
||||
#[inline] |
||||
pub const fn name(self) -> &'static str { |
||||
match self { |
||||
Self::Xml => "SPARQL Results in XML", |
||||
Self::Json => "SPARQL Results in JSON", |
||||
Self::Csv => "SPARQL Results in CSV", |
||||
Self::Tsv => "SPARQL Results in TSV", |
||||
} |
||||
} |
||||
|
||||
/// Looks for a known format from a media type.
|
||||
///
|
||||
/// It supports some media type aliases.
|
||||
/// For example, "application/xml" is going to return `Xml` even if it is not its canonical media type.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use sparesults::QueryResultsFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"),
|
||||
/// Some(QueryResultsFormat::Json)
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_media_type(media_type: &str) -> Option<Self> { |
||||
const MEDIA_SUBTYPES: [(&str, QueryResultsFormat); 8] = [ |
||||
("csv", QueryResultsFormat::Csv), |
||||
("json", QueryResultsFormat::Json), |
||||
("plain", QueryResultsFormat::Csv), |
||||
("sparql-results+json", QueryResultsFormat::Json), |
||||
("sparql-results+xml", QueryResultsFormat::Xml), |
||||
("tab-separated-values", QueryResultsFormat::Tsv), |
||||
("tsv", QueryResultsFormat::Tsv), |
||||
("xml", QueryResultsFormat::Xml), |
||||
]; |
||||
|
||||
let (r#type, subtype) = media_type |
||||
.split_once(';') |
||||
.unwrap_or((media_type, "")) |
||||
.0 |
||||
.trim() |
||||
.split_once('/')?; |
||||
let r#type = r#type.trim(); |
||||
if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { |
||||
return None; |
||||
} |
||||
let subtype = subtype.trim(); |
||||
let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); |
||||
for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { |
||||
if candidate_subtype.eq_ignore_ascii_case(subtype) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
/// Looks for a known format from an extension.
|
||||
///
|
||||
/// It supports some aliases.
|
||||
///
|
||||
/// Example:
|
||||
/// ```
|
||||
/// use sparesults::QueryResultsFormat;
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// QueryResultsFormat::from_extension("json"),
|
||||
/// Some(QueryResultsFormat::Json)
|
||||
/// )
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn from_extension(extension: &str) -> Option<Self> { |
||||
const MEDIA_TYPES: [(&str, QueryResultsFormat); 7] = [ |
||||
("csv", QueryResultsFormat::Csv), |
||||
("json", QueryResultsFormat::Json), |
||||
("srj", QueryResultsFormat::Json), |
||||
("srx", QueryResultsFormat::Xml), |
||||
("tsv", QueryResultsFormat::Tsv), |
||||
("txt", QueryResultsFormat::Csv), |
||||
("xml", QueryResultsFormat::Xml), |
||||
]; |
||||
for (candidate_extension, candidate_id) in MEDIA_TYPES { |
||||
if candidate_extension.eq_ignore_ascii_case(extension) { |
||||
return Some(candidate_id); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for QueryResultsFormat { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
f.write_str(self.name()) |
||||
} |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,16 @@ |
||||
mod csv; |
||||
mod error; |
||||
mod format; |
||||
mod json; |
||||
mod parser; |
||||
mod serializer; |
||||
pub mod solution; |
||||
mod xml; |
||||
|
||||
pub use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError, TextPosition}; |
||||
pub use crate::sparesults::format::QueryResultsFormat; |
||||
pub use crate::sparesults::parser::{ |
||||
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsParser, |
||||
}; |
||||
pub use crate::sparesults::serializer::{QueryResultsSerializer, ToWriteSolutionsWriter}; |
||||
pub use crate::sparesults::solution::QuerySolution; |
@ -0,0 +1,460 @@ |
||||
use crate::oxrdf::Variable; |
||||
use crate::sparesults::csv::{FromReadTsvQueryResultsReader, FromReadTsvSolutionsReader}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::sparesults::csv::{ |
||||
FromTokioAsyncReadTsvQueryResultsReader, FromTokioAsyncReadTsvSolutionsReader, |
||||
}; |
||||
use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError}; |
||||
use crate::sparesults::format::QueryResultsFormat; |
||||
use crate::sparesults::json::{FromReadJsonQueryResultsReader, FromReadJsonSolutionsReader}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::sparesults::json::{ |
||||
FromTokioAsyncReadJsonQueryResultsReader, FromTokioAsyncReadJsonSolutionsReader, |
||||
}; |
||||
use crate::sparesults::solution::QuerySolution; |
||||
use crate::sparesults::xml::{FromReadXmlQueryResultsReader, FromReadXmlSolutionsReader}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::sparesults::xml::{ |
||||
FromTokioAsyncReadXmlQueryResultsReader, FromTokioAsyncReadXmlSolutionsReader, |
||||
}; |
||||
use std::io::Read; |
||||
use std::sync::Arc; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncRead; |
||||
|
||||
/// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml)).
|
||||
/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json)).
|
||||
/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv)).
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
///
|
||||
/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
|
||||
/// // boolean
|
||||
/// if let FromReadQueryResultsReader::Boolean(v) = json_parser.parse_read(br#"{"boolean":true}"#.as_slice())? {
|
||||
/// assert_eq!(v, true);
|
||||
/// }
|
||||
/// // solutions
|
||||
/// if let FromReadQueryResultsReader::Solutions(solutions) = json_parser.parse_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice())? {
|
||||
/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
|
||||
/// for solution in solutions {
|
||||
/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
|
||||
/// ```
|
||||
pub struct QueryResultsParser { |
||||
format: QueryResultsFormat, |
||||
} |
||||
|
||||
impl QueryResultsParser { |
||||
/// Builds a parser for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: QueryResultsFormat) -> Self { |
||||
Self { format } |
||||
} |
||||
|
||||
/// Reads a result file from a [`Read`] implementation.
|
||||
///
|
||||
/// Reads are automatically buffered.
|
||||
///
|
||||
/// Example in XML (the API is the same for JSON and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
///
|
||||
/// let xml_parser = QueryResultsParser::from_format(QueryResultsFormat::Xml);
|
||||
///
|
||||
/// // boolean
|
||||
/// if let FromReadQueryResultsReader::Boolean(v) = xml_parser.parse_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head/><boolean>true</boolean></sparql>"#.as_slice())? {
|
||||
/// assert_eq!(v, true);
|
||||
/// }
|
||||
///
|
||||
/// // solutions
|
||||
/// if let FromReadQueryResultsReader::Solutions(solutions) = xml_parser.parse_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#.as_slice())? {
|
||||
/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
|
||||
/// for solution in solutions {
|
||||
/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
|
||||
/// ```
|
||||
pub fn parse_read<R: Read>( |
||||
&self, |
||||
reader: R, |
||||
) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> { |
||||
Ok(match self.format { |
||||
QueryResultsFormat::Xml => match FromReadXmlQueryResultsReader::read(reader)? { |
||||
FromReadXmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), |
||||
FromReadXmlQueryResultsReader::Solutions { |
||||
solutions, |
||||
variables, |
||||
} => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { |
||||
variables: variables.into(), |
||||
solutions: FromReadSolutionsReaderKind::Xml(solutions), |
||||
}), |
||||
}, |
||||
QueryResultsFormat::Json => match FromReadJsonQueryResultsReader::read(reader)? { |
||||
FromReadJsonQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), |
||||
FromReadJsonQueryResultsReader::Solutions { |
||||
solutions, |
||||
variables, |
||||
} => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { |
||||
variables: variables.into(), |
||||
solutions: FromReadSolutionsReaderKind::Json(solutions), |
||||
}), |
||||
}, |
||||
QueryResultsFormat::Csv => return Err(QueryResultsSyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()), |
||||
QueryResultsFormat::Tsv => match FromReadTsvQueryResultsReader::read(reader)? { |
||||
FromReadTsvQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r), |
||||
FromReadTsvQueryResultsReader::Solutions { |
||||
solutions, |
||||
variables, |
||||
} => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader { |
||||
variables: variables.into(), |
||||
solutions: FromReadSolutionsReaderKind::Tsv(solutions), |
||||
}), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
#[deprecated(note = "use parse_read", since = "0.4.0")] |
||||
pub fn read_results<R: Read>( |
||||
&self, |
||||
reader: R, |
||||
) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> { |
||||
self.parse_read(reader) |
||||
} |
||||
|
||||
/// Reads a result file from a Tokio [`AsyncRead`] implementation.
|
||||
///
|
||||
/// Reads are automatically buffered.
|
||||
///
|
||||
/// Example in XML (the API is the same for JSON and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader};
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
|
||||
/// let xml_parser = QueryResultsParser::from_format(QueryResultsFormat::Xml);
|
||||
///
|
||||
/// // boolean
|
||||
/// if let FromTokioAsyncReadQueryResultsReader::Boolean(v) = xml_parser.parse_tokio_async_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head/><boolean>true</boolean></sparql>"#.as_slice()).await? {
|
||||
/// assert_eq!(v, true);
|
||||
/// }
|
||||
///
|
||||
/// // solutions
|
||||
/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = xml_parser.parse_tokio_async_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#.as_slice()).await? {
|
||||
/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
|
||||
/// while let Some(solution) = solutions.next().await {
|
||||
/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
|
||||
/// }
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub async fn parse_tokio_async_read<R: AsyncRead + Unpin>( |
||||
&self, |
||||
reader: R, |
||||
) -> Result<FromTokioAsyncReadQueryResultsReader<R>, QueryResultsParseError> { |
||||
Ok(match self.format { |
||||
QueryResultsFormat::Xml => match FromTokioAsyncReadXmlQueryResultsReader::read(reader).await? { |
||||
FromTokioAsyncReadXmlQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), |
||||
FromTokioAsyncReadXmlQueryResultsReader::Solutions { |
||||
solutions, |
||||
variables, |
||||
} => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { |
||||
variables: variables.into(), |
||||
solutions: FromTokioAsyncReadSolutionsReaderKind::Xml(solutions), |
||||
}), |
||||
}, |
||||
QueryResultsFormat::Json => match FromTokioAsyncReadJsonQueryResultsReader::read(reader).await? { |
||||
FromTokioAsyncReadJsonQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), |
||||
FromTokioAsyncReadJsonQueryResultsReader::Solutions { |
||||
solutions, |
||||
variables, |
||||
} => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { |
||||
variables: variables.into(), |
||||
solutions: FromTokioAsyncReadSolutionsReaderKind::Json(solutions), |
||||
}), |
||||
}, |
||||
QueryResultsFormat::Csv => return Err(QueryResultsSyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()), |
||||
QueryResultsFormat::Tsv => match FromTokioAsyncReadTsvQueryResultsReader::read(reader).await? { |
||||
FromTokioAsyncReadTsvQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r), |
||||
FromTokioAsyncReadTsvQueryResultsReader::Solutions { |
||||
solutions, |
||||
variables, |
||||
} => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader { |
||||
variables: variables.into(), |
||||
solutions: FromTokioAsyncReadSolutionsReaderKind::Tsv(solutions), |
||||
}), |
||||
}, |
||||
}) |
||||
} |
||||
} |
||||
|
||||
impl From<QueryResultsFormat> for QueryResultsParser { |
||||
fn from(format: QueryResultsFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// The reader for a given read of a results file.
|
||||
///
|
||||
/// It is either a read boolean ([`bool`]) or a streaming reader of a set of solutions ([`FromReadSolutionsReader`]).
|
||||
///
|
||||
/// Example in TSV (the API is the same for JSON and XML):
|
||||
/// ```
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
/// use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser};
|
||||
///
|
||||
/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
|
||||
///
|
||||
/// // boolean
|
||||
/// if let FromReadQueryResultsReader::Boolean(v) = tsv_parser.parse_read(b"true".as_slice())? {
|
||||
/// assert_eq!(v, true);
|
||||
/// }
|
||||
///
|
||||
/// // solutions
|
||||
/// if let FromReadQueryResultsReader::Solutions(solutions) =
|
||||
/// tsv_parser.parse_read(b"?foo\t?bar\n\"test\"\t".as_slice())?
|
||||
/// {
|
||||
/// assert_eq!(
|
||||
/// solutions.variables(),
|
||||
/// &[
|
||||
/// Variable::new_unchecked("foo"),
|
||||
/// Variable::new_unchecked("bar")
|
||||
/// ]
|
||||
/// );
|
||||
/// for solution in solutions {
|
||||
/// assert_eq!(
|
||||
/// solution?.iter().collect::<Vec<_>>(),
|
||||
/// vec![(
|
||||
/// &Variable::new_unchecked("foo"),
|
||||
/// &Literal::from("test").into()
|
||||
/// )]
|
||||
/// );
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
|
||||
/// ```
|
||||
pub enum FromReadQueryResultsReader<R: Read> { |
||||
Solutions(FromReadSolutionsReader<R>), |
||||
Boolean(bool), |
||||
} |
||||
|
||||
/// A streaming reader of a set of [`QuerySolution`] solutions.
|
||||
///
|
||||
/// It implements the [`Iterator`] API to iterate over the solutions.
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
///
|
||||
/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
|
||||
/// if let FromReadQueryResultsReader::Solutions(solutions) = json_parser.parse_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice())? {
|
||||
/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
|
||||
/// for solution in solutions {
|
||||
/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
|
||||
/// }
|
||||
/// }
|
||||
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
|
||||
/// ```
|
||||
pub struct FromReadSolutionsReader<R: Read> { |
||||
variables: Arc<[Variable]>, |
||||
solutions: FromReadSolutionsReaderKind<R>, |
||||
} |
||||
|
||||
enum FromReadSolutionsReaderKind<R: Read> { |
||||
Xml(FromReadXmlSolutionsReader<R>), |
||||
Json(FromReadJsonSolutionsReader<R>), |
||||
Tsv(FromReadTsvSolutionsReader<R>), |
||||
} |
||||
|
||||
impl<R: Read> FromReadSolutionsReader<R> { |
||||
/// Ordered list of the declared variables at the beginning of the results.
|
||||
///
|
||||
/// Example in TSV (the API is the same for JSON and XML):
|
||||
/// ```
|
||||
/// use oxrdf::Variable;
|
||||
/// use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser};
|
||||
///
|
||||
/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
|
||||
/// if let FromReadQueryResultsReader::Solutions(solutions) =
|
||||
/// tsv_parser.parse_read(b"?foo\t?bar\n\"ex1\"\t\"ex2\"".as_slice())?
|
||||
/// {
|
||||
/// assert_eq!(
|
||||
/// solutions.variables(),
|
||||
/// &[
|
||||
/// Variable::new_unchecked("foo"),
|
||||
/// Variable::new_unchecked("bar")
|
||||
/// ]
|
||||
/// );
|
||||
/// }
|
||||
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn variables(&self) -> &[Variable] { |
||||
&self.variables |
||||
} |
||||
} |
||||
|
||||
impl<R: Read> Iterator for FromReadSolutionsReader<R> { |
||||
type Item = Result<QuerySolution, QueryResultsParseError>; |
||||
|
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
Some( |
||||
match &mut self.solutions { |
||||
FromReadSolutionsReaderKind::Xml(reader) => reader.read_next(), |
||||
FromReadSolutionsReaderKind::Json(reader) => reader.read_next(), |
||||
FromReadSolutionsReaderKind::Tsv(reader) => reader.read_next(), |
||||
} |
||||
.transpose()? |
||||
.map(|values| (Arc::clone(&self.variables), values).into()), |
||||
) |
||||
} |
||||
} |
||||
|
||||
/// The reader for a given read of a results file.
|
||||
///
|
||||
/// It is either a read boolean ([`bool`]) or a streaming reader of a set of solutions ([`FromReadSolutionsReader`]).
|
||||
///
|
||||
/// Example in TSV (the API is the same for JSON and XML):
|
||||
/// ```
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
/// use sparesults::{
|
||||
/// FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser,
|
||||
/// };
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
|
||||
/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
|
||||
///
|
||||
/// // boolean
|
||||
/// if let FromTokioAsyncReadQueryResultsReader::Boolean(v) = tsv_parser
|
||||
/// .parse_tokio_async_read(b"true".as_slice())
|
||||
/// .await?
|
||||
/// {
|
||||
/// assert_eq!(v, true);
|
||||
/// }
|
||||
///
|
||||
/// // solutions
|
||||
/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = tsv_parser
|
||||
/// .parse_tokio_async_read(b"?foo\t?bar\n\"test\"\t".as_slice())
|
||||
/// .await?
|
||||
/// {
|
||||
/// assert_eq!(
|
||||
/// solutions.variables(),
|
||||
/// &[
|
||||
/// Variable::new_unchecked("foo"),
|
||||
/// Variable::new_unchecked("bar")
|
||||
/// ]
|
||||
/// );
|
||||
/// while let Some(solution) = solutions.next().await {
|
||||
/// assert_eq!(
|
||||
/// solution?.iter().collect::<Vec<_>>(),
|
||||
/// vec![(
|
||||
/// &Variable::new_unchecked("foo"),
|
||||
/// &Literal::from("test").into()
|
||||
/// )]
|
||||
/// );
|
||||
/// }
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub enum FromTokioAsyncReadQueryResultsReader<R: AsyncRead + Unpin> { |
||||
Solutions(FromTokioAsyncReadSolutionsReader<R>), |
||||
Boolean(bool), |
||||
} |
||||
|
||||
/// A streaming reader of a set of [`QuerySolution`] solutions.
|
||||
///
|
||||
/// It implements the [`Iterator`] API to iterate over the solutions.
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader};
|
||||
/// use oxrdf::{Literal, Variable};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
|
||||
/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
|
||||
/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = json_parser.parse_tokio_async_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).await? {
|
||||
/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
|
||||
/// while let Some(solution) = solutions.next().await {
|
||||
/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
|
||||
/// }
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub struct FromTokioAsyncReadSolutionsReader<R: AsyncRead + Unpin> { |
||||
variables: Arc<[Variable]>, |
||||
solutions: FromTokioAsyncReadSolutionsReaderKind<R>, |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
enum FromTokioAsyncReadSolutionsReaderKind<R: AsyncRead + Unpin> { |
||||
Json(FromTokioAsyncReadJsonSolutionsReader<R>), |
||||
Xml(FromTokioAsyncReadXmlSolutionsReader<R>), |
||||
Tsv(FromTokioAsyncReadTsvSolutionsReader<R>), |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<R: AsyncRead + Unpin> FromTokioAsyncReadSolutionsReader<R> { |
||||
/// Ordered list of the declared variables at the beginning of the results.
|
||||
///
|
||||
/// Example in TSV (the API is the same for JSON and XML):
|
||||
/// ```
|
||||
/// use oxrdf::Variable;
|
||||
/// use sparesults::{
|
||||
/// FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser,
|
||||
/// };
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
|
||||
/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
|
||||
/// if let FromTokioAsyncReadQueryResultsReader::Solutions(solutions) = tsv_parser
|
||||
/// .parse_tokio_async_read(b"?foo\t?bar\n\"ex1\"\t\"ex2\"".as_slice())
|
||||
/// .await?
|
||||
/// {
|
||||
/// assert_eq!(
|
||||
/// solutions.variables(),
|
||||
/// &[
|
||||
/// Variable::new_unchecked("foo"),
|
||||
/// Variable::new_unchecked("bar")
|
||||
/// ]
|
||||
/// );
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[inline] |
||||
pub fn variables(&self) -> &[Variable] { |
||||
&self.variables |
||||
} |
||||
|
||||
/// Reads the next solution or returns `None` if the file is finished.
|
||||
pub async fn next(&mut self) -> Option<Result<QuerySolution, QueryResultsParseError>> { |
||||
Some( |
||||
match &mut self.solutions { |
||||
FromTokioAsyncReadSolutionsReaderKind::Json(reader) => reader.read_next().await, |
||||
FromTokioAsyncReadSolutionsReaderKind::Xml(reader) => reader.read_next().await, |
||||
FromTokioAsyncReadSolutionsReaderKind::Tsv(reader) => reader.read_next().await, |
||||
} |
||||
.transpose()? |
||||
.map(|values| (Arc::clone(&self.variables), values).into()), |
||||
) |
||||
} |
||||
} |
@ -0,0 +1,427 @@ |
||||
use crate::oxrdf::{TermRef, Variable, VariableRef}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::sparesults::csv::{ |
||||
tokio_async_write_boolean_csv_result, ToTokioAsyncWriteCsvSolutionsWriter, |
||||
ToTokioAsyncWriteTsvSolutionsWriter, |
||||
}; |
||||
use crate::sparesults::csv::{ |
||||
write_boolean_csv_result, ToWriteCsvSolutionsWriter, ToWriteTsvSolutionsWriter, |
||||
}; |
||||
use crate::sparesults::format::QueryResultsFormat; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::sparesults::json::{ |
||||
tokio_async_write_boolean_json_result, ToTokioAsyncWriteJsonSolutionsWriter, |
||||
}; |
||||
use crate::sparesults::json::{write_boolean_json_result, ToWriteJsonSolutionsWriter}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use crate::sparesults::xml::{ |
||||
tokio_async_write_boolean_xml_result, ToTokioAsyncWriteXmlSolutionsWriter, |
||||
}; |
||||
use crate::sparesults::xml::{write_boolean_xml_result, ToWriteXmlSolutionsWriter}; |
||||
use std::io::{self, Write}; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::AsyncWrite; |
||||
|
||||
/// A serializer for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
|
||||
///
|
||||
/// It currently supports the following formats:
|
||||
/// * [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml))
|
||||
/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json))
|
||||
/// * [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Csv`](QueryResultsFormat::Csv))
|
||||
/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv))
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
/// use oxrdf::{LiteralRef, Variable, VariableRef};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
|
||||
///
|
||||
/// // boolean
|
||||
/// let mut buffer = Vec::new();
|
||||
/// json_serializer.serialize_boolean_to_write(&mut buffer, true)?;
|
||||
/// assert_eq!(buffer, br#"{"head":{},"boolean":true}"#);
|
||||
///
|
||||
/// // solutions
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = json_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?;
|
||||
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?;
|
||||
/// writer.finish()?;
|
||||
/// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#);
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub struct QueryResultsSerializer { |
||||
format: QueryResultsFormat, |
||||
} |
||||
|
||||
impl QueryResultsSerializer { |
||||
/// Builds a serializer for the given format.
|
||||
#[inline] |
||||
pub fn from_format(format: QueryResultsFormat) -> Self { |
||||
Self { format } |
||||
} |
||||
|
||||
/// Write a boolean query result (from an `ASK` query) into the given [`Write`] implementation.
|
||||
///
|
||||
/// Example in XML (the API is the same for JSON, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
///
|
||||
/// let xml_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Xml);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// xml_serializer.serialize_boolean_to_write(&mut buffer, true)?;
|
||||
/// assert_eq!(buffer, br#"<?xml version="1.0"?><sparql xmlns="http://www.w3.org/2005/sparql-results#"><head></head><boolean>true</boolean></sparql>"#);
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_boolean_to_write<W: Write>(&self, write: W, value: bool) -> io::Result<W> { |
||||
match self.format { |
||||
QueryResultsFormat::Xml => write_boolean_xml_result(write, value), |
||||
QueryResultsFormat::Json => write_boolean_json_result(write, value), |
||||
QueryResultsFormat::Csv | QueryResultsFormat::Tsv => { |
||||
write_boolean_csv_result(write, value) |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Write a boolean query result (from an `ASK` query) into the given [`AsyncWrite`] implementation.
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// json_serializer
|
||||
/// .serialize_boolean_to_tokio_async_write(&mut buffer, false)
|
||||
/// .await?;
|
||||
/// assert_eq!(buffer, br#"{"head":{},"boolean":false}"#);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub async fn serialize_boolean_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
&self, |
||||
write: W, |
||||
value: bool, |
||||
) -> io::Result<W> { |
||||
match self.format { |
||||
QueryResultsFormat::Xml => tokio_async_write_boolean_xml_result(write, value).await, |
||||
QueryResultsFormat::Json => tokio_async_write_boolean_json_result(write, value).await, |
||||
QueryResultsFormat::Csv | QueryResultsFormat::Tsv => { |
||||
tokio_async_write_boolean_csv_result(write, value).await |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[deprecated(note = "use serialize_boolean_to_write", since = "0.4.0")] |
||||
pub fn write_boolean_result<W: Write>(&self, writer: W, value: bool) -> io::Result<W> { |
||||
self.serialize_boolean_to_write(writer, value) |
||||
} |
||||
|
||||
/// Returns a `SolutionsWriter` allowing writing query solutions into the given [`Write`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// Example in XML (the API is the same for JSON, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
/// use oxrdf::{LiteralRef, Variable, VariableRef};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// let xml_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Xml);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = xml_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?;
|
||||
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?;
|
||||
/// writer.finish()?;
|
||||
/// assert_eq!(buffer, br#"<?xml version="1.0"?><sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#);
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub fn serialize_solutions_to_write<W: Write>( |
||||
&self, |
||||
write: W, |
||||
variables: Vec<Variable>, |
||||
) -> io::Result<ToWriteSolutionsWriter<W>> { |
||||
Ok(ToWriteSolutionsWriter { |
||||
formatter: match self.format { |
||||
QueryResultsFormat::Xml => ToWriteSolutionsWriterKind::Xml( |
||||
ToWriteXmlSolutionsWriter::start(write, &variables)?, |
||||
), |
||||
QueryResultsFormat::Json => ToWriteSolutionsWriterKind::Json( |
||||
ToWriteJsonSolutionsWriter::start(write, &variables)?, |
||||
), |
||||
QueryResultsFormat::Csv => ToWriteSolutionsWriterKind::Csv( |
||||
ToWriteCsvSolutionsWriter::start(write, variables)?, |
||||
), |
||||
QueryResultsFormat::Tsv => ToWriteSolutionsWriterKind::Tsv( |
||||
ToWriteTsvSolutionsWriter::start(write, variables)?, |
||||
), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
/// Returns a `SolutionsWriter` allowing writing query solutions into the given [`Write`] implementation.
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// Example in XML (the API is the same for JSON, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
/// use oxrdf::{LiteralRef, Variable, VariableRef};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = json_serializer.serialize_solutions_to_tokio_async_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]).await?;
|
||||
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))).await?;
|
||||
/// writer.finish().await?;
|
||||
/// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
pub async fn serialize_solutions_to_tokio_async_write<W: AsyncWrite + Unpin>( |
||||
&self, |
||||
write: W, |
||||
variables: Vec<Variable>, |
||||
) -> io::Result<ToTokioAsyncWriteSolutionsWriter<W>> { |
||||
Ok(ToTokioAsyncWriteSolutionsWriter { |
||||
formatter: match self.format { |
||||
QueryResultsFormat::Xml => ToTokioAsyncWriteSolutionsWriterKind::Xml( |
||||
ToTokioAsyncWriteXmlSolutionsWriter::start(write, &variables).await?, |
||||
), |
||||
QueryResultsFormat::Json => ToTokioAsyncWriteSolutionsWriterKind::Json( |
||||
ToTokioAsyncWriteJsonSolutionsWriter::start(write, &variables).await?, |
||||
), |
||||
QueryResultsFormat::Csv => ToTokioAsyncWriteSolutionsWriterKind::Csv( |
||||
ToTokioAsyncWriteCsvSolutionsWriter::start(write, variables).await?, |
||||
), |
||||
QueryResultsFormat::Tsv => ToTokioAsyncWriteSolutionsWriterKind::Tsv( |
||||
ToTokioAsyncWriteTsvSolutionsWriter::start(write, variables).await?, |
||||
), |
||||
}, |
||||
}) |
||||
} |
||||
|
||||
#[deprecated(note = "use serialize_solutions_to_write", since = "0.4.0")] |
||||
pub fn solutions_writer<W: Write>( |
||||
&self, |
||||
writer: W, |
||||
variables: Vec<Variable>, |
||||
) -> io::Result<ToWriteSolutionsWriter<W>> { |
||||
self.serialize_solutions_to_write(writer, variables) |
||||
} |
||||
} |
||||
|
||||
impl From<QueryResultsFormat> for QueryResultsSerializer { |
||||
fn from(format: QueryResultsFormat) -> Self { |
||||
Self::from_format(format) |
||||
} |
||||
} |
||||
|
||||
/// Allows writing query results into a [`Write`] implementation.
|
||||
///
|
||||
/// Could be built using a [`QueryResultsSerializer`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// Example in TSV (the API is the same for JSON, XML and CSV):
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, Variable, VariableRef};
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = tsv_serializer.serialize_solutions_to_write(
|
||||
/// &mut buffer,
|
||||
/// vec![
|
||||
/// Variable::new_unchecked("foo"),
|
||||
/// Variable::new_unchecked("bar"),
|
||||
/// ],
|
||||
/// )?;
|
||||
/// writer.write(once((
|
||||
/// VariableRef::new_unchecked("foo"),
|
||||
/// LiteralRef::from("test"),
|
||||
/// )))?;
|
||||
/// writer.finish()?;
|
||||
/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n");
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
#[must_use] |
||||
pub struct ToWriteSolutionsWriter<W: Write> { |
||||
formatter: ToWriteSolutionsWriterKind<W>, |
||||
} |
||||
|
||||
enum ToWriteSolutionsWriterKind<W: Write> { |
||||
Xml(ToWriteXmlSolutionsWriter<W>), |
||||
Json(ToWriteJsonSolutionsWriter<W>), |
||||
Csv(ToWriteCsvSolutionsWriter<W>), |
||||
Tsv(ToWriteTsvSolutionsWriter<W>), |
||||
} |
||||
|
||||
impl<W: Write> ToWriteSolutionsWriter<W> { |
||||
/// Writes a solution.
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer, QuerySolution};
|
||||
/// use oxrdf::{Literal, LiteralRef, Variable, VariableRef};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = json_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?;
|
||||
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?;
|
||||
/// writer.write(&QuerySolution::from((vec![Variable::new_unchecked("bar")], vec![Some(Literal::from("test").into())])))?;
|
||||
/// writer.finish()?;
|
||||
/// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}},{"bar":{"type":"literal","value":"test"}}]}}"#);
|
||||
/// # std::io::Result::Ok(())
|
||||
/// ```
|
||||
pub fn write<'a>( |
||||
&mut self, |
||||
solution: impl IntoIterator<Item = (impl Into<VariableRef<'a>>, impl Into<TermRef<'a>>)>, |
||||
) -> io::Result<()> { |
||||
let solution = solution.into_iter().map(|(v, s)| (v.into(), s.into())); |
||||
match &mut self.formatter { |
||||
ToWriteSolutionsWriterKind::Xml(writer) => writer.write(solution), |
||||
ToWriteSolutionsWriterKind::Json(writer) => writer.write(solution), |
||||
ToWriteSolutionsWriterKind::Csv(writer) => writer.write(solution), |
||||
ToWriteSolutionsWriterKind::Tsv(writer) => writer.write(solution), |
||||
} |
||||
} |
||||
|
||||
/// Writes the last bytes of the file.
|
||||
pub fn finish(self) -> io::Result<W> { |
||||
match self.formatter { |
||||
ToWriteSolutionsWriterKind::Xml(write) => write.finish(), |
||||
ToWriteSolutionsWriterKind::Json(write) => write.finish(), |
||||
ToWriteSolutionsWriterKind::Csv(write) => Ok(write.finish()), |
||||
ToWriteSolutionsWriterKind::Tsv(write) => Ok(write.finish()), |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Allows writing query results into an [`AsyncWrite`] implementation.
|
||||
|
||||
/// Could be built using a [`QueryResultsSerializer`].
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// Do not forget to run the [`finish`](ToTokioAsyncWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
|
||||
///
|
||||
/// <div class="warning">
|
||||
///
|
||||
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
|
||||
///
|
||||
/// Example in TSV (the API is the same for JSON, CSV and XML):
|
||||
/// ```
|
||||
/// use oxrdf::{LiteralRef, Variable, VariableRef};
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = tsv_serializer
|
||||
/// .serialize_solutions_to_tokio_async_write(
|
||||
/// &mut buffer,
|
||||
/// vec![
|
||||
/// Variable::new_unchecked("foo"),
|
||||
/// Variable::new_unchecked("bar"),
|
||||
/// ],
|
||||
/// )
|
||||
/// .await?;
|
||||
/// writer
|
||||
/// .write(once((
|
||||
/// VariableRef::new_unchecked("foo"),
|
||||
/// LiteralRef::from("test"),
|
||||
/// )))
|
||||
/// .await?;
|
||||
/// writer.finish().await?;
|
||||
/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "async-tokio")] |
||||
#[must_use] |
||||
pub struct ToTokioAsyncWriteSolutionsWriter<W: AsyncWrite + Unpin> { |
||||
formatter: ToTokioAsyncWriteSolutionsWriterKind<W>, |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
enum ToTokioAsyncWriteSolutionsWriterKind<W: AsyncWrite + Unpin> { |
||||
Xml(ToTokioAsyncWriteXmlSolutionsWriter<W>), |
||||
Json(ToTokioAsyncWriteJsonSolutionsWriter<W>), |
||||
Csv(ToTokioAsyncWriteCsvSolutionsWriter<W>), |
||||
Tsv(ToTokioAsyncWriteTsvSolutionsWriter<W>), |
||||
} |
||||
|
||||
#[cfg(feature = "async-tokio")] |
||||
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteSolutionsWriter<W> { |
||||
/// Writes a solution.
|
||||
///
|
||||
/// Example in JSON (the API is the same for XML, CSV and TSV):
|
||||
/// ```
|
||||
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer, QuerySolution};
|
||||
/// use oxrdf::{Literal, LiteralRef, Variable, VariableRef};
|
||||
/// use std::iter::once;
|
||||
///
|
||||
/// # #[tokio::main(flavor = "current_thread")]
|
||||
/// # async fn main() -> std::io::Result<()> {
|
||||
/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
|
||||
/// let mut buffer = Vec::new();
|
||||
/// let mut writer = json_serializer.serialize_solutions_to_tokio_async_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]).await?;
|
||||
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))).await?;
|
||||
/// writer.write(&QuerySolution::from((vec![Variable::new_unchecked("bar")], vec![Some(Literal::from("test").into())]))).await?;
|
||||
/// writer.finish().await?;
|
||||
/// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}},{"bar":{"type":"literal","value":"test"}}]}}"#);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn write<'a>( |
||||
&mut self, |
||||
solution: impl IntoIterator<Item = (impl Into<VariableRef<'a>>, impl Into<TermRef<'a>>)>, |
||||
) -> io::Result<()> { |
||||
let solution = solution.into_iter().map(|(v, s)| (v.into(), s.into())); |
||||
match &mut self.formatter { |
||||
ToTokioAsyncWriteSolutionsWriterKind::Xml(writer) => writer.write(solution).await, |
||||
ToTokioAsyncWriteSolutionsWriterKind::Json(writer) => writer.write(solution).await, |
||||
ToTokioAsyncWriteSolutionsWriterKind::Csv(writer) => writer.write(solution).await, |
||||
ToTokioAsyncWriteSolutionsWriterKind::Tsv(writer) => writer.write(solution).await, |
||||
} |
||||
} |
||||
|
||||
/// Writes the last bytes of the file.
|
||||
pub async fn finish(self) -> io::Result<W> { |
||||
match self.formatter { |
||||
ToTokioAsyncWriteSolutionsWriterKind::Xml(write) => write.finish().await, |
||||
ToTokioAsyncWriteSolutionsWriterKind::Json(write) => write.finish().await, |
||||
ToTokioAsyncWriteSolutionsWriterKind::Csv(write) => Ok(write.finish()), |
||||
ToTokioAsyncWriteSolutionsWriterKind::Tsv(write) => Ok(write.finish()), |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,340 @@ |
||||
//! Definition of [`QuerySolution`] structure and associated utility constructions.
|
||||
|
||||
use crate::oxrdf::{Term, Variable, VariableRef}; |
||||
use std::fmt; |
||||
use std::iter::Zip; |
||||
use std::ops::Index; |
||||
use std::sync::Arc; |
||||
|
||||
/// Tuple associating variables and terms that are the result of a SPARQL query.
|
||||
///
|
||||
/// It is the equivalent of a row in SQL.
|
||||
///
|
||||
/// ```
|
||||
/// use sparesults::QuerySolution;
|
||||
/// use oxrdf::{Variable, Literal};
|
||||
///
|
||||
/// let solution = QuerySolution::from((vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")], vec![Some(Literal::from(1).into()), None]));
|
||||
/// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes).
|
||||
/// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no).
|
||||
/// ```
|
||||
pub struct QuerySolution { |
||||
variables: Arc<[Variable]>, |
||||
values: Vec<Option<Term>>, |
||||
} |
||||
|
||||
impl QuerySolution {
    /// Returns a value for a given position in the tuple ([`usize`](std::usize)) or a given variable name ([`&str`](std::str), [`Variable`] or [`VariableRef`]).
    ///
    /// ```
    /// use sparesults::QuerySolution;
    /// use oxrdf::{Variable, Literal};
    ///
    /// let solution = QuerySolution::from((vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")], vec![Some(Literal::from(1).into()), None]));
    /// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes).
    /// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no).
    /// ```
    #[inline]
    pub fn get(&self, index: impl VariableSolutionIndex) -> Option<&Term> {
        // Resolve the index to a column position, then flatten the
        // `Option<&Option<Term>>` produced by the checked slice lookup.
        self.values.get(index.index(self)?).and_then(Option::as_ref)
    }

    /// The number of variables which could be bound.
    ///
    /// It is also the number of columns in the solutions table.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar"),
    ///     ],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(solution.len(), 2);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Is there any variable bound in the table?
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar"),
    ///     ],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert!(!solution.is_empty());
    ///
    /// let empty_solution = QuerySolution::from((
    ///     vec![
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar"),
    ///     ],
    ///     vec![None, None],
    /// ));
    /// assert!(empty_solution.is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        // NOTE: deliberately "no binding set", not "no columns", so this is
        // not equivalent to `len() == 0`.
        self.values.iter().all(Option::is_none)
    }

    /// Returns an iterator over bound variables.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar"),
    ///     ],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(
    ///     solution.iter().collect::<Vec<_>>(),
    ///     vec![(&Variable::new_unchecked("foo"), &Literal::from(1).into())]
    /// );
    /// ```
    #[inline]
    pub fn iter(&self) -> impl Iterator<Item = (&Variable, &Term)> {
        self.into_iter()
    }

    /// Returns the ordered slice of variable values.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar"),
    ///     ],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(solution.values(), &[Some(Literal::from(1).into()), None]);
    /// ```
    #[inline]
    pub fn values(&self) -> &[Option<Term>] {
        &self.values
    }

    /// Returns the ordered slice of the solution variables, bound or not.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar"),
    ///     ],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(
    ///     solution.variables(),
    ///     &[
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar")
    ///     ]
    /// );
    /// ```
    #[inline]
    pub fn variables(&self) -> &[Variable] {
        &self.variables
    }
}
||||
|
||||
impl<V: Into<Arc<[Variable]>>, S: Into<Vec<Option<Term>>>> From<(V, S)> for QuerySolution { |
||||
#[inline] |
||||
fn from((v, s): (V, S)) -> Self { |
||||
Self { |
||||
variables: v.into(), |
||||
values: s.into(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl<'a> IntoIterator for &'a QuerySolution { |
||||
type Item = (&'a Variable, &'a Term); |
||||
type IntoIter = Iter<'a>; |
||||
|
||||
#[inline] |
||||
fn into_iter(self) -> Self::IntoIter { |
||||
Iter { |
||||
inner: self.variables.iter().zip(&self.values), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Index<usize> for QuerySolution { |
||||
type Output = Term; |
||||
|
||||
#[allow(clippy::panic)] |
||||
#[inline] |
||||
fn index(&self, index: usize) -> &Self::Output { |
||||
self.get(index) |
||||
.unwrap_or_else(|| panic!("The column {index} is not set in this solution")) |
||||
} |
||||
} |
||||
|
||||
impl Index<&str> for QuerySolution { |
||||
type Output = Term; |
||||
|
||||
#[allow(clippy::panic)] |
||||
#[inline] |
||||
fn index(&self, index: &str) -> &Self::Output { |
||||
self.get(index) |
||||
.unwrap_or_else(|| panic!("The variable ?{index} is not set in this solution")) |
||||
} |
||||
} |
||||
|
||||
impl Index<VariableRef<'_>> for QuerySolution { |
||||
type Output = Term; |
||||
|
||||
#[allow(clippy::panic)] |
||||
#[inline] |
||||
fn index(&self, index: VariableRef<'_>) -> &Self::Output { |
||||
self.get(index) |
||||
.unwrap_or_else(|| panic!("The variable {index} is not set in this solution")) |
||||
} |
||||
} |
||||
impl Index<Variable> for QuerySolution { |
||||
type Output = Term; |
||||
|
||||
#[inline] |
||||
fn index(&self, index: Variable) -> &Self::Output { |
||||
self.index(index.as_ref()) |
||||
} |
||||
} |
||||
|
||||
impl Index<&Variable> for QuerySolution { |
||||
type Output = Term; |
||||
|
||||
#[inline] |
||||
fn index(&self, index: &Variable) -> &Self::Output { |
||||
self.index(index.as_ref()) |
||||
} |
||||
} |
||||
|
||||
impl PartialEq for QuerySolution { |
||||
fn eq(&self, other: &Self) -> bool { |
||||
for (k, v) in self.iter() { |
||||
if other.get(k) != Some(v) { |
||||
return false; |
||||
} |
||||
} |
||||
for (k, v) in other.iter() { |
||||
if self.get(k) != Some(v) { |
||||
return false; |
||||
} |
||||
} |
||||
true |
||||
} |
||||
} |
||||
|
||||
impl Eq for QuerySolution {} |
||||
|
||||
impl fmt::Debug for QuerySolution { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
f.debug_map().entries(self.iter()).finish() |
||||
} |
||||
} |
||||
|
||||
/// An iterator over [`QuerySolution`] bound variables.
///
/// ```
/// use oxrdf::{Literal, Variable};
/// use sparesults::QuerySolution;
///
/// let solution = QuerySolution::from((
///     vec![
///         Variable::new_unchecked("foo"),
///         Variable::new_unchecked("bar"),
///     ],
///     vec![Some(Literal::from(1).into()), None],
/// ));
/// assert_eq!(
///     solution.iter().collect::<Vec<_>>(),
///     vec![(&Variable::new_unchecked("foo"), &Literal::from(1).into())]
/// );
/// ```
pub struct Iter<'a> {
    // Lock-step walk over the column header and the value row; unbound
    // columns (`None` values) are skipped by `Iterator::next`.
    inner: Zip<std::slice::Iter<'a, Variable>, std::slice::Iter<'a, Option<Term>>>,
}
||||
|
||||
impl<'a> Iterator for Iter<'a> { |
||||
type Item = (&'a Variable, &'a Term); |
||||
|
||||
#[inline] |
||||
fn next(&mut self) -> Option<Self::Item> { |
||||
for (variable, value) in &mut self.inner { |
||||
if let Some(value) = value { |
||||
return Some((variable, value)); |
||||
} |
||||
} |
||||
None |
||||
} |
||||
|
||||
#[inline] |
||||
fn size_hint(&self) -> (usize, Option<usize>) { |
||||
(0, self.inner.size_hint().1) |
||||
} |
||||
} |
||||
|
||||
/// A utility trait to get values for a given variable or tuple position.
///
/// See [`QuerySolution::get`].
pub trait VariableSolutionIndex {
    // Resolves `self` to a column position in `solution`, or `None` when the
    // variable is not part of the solution's header.
    fn index(self, solution: &QuerySolution) -> Option<usize>;
}
||||
|
||||
impl VariableSolutionIndex for usize { |
||||
#[inline] |
||||
fn index(self, _: &QuerySolution) -> Option<usize> { |
||||
Some(self) |
||||
} |
||||
} |
||||
|
||||
impl VariableSolutionIndex for &str { |
||||
#[inline] |
||||
fn index(self, solution: &QuerySolution) -> Option<usize> { |
||||
solution.variables.iter().position(|v| v.as_str() == self) |
||||
} |
||||
} |
||||
|
||||
impl VariableSolutionIndex for VariableRef<'_> { |
||||
#[inline] |
||||
fn index(self, solution: &QuerySolution) -> Option<usize> { |
||||
solution.variables.iter().position(|v| *v == self) |
||||
} |
||||
} |
||||
|
||||
impl VariableSolutionIndex for &Variable { |
||||
#[inline] |
||||
fn index(self, solution: &QuerySolution) -> Option<usize> { |
||||
self.as_ref().index(solution) |
||||
} |
||||
} |
||||
|
||||
impl VariableSolutionIndex for Variable { |
||||
#[inline] |
||||
fn index(self, solution: &QuerySolution) -> Option<usize> { |
||||
self.as_ref().index(solution) |
||||
} |
||||
} |
@ -0,0 +1,833 @@ |
||||
//! Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/)
|
||||
|
||||
use crate::oxrdf::vocab::rdf; |
||||
use crate::oxrdf::*; |
||||
use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError}; |
||||
use quick_xml::escape::unescape; |
||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; |
||||
use quick_xml::{Decoder, Error, Reader, Writer}; |
||||
use std::collections::BTreeMap; |
||||
use std::io::{self, BufReader, Read, Write}; |
||||
use std::mem::take; |
||||
use std::sync::Arc; |
||||
#[cfg(feature = "async-tokio")] |
||||
use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader}; |
||||
|
||||
/// Serializes a boolean (ASK) query result in the SPARQL XML format to `write`,
/// returning the writer on success.
pub fn write_boolean_xml_result<W: Write>(write: W, value: bool) -> io::Result<W> {
    let mut writer = Writer::new(write);
    // The whole document is a fixed sequence of 8 events.
    for event in inner_write_boolean_xml_result(value) {
        writer.write_event(event).map_err(map_xml_error)?;
    }
    Ok(writer.into_inner())
}
||||
|
||||
/// Async (Tokio) variant of [`write_boolean_xml_result`]: serializes a boolean
/// query result in the SPARQL XML format to `write`.
#[cfg(feature = "async-tokio")]
pub async fn tokio_async_write_boolean_xml_result<W: AsyncWrite + Unpin>(
    write: W,
    value: bool,
) -> io::Result<W> {
    let mut writer = Writer::new(write);
    for event in inner_write_boolean_xml_result(value) {
        writer
            .write_event_async(event)
            .await
            .map_err(map_xml_error)?;
    }
    Ok(writer.into_inner())
}
||||
|
||||
/// Builds the fixed event sequence of a boolean result document:
/// `<?xml?><sparql><head></head><boolean>true|false</boolean></sparql>`.
fn inner_write_boolean_xml_result(value: bool) -> [Event<'static>; 8] {
    [
        Event::Decl(BytesDecl::new("1.0", None, None)),
        Event::Start(
            BytesStart::new("sparql")
                .with_attributes([("xmlns", "http://www.w3.org/2005/sparql-results#")]),
        ),
        Event::Start(BytesStart::new("head")),
        Event::End(BytesEnd::new("head")),
        Event::Start(BytesStart::new("boolean")),
        Event::Text(BytesText::new(if value { "true" } else { "false" })),
        Event::End(BytesEnd::new("boolean")),
        Event::End(BytesEnd::new("sparql")),
    ]
}
||||
|
||||
/// Streaming writer of SPARQL XML solutions to a synchronous [`Write`] sink.
pub struct ToWriteXmlSolutionsWriter<W: Write> {
    // Sync/async-agnostic core that emits XML events into a buffer.
    inner: InnerXmlSolutionsWriter,
    // Destination the buffered events are flushed to.
    writer: Writer<W>,
}
||||
|
||||
impl<W: Write> ToWriteXmlSolutionsWriter<W> {
    /// Writes the document prologue (XML declaration, `<sparql>` root and the
    /// `<head>` declaring `variables`) and opens the `<results>` element.
    pub fn start(write: W, variables: &[Variable]) -> io::Result<Self> {
        let mut writer = Writer::new(write);
        let mut buffer = Vec::with_capacity(48);
        let inner = InnerXmlSolutionsWriter::start(&mut buffer, variables);
        Self::do_write(&mut writer, buffer)?;
        Ok(Self { inner, writer })
    }

    /// Serializes one solution as a `<result>` element.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        let mut buffer = Vec::with_capacity(48);
        self.inner.write(&mut buffer, solution);
        Self::do_write(&mut self.writer, buffer)
    }

    /// Closes `</results></sparql>` and returns the underlying writer.
    pub fn finish(mut self) -> io::Result<W> {
        let mut buffer = Vec::with_capacity(4);
        self.inner.finish(&mut buffer);
        Self::do_write(&mut self.writer, buffer)?;
        Ok(self.writer.into_inner())
    }

    // Flushes a batch of buffered XML events to the destination.
    fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
        for event in output {
            writer.write_event(event).map_err(map_xml_error)?;
        }
        Ok(())
    }
}
||||
|
||||
/// Streaming writer of SPARQL XML solutions to a Tokio [`AsyncWrite`] sink.
///
/// Async mirror of [`ToWriteXmlSolutionsWriter`], sharing the same
/// [`InnerXmlSolutionsWriter`] core.
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteXmlSolutionsWriter<W: AsyncWrite + Unpin> {
    inner: InnerXmlSolutionsWriter,
    writer: Writer<W>,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteXmlSolutionsWriter<W> {
    /// Writes the document prologue and opens the `<results>` element.
    pub async fn start(write: W, variables: &[Variable]) -> io::Result<Self> {
        let mut writer = Writer::new(write);
        let mut buffer = Vec::with_capacity(48);
        let inner = InnerXmlSolutionsWriter::start(&mut buffer, variables);
        Self::do_write(&mut writer, buffer).await?;
        Ok(Self { inner, writer })
    }

    /// Serializes one solution as a `<result>` element.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        let mut buffer = Vec::with_capacity(48);
        self.inner.write(&mut buffer, solution);
        Self::do_write(&mut self.writer, buffer).await
    }

    /// Closes `</results></sparql>` and returns the underlying writer.
    pub async fn finish(mut self) -> io::Result<W> {
        let mut buffer = Vec::with_capacity(4);
        self.inner.finish(&mut buffer);
        Self::do_write(&mut self.writer, buffer).await?;
        Ok(self.writer.into_inner())
    }

    // Flushes a batch of buffered XML events to the destination.
    async fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
        for event in output {
            writer
                .write_event_async(event)
                .await
                .map_err(map_xml_error)?;
        }
        Ok(())
    }
}
||||
|
||||
/// Stateless core shared by the sync and async solution writers: it turns
/// solutions into `quick_xml` event sequences pushed onto a caller buffer.
struct InnerXmlSolutionsWriter;

impl InnerXmlSolutionsWriter {
    /// Pushes the document prologue: XML declaration, `<sparql>` root,
    /// `<head>` listing `variables` and the opening `<results>` tag.
    fn start<'a>(output: &mut Vec<Event<'a>>, variables: &'a [Variable]) -> Self {
        output.push(Event::Decl(BytesDecl::new("1.0", None, None)));
        output.push(Event::Start(BytesStart::new("sparql").with_attributes([(
            "xmlns",
            "http://www.w3.org/2005/sparql-results#",
        )])));
        output.push(Event::Start(BytesStart::new("head")));
        for variable in variables {
            // `<variable name="..."/>` is an empty element.
            output.push(Event::Empty(
                BytesStart::new("variable").with_attributes([("name", variable.as_str())]),
            ));
        }
        output.push(Event::End(BytesEnd::new("head")));
        output.push(Event::Start(BytesStart::new("results")));
        Self {}
    }

    /// Pushes one `<result>` element with a `<binding>` per bound variable.
    #[allow(clippy::unused_self)]
    fn write<'a>(
        &self,
        output: &mut Vec<Event<'a>>,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) {
        output.push(Event::Start(BytesStart::new("result")));
        for (variable, value) in solution {
            output.push(Event::Start(
                BytesStart::new("binding").with_attributes([("name", variable.as_str())]),
            ));
            write_xml_term(output, value);
            output.push(Event::End(BytesEnd::new("binding")));
        }
        output.push(Event::End(BytesEnd::new("result")));
    }

    /// Pushes the closing `</results></sparql>` tags.
    #[allow(clippy::unused_self)]
    fn finish(self, output: &mut Vec<Event<'_>>) {
        output.push(Event::End(BytesEnd::new("results")));
        output.push(Event::End(BytesEnd::new("sparql")));
    }
}
||||
|
||||
/// Pushes the XML events serializing a single RDF term:
/// `<uri>`, `<bnode>`, `<literal>` or (with RDF-star) a nested `<triple>`.
fn write_xml_term<'a>(output: &mut Vec<Event<'a>>, term: TermRef<'a>) {
    match term {
        TermRef::NamedNode(uri) => {
            output.push(Event::Start(BytesStart::new("uri")));
            output.push(Event::Text(BytesText::new(uri.as_str())));
            output.push(Event::End(BytesEnd::new("uri")));
        }
        TermRef::BlankNode(bnode) => {
            output.push(Event::Start(BytesStart::new("bnode")));
            output.push(Event::Text(BytesText::new(bnode.as_str())));
            output.push(Event::End(BytesEnd::new("bnode")));
        }
        TermRef::Literal(literal) => {
            let mut start = BytesStart::new("literal");
            // `xml:lang` and `datatype` are mutually exclusive per the SPARQL
            // XML results format; plain literals carry neither attribute.
            if let Some(language) = literal.language() {
                start.push_attribute(("xml:lang", language));
            } else if !literal.is_plain() {
                start.push_attribute(("datatype", literal.datatype().as_str()))
            }
            output.push(Event::Start(start));
            output.push(Event::Text(BytesText::new(literal.value())));
            output.push(Event::End(BytesEnd::new("literal")));
        }
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => {
            // Quoted triples recurse into their three components.
            output.push(Event::Start(BytesStart::new("triple")));
            output.push(Event::Start(BytesStart::new("subject")));
            write_xml_term(output, triple.subject.as_ref().into());
            output.push(Event::End(BytesEnd::new("subject")));
            output.push(Event::Start(BytesStart::new("predicate")));
            write_xml_term(output, triple.predicate.as_ref().into());
            output.push(Event::End(BytesEnd::new("predicate")));
            output.push(Event::Start(BytesStart::new("object")));
            write_xml_term(output, triple.object.as_ref());
            output.push(Event::End(BytesEnd::new("object")));
            output.push(Event::End(BytesEnd::new("triple")));
        }
    }
}
||||
|
||||
/// Result of parsing the header of a SPARQL XML results document from a
/// synchronous [`Read`]: either a solutions stream or a boolean answer.
pub enum FromReadXmlQueryResultsReader<R: Read> {
    Solutions {
        // Variables declared in the document `<head>`.
        variables: Vec<Variable>,
        // Streaming reader over the `<result>` elements.
        solutions: FromReadXmlSolutionsReader<R>,
    },
    Boolean(bool),
}

impl<R: Read> FromReadXmlQueryResultsReader<R> {
    /// Consumes events until the header is fully parsed, then returns either
    /// the boolean answer or a solutions reader positioned after `<results>`.
    pub fn read(read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(BufReader::new(read));
        reader.trim_text(true);
        // Normalize `<x/>` into `<x></x>` so the state machine only ever sees
        // Start/End pairs (it treats `Event::Empty` as unreachable).
        reader.expand_empty_elements(true);
        let mut reader_buffer = Vec::new();
        let mut inner = XmlInnerQueryResultsReader {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };
        loop {
            reader_buffer.clear();
            let event = reader.read_event_into(&mut reader_buffer)?;
            if let Some(result) = inner.read_event(event)? {
                return Ok(match result {
                    XmlInnerQueryResults::Solutions {
                        variables,
                        solutions,
                    } => Self::Solutions {
                        variables,
                        solutions: FromReadXmlSolutionsReader {
                            reader,
                            inner: solutions,
                            reader_buffer,
                        },
                    },
                    XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
                });
            }
        }
    }
}
||||
|
||||
/// Streaming reader over the `<result>` elements of a SPARQL XML results
/// document, created by [`FromReadXmlQueryResultsReader::read`].
pub struct FromReadXmlSolutionsReader<R: Read> {
    // XML pull parser, already positioned after the opening `<results>` tag.
    reader: Reader<BufReader<R>>,
    // Per-solution parsing state machine.
    inner: XmlInnerSolutionsReader,
    // Reused event buffer to avoid reallocating on every event.
    reader_buffer: Vec<u8>,
}

impl<R: Read> FromReadXmlSolutionsReader<R> {
    /// Reads the next solution row, or `None` at end of input.
    ///
    /// Each returned vector is ordered like the declared variables; `None`
    /// entries are unbound columns.
    pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            self.reader_buffer.clear();
            let event = self.reader.read_event_into(&mut self.reader_buffer)?;
            if event == Event::Eof {
                return Ok(None);
            }
            if let Some(solution) = self.inner.read_event(event)? {
                return Ok(Some(solution));
            }
        }
    }
}
||||
|
||||
/// Async (Tokio) mirror of [`FromReadXmlQueryResultsReader`].
#[cfg(feature = "async-tokio")]
pub enum FromTokioAsyncReadXmlQueryResultsReader<R: AsyncRead + Unpin> {
    Solutions {
        variables: Vec<Variable>,
        solutions: FromTokioAsyncReadXmlSolutionsReader<R>,
    },
    Boolean(bool),
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlQueryResultsReader<R> {
    /// Consumes events until the header is fully parsed, then returns either
    /// the boolean answer or a solutions reader positioned after `<results>`.
    pub async fn read(read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(AsyncBufReader::new(read));
        reader.trim_text(true);
        // See the sync reader: empty elements are expanded so the state
        // machine only handles Start/End pairs.
        reader.expand_empty_elements(true);
        let mut reader_buffer = Vec::new();
        let mut inner = XmlInnerQueryResultsReader {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };
        loop {
            reader_buffer.clear();
            let event = reader.read_event_into_async(&mut reader_buffer).await?;
            if let Some(result) = inner.read_event(event)? {
                return Ok(match result {
                    XmlInnerQueryResults::Solutions {
                        variables,
                        solutions,
                    } => Self::Solutions {
                        variables,
                        solutions: FromTokioAsyncReadXmlSolutionsReader {
                            reader,
                            inner: solutions,
                            reader_buffer,
                        },
                    },
                    XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
                });
            }
        }
    }
}

/// Async (Tokio) mirror of [`FromReadXmlSolutionsReader`].
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadXmlSolutionsReader<R: AsyncRead + Unpin> {
    reader: Reader<AsyncBufReader<R>>,
    inner: XmlInnerSolutionsReader,
    reader_buffer: Vec<u8>,
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlSolutionsReader<R> {
    /// Reads the next solution row, or `None` at end of input.
    pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            self.reader_buffer.clear();
            let event = self
                .reader
                .read_event_into_async(&mut self.reader_buffer)
                .await?;
            if event == Event::Eof {
                return Ok(None);
            }
            if let Some(solution) = self.inner.read_event(event)? {
                return Ok(Some(solution));
            }
        }
    }
}
||||
|
||||
// Outcome of the header state machine: either the declared variables plus a
// ready-to-run solutions state machine, or the boolean answer.
enum XmlInnerQueryResults {
    Solutions {
        variables: Vec<Variable>,
        solutions: XmlInnerSolutionsReader,
    },
    Boolean(bool),
}
||||
|
||||
// Position of the header state machine inside the document:
// Start -> Sparql (inside <sparql>) -> Head (inside <head>) ->
// AfterHead (</head> seen) -> Boolean (inside <boolean>).
#[derive(Clone, Copy)]
enum ResultsState {
    Start,
    Sparql,
    Head,
    AfterHead,
    Boolean,
}
||||
|
||||
// State machine parsing the document header (everything up to and including
// the opening <results> or the <boolean> value).
struct XmlInnerQueryResultsReader {
    state: ResultsState,
    // Variables collected from <variable name="..."/> tags, in order.
    variables: Vec<Variable>,
    // Decoder of the underlying reader, used to turn raw bytes into strings.
    decoder: Decoder,
}
||||
|
||||
impl XmlInnerQueryResultsReader {
    /// Feeds one XML event into the header state machine.
    ///
    /// Returns `Ok(Some(_))` once the header is complete: either the boolean
    /// answer, or the variable list plus a fresh solutions state machine.
    /// Returns `Ok(None)` while more events are needed.
    pub fn read_event(
        &mut self,
        event: Event<'_>,
    ) -> Result<Option<XmlInnerQueryResults>, QueryResultsParseError> {
        match event {
            Event::Start(event) => match self.state {
                ResultsState::Start => {
                    // The document root must be <sparql>.
                    if event.local_name().as_ref() == b"sparql" {
                        self.state = ResultsState::Sparql;
                        Ok(None)
                    } else {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    }
                }
                ResultsState::Sparql => {
                    if event.local_name().as_ref() == b"head" {
                        self.state = ResultsState::Head;
                        Ok(None)
                    } else {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    }
                }
                ResultsState::Head => {
                    if event.local_name().as_ref() == b"variable" {
                        // Collect the mandatory name="..." attribute, rejecting
                        // invalid or duplicate variable names.
                        let name = event.attributes()
                            .filter_map(Result::ok)
                            .find(|attr| attr.key.local_name().as_ref() == b"name")
                            .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))?;
                        let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned();
                        let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?;
                        if self.variables.contains(&variable) {
                            return Err(QueryResultsSyntaxError::msg(format!(
                                "The variable {variable} is declared twice"
                            ))
                            .into());
                        }
                        self.variables.push(variable);
                        Ok(None)
                    } else if event.local_name().as_ref() == b"link" {
                        // no op
                        Ok(None)
                    } else {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    }
                }
                ResultsState::AfterHead => {
                    if event.local_name().as_ref() == b"boolean" {
                        self.state = ResultsState::Boolean;
                        Ok(None)
                    } else if event.local_name().as_ref() == b"results" {
                        // Build the name -> column index mapping and hand the
                        // remainder of the parse over to the solutions reader.
                        let mut mapping = BTreeMap::default();
                        for (i, var) in self.variables.iter().enumerate() {
                            mapping.insert(var.clone().into_string(), i);
                        }
                        Ok(Some(XmlInnerQueryResults::Solutions {
                            variables: take(&mut self.variables),
                            solutions: XmlInnerSolutionsReader {
                                decoder: self.decoder,
                                mapping,
                                state_stack: vec![State::Start, State::Start],
                                new_bindings: Vec::new(),
                                current_var: None,
                                term: None,
                                lang: None,
                                datatype: None,
                                subject_stack: Vec::new(),
                                predicate_stack: Vec::new(),
                                object_stack: Vec::new(),
                            },
                        }))
                    // NOTE(review): "results" and "boolean" are already handled
                    // above, so this condition reduces to `!= b"link"`; the
                    // final `else` therefore only matches <link>.
                    } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    } else {
                        Ok(None)
                    }
                }
                ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into())
            },
            Event::Text(event) => {
                // The only text the header accepts is the <boolean> value.
                let value = event.unescape()?;
                match self.state {
                    ResultsState::Boolean => {
                        if value == "true" {
                            Ok(Some(XmlInnerQueryResults::Boolean(true)))
                        } else if value == "false" {
                            Ok(Some(XmlInnerQueryResults::Boolean(false)))
                        } else {
                            Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into())
                        }
                    }
                    _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into())
                }
            }
            Event::End(event) => {
                // </head> moves to AfterHead; any other end tag here means the
                // document terminated before producing a result.
                if let ResultsState::Head = self.state {
                    if event.local_name().as_ref() == b"head" {
                        self.state = ResultsState::AfterHead
                    }
                    Ok(None)
                } else {
                    Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into())
                }
            }
            Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()),
            Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
                Ok(None)
            }
            // The reader is configured with expand_empty_elements(true).
            Event::Empty(_) => unreachable!("Empty events are expended"),
            Event::CData(_) => {
                Err(QueryResultsSyntaxError::msg(
                    "<![CDATA[...]]> are not supported in SPARQL XML results",
                )
                .into())
            }
        }
    }
}
||||
|
||||
// Element nesting state of the solutions parser; kept on a stack because
// RDF-star <triple> elements nest terms recursively.
enum State {
    Start,
    Result,
    Binding,
    Uri,
    BNode,
    Literal,
    Triple,
    Subject,
    Predicate,
    Object,
}
||||
|
||||
// State machine parsing the <result> elements inside <results>.
struct XmlInnerSolutionsReader {
    // Decoder of the underlying reader, used to turn raw bytes into strings.
    decoder: Decoder,
    // Variable name -> column index, built from the <head> declarations.
    mapping: BTreeMap<String, usize>,
    // Current element nesting; two Start entries are pushed initially so a
    // closing </results> can be popped without underflow.
    state_stack: Vec<State>,
    // Row under construction, one slot per declared variable.
    new_bindings: Vec<Option<Term>>,
    // Name of the <binding> currently being parsed.
    current_var: Option<String>,
    // Last fully parsed term, waiting to be attached to a binding or triple part.
    term: Option<Term>,
    // Pending xml:lang attribute of the current <literal>.
    lang: Option<String>,
    // Pending datatype attribute of the current <literal>.
    datatype: Option<NamedNode>,
    // Stacks for the components of (possibly nested) RDF-star <triple> terms.
    subject_stack: Vec<Term>,
    predicate_stack: Vec<Term>,
    object_stack: Vec<Term>,
}
||||
|
||||
impl XmlInnerSolutionsReader { |
||||
#[allow(clippy::unwrap_in_result)] |
||||
pub fn read_event( |
||||
&mut self, |
||||
event: Event<'_>, |
||||
) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> { |
||||
match event { |
||||
Event::Start(event) => match self.state_stack.last().unwrap() { |
||||
State::Start => { |
||||
if event.local_name().as_ref() == b"result" { |
||||
self.new_bindings = vec![None; self.mapping.len()]; |
||||
self.state_stack.push(State::Result); |
||||
Ok(None) |
||||
} else { |
||||
Err(QueryResultsSyntaxError::msg(format!( |
||||
"Expecting <result>, found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()) |
||||
} |
||||
} |
||||
State::Result => { |
||||
if event.local_name().as_ref() == b"binding" { |
||||
let Some(attr) = event |
||||
.attributes() |
||||
.filter_map(Result::ok) |
||||
.find(|attr| attr.key.local_name().as_ref() == b"name") |
||||
else { |
||||
return Err(QueryResultsSyntaxError::msg( |
||||
"No name attribute found for the <binding> tag", |
||||
) |
||||
.into()); |
||||
}; |
||||
self.current_var = |
||||
Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned()); |
||||
self.state_stack.push(State::Binding); |
||||
Ok(None) |
||||
} else { |
||||
Err(QueryResultsSyntaxError::msg(format!( |
||||
"Expecting <binding>, found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()) |
||||
} |
||||
} |
||||
State::Binding | State::Subject | State::Predicate | State::Object => { |
||||
if self.term.is_some() { |
||||
return Err(QueryResultsSyntaxError::msg( |
||||
"There is already a value for the current binding", |
||||
) |
||||
.into()); |
||||
} |
||||
if event.local_name().as_ref() == b"uri" { |
||||
self.state_stack.push(State::Uri); |
||||
Ok(None) |
||||
} else if event.local_name().as_ref() == b"bnode" { |
||||
self.state_stack.push(State::BNode); |
||||
Ok(None) |
||||
} else if event.local_name().as_ref() == b"literal" { |
||||
for attr in event.attributes() { |
||||
let attr = attr.map_err(Error::from)?; |
||||
if attr.key.as_ref() == b"xml:lang" { |
||||
self.lang = Some( |
||||
unescape(&self.decoder.decode(&attr.value)?)?.into_owned(), |
||||
); |
||||
} else if attr.key.local_name().as_ref() == b"datatype" { |
||||
let iri = self.decoder.decode(&attr.value)?; |
||||
let iri = unescape(&iri)?; |
||||
self.datatype = |
||||
Some(NamedNode::new(iri.as_ref()).map_err(|e| { |
||||
QueryResultsSyntaxError::msg(format!( |
||||
"Invalid datatype IRI '{iri}': {e}" |
||||
)) |
||||
})?); |
||||
} |
||||
} |
||||
self.state_stack.push(State::Literal); |
||||
Ok(None) |
||||
} else if event.local_name().as_ref() == b"triple" { |
||||
self.state_stack.push(State::Triple); |
||||
Ok(None) |
||||
} else { |
||||
Err(QueryResultsSyntaxError::msg(format!( |
||||
"Expecting <uri>, <bnode> or <literal> found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()) |
||||
} |
||||
} |
||||
State::Triple => { |
||||
if event.local_name().as_ref() == b"subject" { |
||||
self.state_stack.push(State::Subject); |
||||
Ok(None) |
||||
} else if event.local_name().as_ref() == b"predicate" { |
||||
self.state_stack.push(State::Predicate); |
||||
Ok(None) |
||||
} else if event.local_name().as_ref() == b"object" { |
||||
self.state_stack.push(State::Object); |
||||
Ok(None) |
||||
} else { |
||||
Err(QueryResultsSyntaxError::msg(format!( |
||||
"Expecting <subject>, <predicate> or <object> found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()) |
||||
} |
||||
} |
||||
State::Uri => Err(QueryResultsSyntaxError::msg(format!( |
||||
"<uri> must only contain a string, found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()), |
||||
State::BNode => Err(QueryResultsSyntaxError::msg(format!( |
||||
"<uri> must only contain a string, found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()), |
||||
State::Literal => Err(QueryResultsSyntaxError::msg(format!( |
||||
"<uri> must only contain a string, found <{}>", |
||||
self.decoder.decode(event.name().as_ref())? |
||||
)) |
||||
.into()), |
||||
}, |
||||
Event::Text(event) => { |
||||
let data = event.unescape()?; |
||||
match self.state_stack.last().unwrap() { |
||||
State::Uri => { |
||||
self.term = Some( |
||||
NamedNode::new(data.to_string()) |
||||
.map_err(|e| { |
||||
QueryResultsSyntaxError::msg(format!( |
||||
"Invalid IRI value '{data}': {e}" |
||||
)) |
||||
})? |
||||
.into(), |
||||
); |
||||
Ok(None) |
||||
} |
||||
State::BNode => { |
||||
self.term = Some( |
||||
BlankNode::new(data.to_string()) |
||||
.map_err(|e| { |
||||
QueryResultsSyntaxError::msg(format!( |
||||
"Invalid blank node value '{data}': {e}" |
||||
)) |
||||
})? |
||||
.into(), |
||||
); |
||||
Ok(None) |
||||
} |
||||
State::Literal => { |
||||
self.term = Some( |
||||
build_literal(data, self.lang.take(), self.datatype.take())?.into(), |
||||
); |
||||
Ok(None) |
||||
} |
||||
_ => Err(QueryResultsSyntaxError::msg(format!( |
||||
"Unexpected textual value found: {data}" |
||||
)) |
||||
.into()), |
||||
} |
||||
} |
||||
Event::End(_) => match self.state_stack.pop().unwrap() { |
||||
State::Start | State::Uri => Ok(None), |
||||
State::Result => Ok(Some(take(&mut self.new_bindings))), |
||||
State::Binding => { |
||||
if let Some(var) = &self.current_var { |
||||
if let Some(var) = self.mapping.get(var) { |
||||
self.new_bindings[*var] = self.term.take() |
||||
} else { |
||||
return Err( |
||||
QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into() |
||||
); |
||||
} |
||||
} else { |
||||
return Err(QueryResultsSyntaxError::msg( |
||||
"No name found for <binding> tag", |
||||
) |
||||
.into()); |
||||
} |
||||
Ok(None) |
||||
} |
||||
State::Subject => { |
||||
if let Some(subject) = self.term.take() { |
||||
self.subject_stack.push(subject) |
||||
} |
||||
Ok(None) |
||||
} |
||||
State::Predicate => { |
||||
if let Some(predicate) = self.term.take() { |
||||
self.predicate_stack.push(predicate) |
||||
} |
||||
Ok(None) |
||||
} |
||||
State::Object => { |
||||
if let Some(object) = self.term.take() { |
||||
self.object_stack.push(object) |
||||
} |
||||
Ok(None) |
||||
} |
||||
State::BNode => { |
||||
if self.term.is_none() { |
||||
// We default to a random bnode
|
||||
self.term = Some(BlankNode::default().into()) |
||||
} |
||||
Ok(None) |
||||
} |
||||
State::Literal => { |
||||
if self.term.is_none() { |
||||
// We default to the empty literal
|
||||
self.term = |
||||
Some(build_literal("", self.lang.take(), self.datatype.take())?.into()) |
||||
} |
||||
Ok(None) |
||||
} |
||||
State::Triple => { |
||||
#[cfg(feature = "rdf-star")] |
||||
if let (Some(subject), Some(predicate), Some(object)) = ( |
||||
self.subject_stack.pop(), |
||||
self.predicate_stack.pop(), |
||||
self.object_stack.pop(), |
||||
) { |
||||
self.term = Some( |
||||
Triple::new( |
||||
match subject { |
||||
Term::NamedNode(subject) => subject.into(), |
||||
Term::BlankNode(subject) => subject.into(), |
||||
Term::Triple(subject) => Subject::Triple(subject), |
||||
Term::Literal(_) => { |
||||
return Err(QueryResultsSyntaxError::msg( |
||||
"The <subject> value should not be a <literal>", |
||||
) |
||||
.into()); |
||||
} |
||||
}, |
||||
match predicate { |
||||
Term::NamedNode(predicate) => predicate, |
||||
_ => { |
||||
return Err(QueryResultsSyntaxError::msg( |
||||
"The <predicate> value should be an <uri>", |
||||
) |
||||
.into()); |
||||
} |
||||
}, |
||||
object, |
||||
) |
||||
.into(), |
||||
); |
||||
Ok(None) |
||||
} else { |
||||
Err(QueryResultsSyntaxError::msg( |
||||
"A <triple> should contain a <subject>, a <predicate> and an <object>", |
||||
) |
||||
.into()) |
||||
} |
||||
#[cfg(not(feature = "rdf-star"))] |
||||
{ |
||||
Err(QueryResultsSyntaxError::msg( |
||||
"The <triple> tag is only supported with RDF-star", |
||||
) |
||||
.into()) |
||||
} |
||||
} |
||||
}, |
||||
Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => { |
||||
Ok(None) |
||||
} |
||||
Event::Empty(_) => unreachable!("Empty events are expended"), |
||||
Event::CData(_) => Err(QueryResultsSyntaxError::msg( |
||||
"<![CDATA[...]]> are not supported in SPARQL XML results", |
||||
) |
||||
.into()), |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn build_literal( |
||||
value: impl Into<String>, |
||||
lang: Option<String>, |
||||
datatype: Option<NamedNode>, |
||||
) -> Result<Literal, QueryResultsParseError> { |
||||
match lang { |
||||
Some(lang) => { |
||||
if let Some(datatype) = datatype { |
||||
if datatype.as_ref() != rdf::LANG_STRING { |
||||
return Err(QueryResultsSyntaxError::msg(format!( |
||||
"xml:lang value '{lang}' provided with the datatype {datatype}" |
||||
)) |
||||
.into()); |
||||
} |
||||
} |
||||
Literal::new_language_tagged_literal(value, &lang).map_err(|e| { |
||||
QueryResultsSyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into() |
||||
}) |
||||
} |
||||
None => Ok(if let Some(datatype) = datatype { |
||||
Literal::new_typed_literal(value, datatype) |
||||
} else { |
||||
Literal::new_simple_literal(value) |
||||
}), |
||||
} |
||||
} |
||||
|
||||
fn map_xml_error(error: Error) -> io::Error { |
||||
match error { |
||||
Error::Io(error) => { |
||||
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error)) |
||||
} |
||||
Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error), |
||||
_ => io::Error::new(io::ErrorKind::InvalidData, error), |
||||
} |
||||
} |
@ -0,0 +1,46 @@ |
||||
Spargebra |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/spargebra.svg)](https://crates.io/crates/spargebra) |
||||
[![Released API docs](https://docs.rs/spargebra/badge.svg)](https://docs.rs/spargebra) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/spargebra)](https://crates.io/crates/spargebra) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
Spargebra is a [SPARQL](https://www.w3.org/TR/sparql11-overview/) parser. |
||||
|
||||
It supports both [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) and [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/). |
||||
|
||||
The emitted tree is based on [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) objects. |
||||
|
||||
The API entry point for SPARQL queries is the [`Query`] struct and the API entry point for SPARQL updates is the [`Update`] struct. |
||||
|
||||
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature. |
||||
|
||||
This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org). |
||||
|
||||
Usage example: |
||||
|
||||
```rust |
||||
use spargebra::Query; |
||||
|
||||
let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }"; |
||||
let query = Query::parse(query_str, None).unwrap(); |
||||
assert_eq!(query.to_string(), query_str); |
||||
``` |
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,9 @@ |
||||
pub mod algebra; |
||||
mod parser; |
||||
mod query; |
||||
pub mod term; |
||||
mod update; |
||||
|
||||
pub use parser::SparqlSyntaxError; |
||||
pub use query::*; |
||||
pub use update::*; |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,300 @@ |
||||
use crate::spargebra::algebra::*; |
||||
use crate::spargebra::parser::{parse_query, SparqlSyntaxError}; |
||||
use crate::spargebra::term::*; |
||||
use oxiri::Iri; |
||||
use std::fmt; |
||||
use std::str::FromStr; |
||||
|
||||
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
|
||||
///
|
||||
/// ```
|
||||
/// use spargebra::Query;
|
||||
///
|
||||
/// let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }";
|
||||
/// let query = Query::parse(query_str, None)?;
|
||||
/// assert_eq!(query.to_string(), query_str);
|
||||
/// assert_eq!(
|
||||
/// query.to_sse(),
|
||||
/// "(project (?s ?p ?o) (bgp (triple ?s ?p ?o)))"
|
||||
/// );
|
||||
/// # Ok::<_, spargebra::SparqlSyntaxError>(())
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Hash)] |
||||
pub enum Query { |
||||
/// [SELECT](https://www.w3.org/TR/sparql11-query/#select).
|
||||
Select { |
||||
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
|
||||
dataset: Option<QueryDataset>, |
||||
/// The query selection graph pattern.
|
||||
pattern: GraphPattern, |
||||
/// The query base IRI.
|
||||
base_iri: Option<Iri<String>>, |
||||
}, |
||||
/// [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct).
|
||||
Construct { |
||||
/// The query construction template.
|
||||
template: Vec<TriplePattern>, |
||||
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
|
||||
dataset: Option<QueryDataset>, |
||||
/// The query selection graph pattern.
|
||||
pattern: GraphPattern, |
||||
/// The query base IRI.
|
||||
base_iri: Option<Iri<String>>, |
||||
}, |
||||
/// [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe).
|
||||
Describe { |
||||
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
|
||||
dataset: Option<QueryDataset>, |
||||
/// The query selection graph pattern.
|
||||
pattern: GraphPattern, |
||||
/// The query base IRI.
|
||||
base_iri: Option<Iri<String>>, |
||||
}, |
||||
/// [ASK](https://www.w3.org/TR/sparql11-query/#ask).
|
||||
Ask { |
||||
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
|
||||
dataset: Option<QueryDataset>, |
||||
/// The query selection graph pattern.
|
||||
pattern: GraphPattern, |
||||
/// The query base IRI.
|
||||
base_iri: Option<Iri<String>>, |
||||
}, |
||||
} |
||||
|
||||
impl Query { |
||||
/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
|
||||
pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> { |
||||
parse_query(query, base_iri) |
||||
} |
||||
|
||||
/// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
|
||||
pub fn to_sse(&self) -> String { |
||||
let mut buffer = String::new(); |
||||
self.fmt_sse(&mut buffer).unwrap(); |
||||
buffer |
||||
} |
||||
|
||||
/// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
|
||||
fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { |
||||
match self { |
||||
Self::Select { |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
write!(f, "(base <{base_iri}> ")?; |
||||
} |
||||
if let Some(dataset) = dataset { |
||||
f.write_str("(dataset ")?; |
||||
dataset.fmt_sse(f)?; |
||||
f.write_str(" ")?; |
||||
} |
||||
pattern.fmt_sse(f)?; |
||||
if dataset.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
if base_iri.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
Self::Construct { |
||||
template, |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
write!(f, "(base <{base_iri}> ")?; |
||||
} |
||||
f.write_str("(construct (")?; |
||||
for (i, t) in template.iter().enumerate() { |
||||
if i > 0 { |
||||
f.write_str(" ")?; |
||||
} |
||||
t.fmt_sse(f)?; |
||||
} |
||||
f.write_str(") ")?; |
||||
if let Some(dataset) = dataset { |
||||
f.write_str("(dataset ")?; |
||||
dataset.fmt_sse(f)?; |
||||
f.write_str(" ")?; |
||||
} |
||||
pattern.fmt_sse(f)?; |
||||
if dataset.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
f.write_str(")")?; |
||||
if base_iri.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
Self::Describe { |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
write!(f, "(base <{base_iri}> ")?; |
||||
} |
||||
f.write_str("(describe ")?; |
||||
if let Some(dataset) = dataset { |
||||
f.write_str("(dataset ")?; |
||||
dataset.fmt_sse(f)?; |
||||
f.write_str(" ")?; |
||||
} |
||||
pattern.fmt_sse(f)?; |
||||
if dataset.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
f.write_str(")")?; |
||||
if base_iri.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
Self::Ask { |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
write!(f, "(base <{base_iri}> ")?; |
||||
} |
||||
f.write_str("(ask ")?; |
||||
if let Some(dataset) = dataset { |
||||
f.write_str("(dataset ")?; |
||||
dataset.fmt_sse(f)?; |
||||
f.write_str(" ")?; |
||||
} |
||||
pattern.fmt_sse(f)?; |
||||
if dataset.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
f.write_str(")")?; |
||||
if base_iri.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Query { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
match self { |
||||
Self::Select { |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
writeln!(f, "BASE <{base_iri}>")?; |
||||
} |
||||
write!( |
||||
f, |
||||
"{}", |
||||
SparqlGraphRootPattern { |
||||
pattern, |
||||
dataset: dataset.as_ref() |
||||
} |
||||
) |
||||
} |
||||
Self::Construct { |
||||
template, |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
writeln!(f, "BASE <{base_iri}>")?; |
||||
} |
||||
f.write_str("CONSTRUCT { ")?; |
||||
for triple in template { |
||||
write!(f, "{triple} . ")?; |
||||
} |
||||
f.write_str("}")?; |
||||
if let Some(dataset) = dataset { |
||||
dataset.fmt(f)?; |
||||
} |
||||
write!( |
||||
f, |
||||
" WHERE {{ {} }}", |
||||
SparqlGraphRootPattern { |
||||
pattern, |
||||
dataset: None |
||||
} |
||||
) |
||||
} |
||||
Self::Describe { |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
writeln!(f, "BASE <{}>", base_iri.as_str())?; |
||||
} |
||||
f.write_str("DESCRIBE *")?; |
||||
if let Some(dataset) = dataset { |
||||
dataset.fmt(f)?; |
||||
} |
||||
write!( |
||||
f, |
||||
" WHERE {{ {} }}", |
||||
SparqlGraphRootPattern { |
||||
pattern, |
||||
dataset: None |
||||
} |
||||
) |
||||
} |
||||
Self::Ask { |
||||
dataset, |
||||
pattern, |
||||
base_iri, |
||||
} => { |
||||
if let Some(base_iri) = base_iri { |
||||
writeln!(f, "BASE <{base_iri}>")?; |
||||
} |
||||
f.write_str("ASK")?; |
||||
if let Some(dataset) = dataset { |
||||
dataset.fmt(f)?; |
||||
} |
||||
write!( |
||||
f, |
||||
" WHERE {{ {} }}", |
||||
SparqlGraphRootPattern { |
||||
pattern, |
||||
dataset: None |
||||
} |
||||
) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Query { |
||||
type Err = SparqlSyntaxError; |
||||
|
||||
fn from_str(query: &str) -> Result<Self, Self::Err> { |
||||
Self::parse(query, None) |
||||
} |
||||
} |
||||
|
||||
impl<'a> TryFrom<&'a str> for Query { |
||||
type Error = SparqlSyntaxError; |
||||
|
||||
fn try_from(query: &str) -> Result<Self, Self::Error> { |
||||
Self::from_str(query) |
||||
} |
||||
} |
||||
|
||||
impl<'a> TryFrom<&'a String> for Query { |
||||
type Error = SparqlSyntaxError; |
||||
|
||||
fn try_from(query: &String) -> Result<Self, Self::Error> { |
||||
Self::from_str(query) |
||||
} |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,344 @@ |
||||
use crate::spargebra::algebra::*; |
||||
use crate::spargebra::parser::{parse_update, SparqlSyntaxError}; |
||||
use crate::spargebra::term::*; |
||||
use oxiri::Iri; |
||||
use std::fmt; |
||||
use std::str::FromStr; |
||||
|
||||
/// A parsed [SPARQL update](https://www.w3.org/TR/sparql11-update/).
|
||||
///
|
||||
/// ```
|
||||
/// use spargebra::Update;
|
||||
///
|
||||
/// let update_str = "CLEAR ALL ;";
|
||||
/// let update = Update::parse(update_str, None)?;
|
||||
/// assert_eq!(update.to_string().trim(), update_str);
|
||||
/// assert_eq!(update.to_sse(), "(update (clear all))");
|
||||
/// # Ok::<_, spargebra::SparqlSyntaxError>(())
|
||||
/// ```
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Hash)] |
||||
pub struct Update { |
||||
/// The update base IRI.
|
||||
pub base_iri: Option<Iri<String>>, |
||||
/// The [update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate).
|
||||
pub operations: Vec<GraphUpdateOperation>, |
||||
} |
||||
|
||||
impl Update { |
||||
/// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query.
|
||||
pub fn parse(update: &str, base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> { |
||||
parse_update(update, base_iri) |
||||
} |
||||
|
||||
/// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
|
||||
pub fn to_sse(&self) -> String { |
||||
let mut buffer = String::new(); |
||||
self.fmt_sse(&mut buffer).unwrap(); |
||||
buffer |
||||
} |
||||
|
||||
/// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
|
||||
fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { |
||||
if let Some(base_iri) = &self.base_iri { |
||||
write!(f, "(base <{base_iri}> ")?; |
||||
} |
||||
f.write_str("(update")?; |
||||
for op in &self.operations { |
||||
f.write_str(" ")?; |
||||
op.fmt_sse(f)?; |
||||
} |
||||
f.write_str(")")?; |
||||
if self.base_iri.is_some() { |
||||
f.write_str(")")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for Update { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
if let Some(base_iri) = &self.base_iri { |
||||
writeln!(f, "BASE <{base_iri}>")?; |
||||
} |
||||
for update in &self.operations { |
||||
writeln!(f, "{update} ;")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
} |
||||
|
||||
impl FromStr for Update { |
||||
type Err = SparqlSyntaxError; |
||||
|
||||
fn from_str(update: &str) -> Result<Self, Self::Err> { |
||||
Self::parse(update, None) |
||||
} |
||||
} |
||||
|
||||
impl<'a> TryFrom<&'a str> for Update { |
||||
type Error = SparqlSyntaxError; |
||||
|
||||
fn try_from(update: &str) -> Result<Self, Self::Error> { |
||||
Self::from_str(update) |
||||
} |
||||
} |
||||
|
||||
impl<'a> TryFrom<&'a String> for Update { |
||||
type Error = SparqlSyntaxError; |
||||
|
||||
fn try_from(update: &String) -> Result<Self, Self::Error> { |
||||
Self::from_str(update) |
||||
} |
||||
} |
||||
|
||||
/// The [graph update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate).
|
||||
#[derive(Eq, PartialEq, Debug, Clone, Hash)] |
||||
pub enum GraphUpdateOperation { |
||||
/// [insert data](https://www.w3.org/TR/sparql11-update/#defn_insertDataOperation).
|
||||
InsertData { data: Vec<Quad> }, |
||||
/// [delete data](https://www.w3.org/TR/sparql11-update/#defn_deleteDataOperation).
|
||||
DeleteData { data: Vec<GroundQuad> }, |
||||
/// [delete insert](https://www.w3.org/TR/sparql11-update/#defn_deleteInsertOperation).
|
||||
DeleteInsert { |
||||
delete: Vec<GroundQuadPattern>, |
||||
insert: Vec<QuadPattern>, |
||||
using: Option<QueryDataset>, |
||||
pattern: Box<GraphPattern>, |
||||
}, |
||||
/// [load](https://www.w3.org/TR/sparql11-update/#defn_loadOperation).
|
||||
Load { |
||||
silent: bool, |
||||
source: NamedNode, |
||||
destination: GraphName, |
||||
}, |
||||
/// [clear](https://www.w3.org/TR/sparql11-update/#defn_clearOperation).
|
||||
Clear { silent: bool, graph: GraphTarget }, |
||||
/// [create](https://www.w3.org/TR/sparql11-update/#defn_createOperation).
|
||||
Create { silent: bool, graph: NamedNode }, |
||||
/// [drop](https://www.w3.org/TR/sparql11-update/#defn_dropOperation).
|
||||
Drop { silent: bool, graph: GraphTarget }, |
||||
} |
||||
|
||||
impl GraphUpdateOperation { |
||||
/// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
|
||||
fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result { |
||||
match self { |
||||
Self::InsertData { data } => { |
||||
f.write_str("(insertData (")?; |
||||
for (i, t) in data.iter().enumerate() { |
||||
if i > 0 { |
||||
f.write_str(" ")?; |
||||
} |
||||
t.fmt_sse(f)?; |
||||
} |
||||
f.write_str("))") |
||||
} |
||||
Self::DeleteData { data } => { |
||||
f.write_str("(deleteData (")?; |
||||
for (i, t) in data.iter().enumerate() { |
||||
if i > 0 { |
||||
f.write_str(" ")?; |
||||
} |
||||
t.fmt_sse(f)?; |
||||
} |
||||
f.write_str("))") |
||||
} |
||||
Self::DeleteInsert { |
||||
delete, |
||||
insert, |
||||
using, |
||||
pattern, |
||||
} => { |
||||
f.write_str("(modify ")?; |
||||
if let Some(using) = using { |
||||
f.write_str(" (using ")?; |
||||
using.fmt_sse(f)?; |
||||
f.write_str(" ")?; |
||||
pattern.fmt_sse(f)?; |
||||
f.write_str(")")?; |
||||
} else { |
||||
pattern.fmt_sse(f)?; |
||||
} |
||||
if !delete.is_empty() { |
||||
f.write_str(" (delete (")?; |
||||
for (i, t) in delete.iter().enumerate() { |
||||
if i > 0 { |
||||
f.write_str(" ")?; |
||||
} |
||||
t.fmt_sse(f)?; |
||||
} |
||||
f.write_str("))")?; |
||||
} |
||||
if !insert.is_empty() { |
||||
f.write_str(" (insert (")?; |
||||
for (i, t) in insert.iter().enumerate() { |
||||
if i > 0 { |
||||
f.write_str(" ")?; |
||||
} |
||||
t.fmt_sse(f)?; |
||||
} |
||||
f.write_str("))")?; |
||||
} |
||||
f.write_str(")") |
||||
} |
||||
Self::Load { |
||||
silent, |
||||
source, |
||||
destination, |
||||
} => { |
||||
f.write_str("(load ")?; |
||||
if *silent { |
||||
f.write_str("silent ")?; |
||||
} |
||||
write!(f, "{source} ")?; |
||||
destination.fmt_sse(f)?; |
||||
f.write_str(")") |
||||
} |
||||
Self::Clear { silent, graph } => { |
||||
f.write_str("(clear ")?; |
||||
if *silent { |
||||
f.write_str("silent ")?; |
||||
} |
||||
graph.fmt_sse(f)?; |
||||
f.write_str(")") |
||||
} |
||||
Self::Create { silent, graph } => { |
||||
f.write_str("(create ")?; |
||||
if *silent { |
||||
f.write_str("silent ")?; |
||||
} |
||||
write!(f, "{graph})") |
||||
} |
||||
Self::Drop { silent, graph } => { |
||||
f.write_str("(drop ")?; |
||||
if *silent { |
||||
f.write_str("silent ")?; |
||||
} |
||||
graph.fmt_sse(f)?; |
||||
f.write_str(")") |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for GraphUpdateOperation { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
match self { |
||||
Self::InsertData { data } => { |
||||
writeln!(f, "INSERT DATA {{")?; |
||||
write_quads(data, f)?; |
||||
f.write_str("}") |
||||
} |
||||
Self::DeleteData { data } => { |
||||
writeln!(f, "DELETE DATA {{")?; |
||||
write_ground_quads(data, f)?; |
||||
f.write_str("}") |
||||
} |
||||
Self::DeleteInsert { |
||||
delete, |
||||
insert, |
||||
using, |
||||
pattern, |
||||
} => { |
||||
if !delete.is_empty() { |
||||
writeln!(f, "DELETE {{")?; |
||||
for quad in delete { |
||||
writeln!(f, "\t{quad} .")?; |
||||
} |
||||
writeln!(f, "}}")?; |
||||
} |
||||
if !insert.is_empty() { |
||||
writeln!(f, "INSERT {{")?; |
||||
for quad in insert { |
||||
writeln!(f, "\t{quad} .")?; |
||||
} |
||||
writeln!(f, "}}")?; |
||||
} |
||||
if let Some(using) = using { |
||||
for g in &using.default { |
||||
writeln!(f, "USING {g}")?; |
||||
} |
||||
if let Some(named) = &using.named { |
||||
for g in named { |
||||
writeln!(f, "USING NAMED {g}")?; |
||||
} |
||||
} |
||||
} |
||||
write!( |
||||
f, |
||||
"WHERE {{ {} }}", |
||||
SparqlGraphRootPattern { |
||||
pattern, |
||||
dataset: None |
||||
} |
||||
) |
||||
} |
||||
Self::Load { |
||||
silent, |
||||
source, |
||||
destination, |
||||
} => { |
||||
f.write_str("LOAD ")?; |
||||
if *silent { |
||||
f.write_str("SILENT ")?; |
||||
} |
||||
write!(f, "{source}")?; |
||||
if destination != &GraphName::DefaultGraph { |
||||
write!(f, " INTO GRAPH {destination}")?; |
||||
} |
||||
Ok(()) |
||||
} |
||||
Self::Clear { silent, graph } => { |
||||
f.write_str("CLEAR ")?; |
||||
if *silent { |
||||
f.write_str("SILENT ")?; |
||||
} |
||||
write!(f, "{graph}") |
||||
} |
||||
Self::Create { silent, graph } => { |
||||
f.write_str("CREATE ")?; |
||||
if *silent { |
||||
f.write_str("SILENT ")?; |
||||
} |
||||
write!(f, "GRAPH {graph}") |
||||
} |
||||
Self::Drop { silent, graph } => { |
||||
f.write_str("DROP ")?; |
||||
if *silent { |
||||
f.write_str("SILENT ")?; |
||||
} |
||||
write!(f, "{graph}") |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn write_quads(quads: &[Quad], f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
for quad in quads { |
||||
if quad.graph_name == GraphName::DefaultGraph { |
||||
writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?; |
||||
} else { |
||||
writeln!( |
||||
f, |
||||
"\tGRAPH {} {{ {} {} {} }}", |
||||
quad.graph_name, quad.subject, quad.predicate, quad.object |
||||
)?; |
||||
} |
||||
} |
||||
Ok(()) |
||||
} |
||||
|
||||
fn write_ground_quads(quads: &[GroundQuad], f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
for quad in quads { |
||||
if quad.graph_name == GraphName::DefaultGraph { |
||||
writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?; |
||||
} else { |
||||
writeln!( |
||||
f, |
||||
"\tGRAPH {} {{ {} {} {} }}", |
||||
quad.graph_name, quad.subject, quad.predicate, quad.object |
||||
)?; |
||||
} |
||||
} |
||||
Ok(()) |
||||
} |
@ -0,0 +1,33 @@ |
||||
sparopt |
||||
======= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/sparopt.svg)](https://crates.io/crates/sparopt) |
||||
[![Released API docs](https://docs.rs/sparopt/badge.svg)](https://docs.rs/sparopt) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/sparopt)](https://crates.io/crates/sparopt) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) |
||||
|
||||
sparopt is a work in progress [SPARQL Query](https://www.w3.org/TR/sparql11-query/) optimizer. |
||||
|
||||
It relies on the output of [spargebra](https://crates.io/crates/spargebra). |
||||
|
||||
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature. |
||||
|
||||
This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org). |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,5 @@ |
||||
pub use crate::sparopt::optimizer::Optimizer; |
||||
|
||||
pub mod algebra; |
||||
mod optimizer; |
||||
mod type_inference; |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,462 @@ |
||||
use crate::oxrdf::Variable; |
||||
use crate::spargebra::algebra::Function; |
||||
use crate::spargebra::term::{GroundTerm, GroundTermPattern, NamedNodePattern}; |
||||
use crate::sparopt::algebra::{Expression, GraphPattern}; |
||||
use std::collections::HashMap; |
||||
use std::ops::{BitAnd, BitOr}; |
||||
|
||||
pub fn infer_graph_pattern_types( |
||||
pattern: &GraphPattern, |
||||
mut types: VariableTypes, |
||||
) -> VariableTypes { |
||||
match pattern { |
||||
GraphPattern::QuadPattern { |
||||
subject, |
||||
predicate, |
||||
object, |
||||
graph_name, |
||||
} => { |
||||
add_ground_term_pattern_types(subject, &mut types, false); |
||||
if let NamedNodePattern::Variable(v) = predicate { |
||||
types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) |
||||
} |
||||
add_ground_term_pattern_types(object, &mut types, true); |
||||
if let Some(NamedNodePattern::Variable(v)) = graph_name { |
||||
types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) |
||||
} |
||||
types |
||||
} |
||||
GraphPattern::Path { |
||||
subject, |
||||
object, |
||||
graph_name, |
||||
.. |
||||
} => { |
||||
add_ground_term_pattern_types(subject, &mut types, false); |
||||
add_ground_term_pattern_types(object, &mut types, true); |
||||
if let Some(NamedNodePattern::Variable(v)) = graph_name { |
||||
types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) |
||||
} |
||||
types |
||||
} |
||||
GraphPattern::Join { left, right, .. } => { |
||||
let mut output_types = infer_graph_pattern_types(left, types.clone()); |
||||
output_types.intersect_with(infer_graph_pattern_types(right, types)); |
||||
output_types |
||||
} |
||||
#[cfg(feature = "sep-0006")] |
||||
GraphPattern::Lateral { left, right } => { |
||||
infer_graph_pattern_types(right, infer_graph_pattern_types(left, types)) |
||||
} |
||||
GraphPattern::LeftJoin { left, right, .. } => { |
||||
let mut right_types = infer_graph_pattern_types(right, types.clone()); // TODO: expression
|
||||
for t in right_types.inner.values_mut() { |
||||
t.undef = true; // Right might be unset
|
||||
} |
||||
let mut output_types = infer_graph_pattern_types(left, types); |
||||
output_types.intersect_with(right_types); |
||||
output_types |
||||
} |
||||
GraphPattern::Minus { left, .. } => infer_graph_pattern_types(left, types), |
||||
GraphPattern::Union { inner } => inner |
||||
.iter() |
||||
.map(|inner| infer_graph_pattern_types(inner, types.clone())) |
||||
.reduce(|mut a, b| { |
||||
a.union_with(b); |
||||
a |
||||
}) |
||||
.unwrap_or_default(), |
||||
GraphPattern::Extend { |
||||
inner, |
||||
variable, |
||||
expression, |
||||
} => { |
||||
let mut types = infer_graph_pattern_types(inner, types); |
||||
types.intersect_variable_with( |
||||
variable.clone(), |
||||
infer_expression_type(expression, &types), |
||||
); |
||||
types |
||||
} |
||||
GraphPattern::Filter { inner, .. } => infer_graph_pattern_types(inner, types), |
||||
GraphPattern::Project { inner, variables } => VariableTypes { |
||||
inner: infer_graph_pattern_types(inner, types) |
||||
.inner |
||||
.into_iter() |
||||
.filter(|(v, _)| variables.contains(v)) |
||||
.collect(), |
||||
}, |
||||
GraphPattern::Distinct { inner } |
||||
| GraphPattern::Reduced { inner } |
||||
| GraphPattern::OrderBy { inner, .. } |
||||
| GraphPattern::Slice { inner, .. } => infer_graph_pattern_types(inner, types), |
||||
GraphPattern::Group { |
||||
inner, |
||||
variables, |
||||
aggregates, |
||||
} => { |
||||
let types = infer_graph_pattern_types(inner, types); |
||||
VariableTypes { |
||||
inner: infer_graph_pattern_types(inner, types) |
||||
.inner |
||||
.into_iter() |
||||
.filter(|(v, _)| variables.contains(v)) |
||||
.chain( |
||||
aggregates |
||||
.iter() |
||||
.map(|(v, _)| (v.clone(), VariableType::ANY)), |
||||
) //TODO: guess from aggregate
|
||||
.collect(), |
||||
} |
||||
} |
||||
GraphPattern::Values { |
||||
variables, |
||||
bindings, |
||||
} => { |
||||
for (i, v) in variables.iter().enumerate() { |
||||
let mut t = VariableType::default(); |
||||
for binding in bindings { |
||||
match binding[i] { |
||||
Some(GroundTerm::NamedNode(_)) => t.named_node = true, |
||||
Some(GroundTerm::Literal(_)) => t.literal = true, |
||||
#[cfg(feature = "rdf-star")] |
||||
Some(GroundTerm::Triple(_)) => t.triple = true, |
||||
None => t.undef = true, |
||||
} |
||||
} |
||||
types.intersect_variable_with(v.clone(), t) |
||||
} |
||||
types |
||||
} |
||||
GraphPattern::Service { |
||||
name, |
||||
inner, |
||||
silent, |
||||
} => { |
||||
let parent_types = types.clone(); |
||||
let mut types = infer_graph_pattern_types(inner, types); |
||||
if let NamedNodePattern::Variable(v) = name { |
||||
types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) |
||||
} |
||||
if *silent { |
||||
// On failure, single empty solution
|
||||
types.union_with(parent_types); |
||||
} |
||||
types |
||||
} |
||||
} |
||||
} |
||||
|
||||
fn add_ground_term_pattern_types( |
||||
pattern: &GroundTermPattern, |
||||
types: &mut VariableTypes, |
||||
is_object: bool, |
||||
) { |
||||
if let GroundTermPattern::Variable(v) = pattern { |
||||
types.intersect_variable_with( |
||||
v.clone(), |
||||
if is_object { |
||||
VariableType::TERM |
||||
} else { |
||||
VariableType::SUBJECT |
||||
}, |
||||
) |
||||
} |
||||
#[cfg(feature = "rdf-star")] |
||||
if let GroundTermPattern::Triple(t) = pattern { |
||||
add_ground_term_pattern_types(&t.subject, types, false); |
||||
if let NamedNodePattern::Variable(v) = &t.predicate { |
||||
types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE) |
||||
} |
||||
add_ground_term_pattern_types(&t.object, types, true); |
||||
} |
||||
} |
||||
|
||||
/// Infers the possible [`VariableType`] of the value `expression` evaluates to.
///
/// The result is conservative: a kind flag is set whenever the expression *may*
/// produce a term of that kind, and `undef` is set whenever evaluation may
/// raise an error (SPARQL treats an errored expression like an unbound value).
pub fn infer_expression_type(expression: &Expression, types: &VariableTypes) -> VariableType {
    match expression {
        Expression::NamedNode(_) => VariableType::NAMED_NODE,
        // EXISTS and BOUND never error and always return a boolean literal.
        Expression::Literal(_) | Expression::Exists(_) | Expression::Bound(_) => {
            VariableType::LITERAL
        }
        Expression::Variable(v) => types.get(v),
        // DATATYPE/IRI return an IRI on success but may error (e.g. on a blank node).
        Expression::FunctionCall(Function::Datatype | Function::Iri, _) => {
            VariableType::NAMED_NODE | VariableType::UNDEF
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Predicate, _) => {
            VariableType::NAMED_NODE | VariableType::UNDEF
        }
        Expression::FunctionCall(Function::BNode, args) => {
            // BNODE() without arguments mints a fresh blank node and cannot
            // error; with an argument it may error on a non-string input.
            if args.is_empty() {
                VariableType::BLANK_NODE
            } else {
                VariableType::BLANK_NODE | VariableType::UNDEF
            }
        }
        // These built-ins always succeed and always return a literal.
        Expression::FunctionCall(
            Function::Rand | Function::Now | Function::Uuid | Function::StrUuid,
            _,
        ) => VariableType::LITERAL,
        // Operators and the remaining built-ins return a literal on success and
        // may error (type errors, unbound operands, ...) -> literal or undef.
        Expression::Or(_)
        | Expression::And(_)
        | Expression::Equal(_, _)
        | Expression::Greater(_, _)
        | Expression::GreaterOrEqual(_, _)
        | Expression::Less(_, _)
        | Expression::LessOrEqual(_, _)
        | Expression::Add(_, _)
        | Expression::Subtract(_, _)
        | Expression::Multiply(_, _)
        | Expression::Divide(_, _)
        | Expression::UnaryPlus(_)
        | Expression::UnaryMinus(_)
        | Expression::Not(_)
        | Expression::FunctionCall(
            Function::Str
            | Function::Lang
            | Function::LangMatches
            | Function::Abs
            | Function::Ceil
            | Function::Floor
            | Function::Round
            | Function::Concat
            | Function::SubStr
            | Function::StrLen
            | Function::Replace
            | Function::UCase
            | Function::LCase
            | Function::EncodeForUri
            | Function::Contains
            | Function::StrStarts
            | Function::StrEnds
            | Function::StrBefore
            | Function::StrAfter
            | Function::Year
            | Function::Month
            | Function::Day
            | Function::Hours
            | Function::Minutes
            | Function::Seconds
            | Function::Timezone
            | Function::Tz
            | Function::Md5
            | Function::Sha1
            | Function::Sha256
            | Function::Sha384
            | Function::Sha512
            | Function::StrLang
            | Function::StrDt
            | Function::IsIri
            | Function::IsBlank
            | Function::IsLiteral
            | Function::IsNumeric
            | Function::Regex,
            _,
        ) => VariableType::LITERAL | VariableType::UNDEF,
        #[cfg(feature = "sep-0002")]
        Expression::FunctionCall(Function::Adjust, _) => {
            VariableType::LITERAL | VariableType::UNDEF
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::IsTriple, _) => {
            VariableType::LITERAL | VariableType::UNDEF
        }
        Expression::SameTerm(left, right) => {
            // sameTerm returns a boolean but errors when an argument is unbound.
            if infer_expression_type(left, types).undef || infer_expression_type(right, types).undef
            {
                VariableType::LITERAL | VariableType::UNDEF
            } else {
                VariableType::LITERAL
            }
        }
        // IF yields either branch's value (the condition only picks the branch).
        Expression::If(_, then, els) => {
            infer_expression_type(then, types) | infer_expression_type(els, types)
        }
        Expression::Coalesce(inner) => {
            // COALESCE returns its first successfully evaluated argument: union
            // the argument types until one is guaranteed to succeed (!undef),
            // at which point later arguments are unreachable.
            let mut t = VariableType::UNDEF;
            for e in inner {
                let new = infer_expression_type(e, types);
                t = t | new;
                if !new.undef {
                    t.undef = false;
                    return t;
                }
            }
            t
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Triple, _) => VariableType::TRIPLE | VariableType::UNDEF,
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Subject, _) => {
            VariableType::SUBJECT | VariableType::UNDEF
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Object, _) => VariableType::TERM | VariableType::UNDEF,
        // Nothing is known about extension functions.
        Expression::FunctionCall(Function::Custom(_), _) => VariableType::ANY,
    }
}
||||
|
||||
/// Maps each SPARQL variable to the set of term kinds it may be bound to.
///
/// A variable absent from the map is treated as always unbound
/// ([`VariableType::UNDEF`]); see [`VariableTypes::get`].
#[derive(Default, Clone, Debug)]
pub struct VariableTypes {
    // Only variables with a type other than the UNDEF default are stored.
    inner: HashMap<Variable, VariableType>,
}
||||
|
||||
impl VariableTypes { |
||||
pub fn get(&self, variable: &Variable) -> VariableType { |
||||
self.inner |
||||
.get(variable) |
||||
.copied() |
||||
.unwrap_or(VariableType::UNDEF) |
||||
} |
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&Variable, &VariableType)> { |
||||
self.inner.iter() |
||||
} |
||||
|
||||
pub fn intersect_with(&mut self, other: Self) { |
||||
for (v, t) in other.inner { |
||||
self.intersect_variable_with(v, t); |
||||
} |
||||
} |
||||
|
||||
pub fn union_with(&mut self, other: Self) { |
||||
for (v, t) in &mut self.inner { |
||||
if other.get(v).undef { |
||||
t.undef = true; // Might be undefined
|
||||
} |
||||
} |
||||
for (v, mut t) in other.inner { |
||||
self.inner |
||||
.entry(v) |
||||
.and_modify(|ex| *ex = *ex | t) |
||||
.or_insert({ |
||||
t.undef = true; |
||||
t |
||||
}); |
||||
} |
||||
} |
||||
|
||||
fn intersect_variable_with(&mut self, variable: Variable, t: VariableType) { |
||||
let t = self.get(&variable) & t; |
||||
if t != VariableType::UNDEF { |
||||
self.inner.insert(variable, t); |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// The set of RDF term kinds a SPARQL variable may be bound to, plus whether
/// it may be left unbound (`undef`).
///
/// Set union and intersection are provided by the `BitOr` and `BitAnd`
/// implementations below; common sets are exposed as associated constants.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy, Eq, PartialEq, Debug, Default)]
pub struct VariableType {
    /// The variable may be unbound (or the expression may error).
    pub undef: bool,
    /// The variable may be bound to an IRI.
    pub named_node: bool,
    /// The variable may be bound to a blank node.
    pub blank_node: bool,
    /// The variable may be bound to a literal.
    pub literal: bool,
    /// The variable may be bound to a quoted (RDF-star) triple.
    #[cfg(feature = "rdf-star")]
    pub triple: bool,
}
||||
|
||||
impl VariableType {
    /// Any kind of term, or unbound.
    const ANY: Self = Self {
        undef: true,
        named_node: true,
        blank_node: true,
        literal: true,
        #[cfg(feature = "rdf-star")]
        triple: true,
    };
    /// Exactly a blank node.
    const BLANK_NODE: Self = Self {
        undef: false,
        named_node: false,
        blank_node: true,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Exactly a literal.
    const LITERAL: Self = Self {
        undef: false,
        named_node: false,
        blank_node: false,
        literal: true,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Exactly an IRI.
    const NAMED_NODE: Self = Self {
        undef: false,
        named_node: true,
        blank_node: false,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Anything allowed in subject position: IRI, blank node or
    /// (with RDF-star) a quoted triple — but never a literal.
    const SUBJECT: Self = Self {
        undef: false,
        named_node: true,
        blank_node: true,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: true,
    };
    /// Any bound RDF term (anything allowed in object position).
    const TERM: Self = Self {
        undef: false,
        named_node: true,
        blank_node: true,
        literal: true,
        #[cfg(feature = "rdf-star")]
        triple: true,
    };
    /// Exactly a quoted (RDF-star) triple.
    #[cfg(feature = "rdf-star")]
    const TRIPLE: Self = Self {
        undef: false,
        named_node: false,
        blank_node: false,
        literal: false,
        triple: true,
    };
    /// Always unbound; the default for variables with no stored type.
    pub const UNDEF: Self = Self {
        undef: true,
        named_node: false,
        blank_node: false,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
}
||||
|
||||
impl BitOr for VariableType { |
||||
type Output = Self; |
||||
|
||||
fn bitor(self, other: Self) -> Self { |
||||
Self { |
||||
undef: self.undef || other.undef, |
||||
named_node: self.named_node || other.named_node, |
||||
blank_node: self.blank_node || other.blank_node, |
||||
literal: self.literal || other.literal, |
||||
#[cfg(feature = "rdf-star")] |
||||
triple: self.triple || other.triple, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl BitAnd for VariableType { |
||||
type Output = Self; |
||||
|
||||
#[allow(clippy::nonminimal_bool)] |
||||
fn bitand(self, other: Self) -> Self { |
||||
Self { |
||||
undef: self.undef && other.undef, |
||||
named_node: self.named_node && other.named_node |
||||
|| (self.undef && other.named_node) |
||||
|| (self.named_node && other.undef), |
||||
blank_node: self.blank_node && other.blank_node |
||||
|| (self.undef && other.blank_node) |
||||
|| (self.blank_node && other.undef), |
||||
literal: self.literal && other.literal |
||||
|| (self.undef && other.literal) |
||||
|| (self.literal && other.undef), |
||||
#[cfg(feature = "rdf-star")] |
||||
triple: self.triple && other.triple |
||||
|| (self.undef && other.triple) |
||||
|| (self.triple && other.undef), |
||||
} |
||||
} |
||||
} |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue