Migrates RDF/XML parser from Rio

pull/566/head
Tpt 1 year ago committed by Thomas Tanon
parent 94986a0d28
commit 69d8ce6b4e
  1. 4
      .clusterfuzzlite/build.sh
  2. 6
      .github/workflows/tests.yml
  3. 29
      Cargo.lock
  4. 1
      Cargo.toml
  5. 5
      fuzz/Cargo.toml
  6. 37
      fuzz/fuzz_targets/rdf_xml.rs
  7. 3
      lib/Cargo.toml
  8. 9
      lib/oxrdf/src/blank_node.rs
  9. 23
      lib/oxrdfxml/Cargo.toml
  10. 52
      lib/oxrdfxml/README.md
  11. 107
      lib/oxrdfxml/src/error.rs
  12. 14
      lib/oxrdfxml/src/lib.rs
  13. 1081
      lib/oxrdfxml/src/parser.rs
  14. 229
      lib/oxrdfxml/src/serializer.rs
  15. 26
      lib/oxrdfxml/src/utils.rs
  16. 2
      lib/sparesults/Cargo.toml
  17. 6
      lib/sparopt/src/lib.rs
  18. 2
      lib/src/io/error.rs
  19. 113
      lib/src/io/read.rs
  20. 71
      lib/src/io/write.rs
  21. 2
      lib/src/sparql/model.rs
  22. 2
      lib/src/store.rs
  23. 4
      python/src/io.rs
  24. 17
      server/src/main.rs

@ -15,7 +15,7 @@ function build_seed_corpus() {
cd "$SRC"/oxigraph
cargo fuzz build -O --debug-assertions
for TARGET in sparql_eval sparql_results_json sparql_results_tsv n3 nquads trig # sparql_results_xml https://github.com/tafia/quick-xml/issues/608
for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml n3 nquads trig rdf_xml
do
cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
done
@ -25,4 +25,4 @@ build_seed_corpus sparql_results_xml srx
build_seed_corpus n3 n3
build_seed_corpus nquads nq
build_seed_corpus trig trig
build_seed_corpus rdf_xml rdf

@ -32,6 +32,8 @@ jobs:
working-directory: ./lib/oxsdatatypes
- run: cargo clippy
working-directory: ./lib/oxrdf
- run: cargo clippy
working-directory: ./lib/oxrdfxml
- run: cargo clippy
working-directory: ./lib/oxttl
- run: cargo clippy
@ -76,6 +78,8 @@ jobs:
working-directory: ./lib/oxsdatatypes
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxrdf
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfxml
- run: cargo clippy -- -D warnings -D clippy::all
working-directory: ./lib/oxttl
- run: cargo clippy -- -D warnings -D clippy::all
@ -127,7 +131,7 @@ jobs:
- run: rustup update
- uses: Swatinem/rust-cache@v2
- run: cargo install cargo-semver-checks || true
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxttl --exclude sparopt
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph_js --exclude pyoxigraph --exclude oxigraph_testsuite --exclude oxigraph_server --exclude oxrdfxml --exclude oxttl --exclude sparopt
test_linux:
runs-on: ubuntu-latest

29
Cargo.lock generated

@ -946,13 +946,12 @@ dependencies = [
"oxilangtag",
"oxiri",
"oxrdf",
"oxrdfxml",
"oxrocksdb-sys",
"oxsdatatypes",
"oxttl",
"rand",
"regex",
"rio_api",
"rio_xml",
"sha-1",
"sha2",
"siphasher",
@ -1031,6 +1030,16 @@ dependencies = [
"rand",
]
[[package]]
name = "oxrdfxml"
version = "0.1.0-alpha.1-dev"
dependencies = [
"oxilangtag",
"oxiri",
"oxrdf",
"quick-xml",
]
[[package]]
name = "oxrocksdb-sys"
version = "0.4.0-alpha.1-dev"
@ -1279,9 +1288,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.28.2"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1"
checksum = "81b9228215d82c7b61490fec1de287136b5de6f5700f6e58ea9ad61a7964ca51"
dependencies = [
"memchr",
]
@ -1411,18 +1420,6 @@ dependencies = [
"rio_api",
]
[[package]]
name = "rio_xml"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2edda57b877119dc326c612ba822e3ca1ee22bfc86781a4e9dc0884756b58c3"
dependencies = [
"oxilangtag",
"oxiri",
"quick-xml",
"rio_api",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"

@ -3,6 +3,7 @@ members = [
"js",
"lib",
"lib/oxrdf",
"lib/oxrdfxml",
"lib/oxsdatatypes",
"lib/oxttl",
"lib/spargebra",

@ -13,6 +13,7 @@ anyhow = "1"
lazy_static = "1"
libfuzzer-sys = "0.4"
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
@ -32,6 +33,10 @@ path = "fuzz_targets/nquads.rs"
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "rdf_xml"
path = "fuzz_targets/rdf_xml.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"

@ -0,0 +1,37 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer};
fuzz_target!(|data: &[u8]| {
    // Step 1: parse the fuzzer input, keeping only the triples that were read without error
    let triples = RdfXmlParser::new()
        .parse_from_read(data)
        .filter_map(Result::ok)
        .collect::<Vec<_>>();

    // Step 2: write the triples we got back to an in-memory RDF/XML document
    let mut serializer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
    for triple in &triples {
        serializer.write_triple(triple).unwrap();
    }
    let new_serialization = serializer.finish().unwrap();

    // Step 3: what we have written must be readable again, without any error this time
    let reparsed: Result<Vec<_>, _> = RdfXmlParser::new()
        .parse_from_read(new_serialization.as_slice())
        .collect();
    let new_triples = reparsed
        .map_err(|e| {
            format!(
                "Error on {:?} from {triples:?} based on {:?}: {e}",
                String::from_utf8_lossy(&new_serialization),
                String::from_utf8_lossy(data)
            )
        })
        .unwrap();

    // Step 4: the roundtrip must give back exactly the same triples
    assert_eq!(new_triples, triples);
});

@ -31,13 +31,12 @@ digest = "0.10"
regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.8"
rio_xml = "0.8"
hex = "0.4"
siphasher = "0.3"
lazy_static = "1"
json-event-parser = "0.1"
oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] }
oxrdfxml = { version = "0.1.0-alpha.1-dev", path = "oxrdfxml" }
oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" }
oxttl = { version = "0.1.0-alpha.1-dev" , path = "oxttl", features = ["rdf-star"] }
spargebra = { version = "0.3.0-alpha.1-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] }

@ -111,7 +111,14 @@ impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
#[inline]
fn default() -> Self {
Self::new_from_unique_id(random::<u128>())
// We ensure the ID does not start with a number to be also valid with RDF/XML
loop {
let id = random();
let str = IdStr::new(id);
if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
return Self(BlankNodeContent::Anonymous { id, str });
}
}
}
}

@ -0,0 +1,23 @@
[package]
name = "oxrdfxml"
version = "0.1.0-alpha.1-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF/XML", "RDF"]
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml"
homepage = "https://oxigraph.org/"
description = """
Parser and serializer for the RDF/XML language
"""
edition = "2021"
rust-version = "1.65"
[dependencies]
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" }
oxilangtag = "0.1"
oxiri = "0.2"
quick-xml = "0.29"
[package.metadata.docs.rs]
all-features = true

@ -0,0 +1,52 @@
OxRDF/XML
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml)
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
Usage example counting the number of people in an RDF/XML file:
```rust
use oxrdf::{NamedNodeRef, vocab::rdf};
use oxrdfxml::RdfXmlParser;
let file = b"<?xml version=\"1.0\"?>
<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:schema=\"http://schema.org/\">
<rdf:Description rdf:about=\"http://example.com/foo\">
<rdf:type rdf:resource=\"http://schema.org/Person\" />
<schema:name>Foo</schema:name>
</rdf:Description>
<schema:Person rdf:about=\"http://example.com/bar\" schema:name=\"Bar\" />
</rdf:RDF>";
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
let mut count = 0;
for triple in RdfXmlParser::new().parse_from_read(file.as_ref()) {
let triple = triple.unwrap();
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
count += 1;
}
}
assert_eq!(2, count);
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,107 @@
use oxilangtag::LanguageTagParseError;
use oxiri::IriParseError;
use std::error::Error;
use std::sync::Arc;
use std::{fmt, io};
/// Error that might be returned during parsing.
///
/// It might wrap an IO error or be a parsing error.
///
/// The error can be converted into an [`io::Error`] and exposes the wrapped error,
/// when there is one, through [`Error::source`].
#[derive(Debug)]
pub struct RdfXmlError {
// What went wrong. Only visible inside of the crate: outside code has to go through
// the `Display`, `Error` and `From` implementations.
pub(crate) kind: RdfXmlErrorKind,
}
/// The different categories of failures an [`RdfXmlError`] can carry.
#[derive(Debug)]
pub(crate) enum RdfXmlErrorKind {
/// An error raised by `quick_xml`. I/O errors are also stored here, wrapped in `quick_xml::Error::Io`.
Xml(quick_xml::Error),
/// An error raised by `quick_xml` on an XML attribute.
XmlAttribute(quick_xml::events::attributes::AttrError),
/// An IRI that failed validation: the offending string and the reason of the failure.
InvalidIri {
iri: String,
error: IriParseError,
},
/// A language tag that failed validation: the offending string and the reason of the failure.
InvalidLanguageTag {
tag: String,
error: LanguageTagParseError,
},
/// Any other error, described only by a message (see `RdfXmlError::msg`).
Other(String),
}
impl RdfXmlError {
    /// Builds an error that is only described by the given message.
    pub(crate) fn msg(msg: impl Into<String>) -> Self {
        let kind = RdfXmlErrorKind::Other(msg.into());
        Self { kind }
    }
}
impl fmt::Display for RdfXmlError {
    /// Formats the error: wrapped XML errors are displayed as is,
    /// IRI and language tag errors are prefixed with the invalid value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind } = self;
        match kind {
            RdfXmlErrorKind::Xml(inner) => fmt::Display::fmt(inner, f),
            RdfXmlErrorKind::XmlAttribute(inner) => fmt::Display::fmt(inner, f),
            RdfXmlErrorKind::InvalidIri { iri, error } => {
                write!(f, "error while parsing IRI '{}': {}", iri, error)
            }
            RdfXmlErrorKind::InvalidLanguageTag { tag, error } => {
                write!(f, "error while parsing language tag '{}': {}", tag, error)
            }
            RdfXmlErrorKind::Other(message) => f.write_str(message),
        }
    }
}
impl Error for RdfXmlError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match &self.kind {
RdfXmlErrorKind::Xml(error) => Some(error),
RdfXmlErrorKind::XmlAttribute(error) => Some(error),
RdfXmlErrorKind::InvalidIri { error, .. } => Some(error),
RdfXmlErrorKind::InvalidLanguageTag { error, .. } => Some(error),
RdfXmlErrorKind::Other(_) => None,
}
}
}
impl From<quick_xml::Error> for RdfXmlError {
fn from(error: quick_xml::Error) -> Self {
Self {
kind: RdfXmlErrorKind::Xml(error),
}
}
}
impl From<quick_xml::events::attributes::AttrError> for RdfXmlError {
fn from(error: quick_xml::events::attributes::AttrError) -> Self {
Self {
kind: RdfXmlErrorKind::XmlAttribute(error),
}
}
}
impl From<io::Error> for RdfXmlError {
fn from(error: io::Error) -> Self {
Self {
kind: RdfXmlErrorKind::Xml(quick_xml::Error::Io(Arc::new(error))),
}
}
}
impl From<RdfXmlError> for io::Error {
fn from(error: RdfXmlError) -> Self {
match error.kind {
RdfXmlErrorKind::Xml(error) => match error {
quick_xml::Error::Io(error) => match Arc::try_unwrap(error) {
Ok(error) => error,
Err(error) => io::Error::new(error.kind(), error),
},
quick_xml::Error::UnexpectedEof(error) => {
io::Error::new(io::ErrorKind::UnexpectedEof, error)
}
error => io::Error::new(io::ErrorKind::InvalidData, error),
},
RdfXmlErrorKind::Other(error) => io::Error::new(io::ErrorKind::InvalidData, error),
_ => io::Error::new(io::ErrorKind::InvalidData, error),
}
}
}

@ -0,0 +1,14 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
mod error;
mod parser;
mod serializer;
mod utils;
pub use crate::serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter};
pub use error::RdfXmlError;
pub use parser::{FromReadRdfXmlReader, RdfXmlParser};

File diff suppressed because it is too large Load Diff

@ -0,0 +1,229 @@
use crate::utils::*;
use oxrdf::{Subject, SubjectRef, TermRef, TripleRef};
use quick_xml::events::*;
use quick_xml::Writer;
use std::io;
use std::io::Write;
use std::sync::Arc;
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
///
/// The serializer itself does not hold any option:
/// the actual writing is done by the writer returned by [`RdfXmlSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<rdf:Description rdf:about=\"http://example.com#me\">\n\t\t<type xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" rdf:resource=\"http://schema.org/Person\"/>\n\t</rdf:Description>\n</rdf:RDF>",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
pub struct RdfXmlSerializer;
impl RdfXmlSerializer {
    /// Builds a new [`RdfXmlSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self
    }

    /// Writes an RDF/XML file to a [`Write`] implementation.
    ///
    /// The returned writer indents its output with tabulations.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxrdfxml::RdfXmlSerializer;
    ///
    /// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// ))?;
    /// assert_eq!(
    ///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<rdf:Description rdf:about=\"http://example.com#me\">\n\t\t<type xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" rdf:resource=\"http://schema.org/Person\"/>\n\t</rdf:Description>\n</rdf:RDF>",
    ///     writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[allow(clippy::unused_self)]
    pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteRdfXmlWriter<W> {
        // One tabulation per nesting level
        let writer = Writer::new_with_indent(write, b'\t', 1);
        ToWriteRdfXmlWriter {
            writer,
            current_subject: None,
        }
    }
}
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
///
/// [`finish`](ToWriteRdfXmlWriter::finish) must be called at the end to close the XML elements that are still open.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<rdf:Description rdf:about=\"http://example.com#me\">\n\t\t<type xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" rdf:resource=\"http://schema.org/Person\"/>\n\t</rdf:Description>\n</rdf:RDF>",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct ToWriteRdfXmlWriter<W: Write> {
// The XML writer wrapping the output
writer: Writer<W>,
// Subject of the `rdf:Description` element the writer considers open, if any.
// `finish` relies on it to know if a closing tag is required.
current_subject: Option<Subject>,
}
impl<W: Write> ToWriteRdfXmlWriter<W> {
    /// Writes an extra triple.
    ///
    /// Triples written one after the other with the same subject are grouped
    /// inside of the same `rdf:Description` element.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidInput` error if the subject is neither a named nor a blank node
    /// or if the object is neither a named node, a blank node nor a literal.
    /// These checks are done before anything is written: a rejected triple leaves
    /// the output and the state of the writer untouched.
    ///
    /// Errors raised by the underlying [`Write`] or by the XML writer are forwarded.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let triple = t.into();

        // We validate the terms first: failing after having closed or opened
        // an rdf:Description would desynchronize `current_subject` and the output
        let subject_attribute = match triple.subject {
            SubjectRef::NamedNode(node) => ("rdf:about", node.as_str()),
            SubjectRef::BlankNode(node) => ("rdf:nodeID", node.as_str()),
            _ => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "RDF/XML only supports named or blank subject",
                ))
            }
        };
        if !matches!(
            triple.object,
            TermRef::NamedNode(_) | TermRef::BlankNode(_) | TermRef::Literal(_)
        ) {
            return Err(Self::unsupported_object_error());
        }

        if self.current_subject.is_none() {
            self.write_start()?;
        }

        // We open a new rdf:Description if useful
        if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) {
            if self.current_subject.is_some() {
                self.writer
                    .write_event(Event::End(BytesEnd::new("rdf:Description")))
                    .map_err(map_err)?;
            }
            let mut description_open = BytesStart::new("rdf:Description");
            description_open.push_attribute(subject_attribute);
            self.writer
                .write_event(Event::Start(description_open))
                .map_err(map_err)?;
            // The element is now open for this subject.
            // We only have to allocate an owned subject when it changes.
            self.current_subject = Some(triple.subject.into_owned());
        }

        // The predicate is written as an element in its own default namespace.
        // If no local name can be extracted from the IRI, the whole IRI is bound to the "prop" prefix.
        let (prop_prefix, prop_value) = split_iri(triple.predicate.as_str());
        let (prop_qname, prop_xmlns) = if prop_value.is_empty() {
            ("prop:", ("xmlns:prop", prop_prefix))
        } else {
            (prop_value, ("xmlns", prop_prefix))
        };
        let property_element = self
            .writer
            .create_element(prop_qname)
            .with_attribute(prop_xmlns);
        match triple.object {
            TermRef::NamedNode(node) => property_element
                .with_attribute(("rdf:resource", node.as_str()))
                .write_empty(),
            TermRef::BlankNode(node) => property_element
                .with_attribute(("rdf:nodeID", node.as_str()))
                .write_empty(),
            TermRef::Literal(literal) => {
                let property_element = if let Some(language) = literal.language() {
                    property_element.with_attribute(("xml:lang", language))
                } else if !literal.is_plain() {
                    property_element.with_attribute(("rdf:datatype", literal.datatype().as_str()))
                } else {
                    property_element
                };
                property_element.write_text_content(BytesText::new(literal.value()))
            }
            // Already rejected by the validation above, kept to stay panic-free
            _ => return Err(Self::unsupported_object_error()),
        }
        .map_err(map_err)?;
        Ok(())
    }

    /// Writes the XML declaration and opens the root `rdf:RDF` element.
    ///
    /// [`write_triple`](Self::write_triple) and [`finish`](Self::finish) already call it
    /// when no `rdf:Description` element has been opened yet.
    /// This method does not check what has been written before:
    /// each call emits the declaration and the opening tag again.
    pub fn write_start(&mut self) -> io::Result<()> {
        // We open the file
        self.writer
            .write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)))
            .map_err(map_err)?;
        let mut rdf_open = BytesStart::new("rdf:RDF");
        rdf_open.push_attribute(("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"));
        self.writer
            .write_event(Event::Start(rdf_open))
            .map_err(map_err)
    }

    /// Ends the write process and returns the underlying [`Write`].
    ///
    /// The `rdf:Description` element that is still open is closed.
    /// If no triple has been written, the header is written first
    /// so that the output contains an empty `rdf:RDF` element.
    pub fn finish(mut self) -> io::Result<W> {
        if self.current_subject.is_some() {
            self.writer
                .write_event(Event::End(BytesEnd::new("rdf:Description")))
                .map_err(map_err)?;
        } else {
            self.write_start()?;
        }
        self.writer
            .write_event(Event::End(BytesEnd::new("rdf:RDF")))
            .map_err(map_err)?;
        Ok(self.writer.into_inner())
    }

    /// Builds the error returned when the object of a triple can't be written in RDF/XML.
    fn unsupported_object_error() -> io::Error {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "RDF/XML only supports named, blank or literal object",
        )
    }
}
fn map_err(error: quick_xml::Error) -> io::Error {
if let quick_xml::Error::Io(error) = error {
match Arc::try_unwrap(error) {
Ok(error) => error,
Err(error) => io::Error::new(error.kind(), error),
}
} else {
io::Error::new(io::ErrorKind::Other, error)
}
}
/// Splits an IRI into a namespace and a local name usable as an XML element name.
///
/// The local name starts at the first name start character (`:` excluded) found at or after
/// the last character of the IRI that is either `:` or not a name character.
/// If there is no such position, the whole IRI is returned as the namespace
/// and the local name is empty.
fn split_iri(iri: &str) -> (&str, &str) {
    let local_name_start = iri
        .rfind(|c| !is_name_char(c) || c == ':')
        .and_then(|base| {
            iri[base..]
                .find(|c| is_name_start_char(c) && c != ':')
                .map(|offset| base + offset)
        });
    match local_name_start {
        Some(position) => iri.split_at(position),
        None => (iri, ""),
    }
}
#[test]
fn test_split_iri() {
    // (input IRI, expected (namespace, local name))
    let cases = [
        ("http://schema.org/Person", ("http://schema.org/", "Person")),
        ("http://schema.org/", ("http://schema.org/", "")),
        ("http://schema.org#foo", ("http://schema.org#", "foo")),
        ("urn:isbn:foo", ("urn:isbn:", "foo")),
    ];
    for (iri, expected) in cases {
        assert_eq!(split_iri(iri), expected);
    }
}

@ -0,0 +1,26 @@
/// Checks if the character is allowed as the first character of an XML name
/// (the `NameStartChar` production).
pub fn is_name_start_char(c: char) -> bool {
    // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    // Inclusive bounds of each allowed range, in the order of the production above
    const ALLOWED: [(char, char); 16] = [
        (':', ':'),
        ('A', 'Z'),
        ('_', '_'),
        ('a', 'z'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{2FF}'),
        ('\u{370}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];
    ALLOWED
        .iter()
        .any(|&(first, last)| (first..=last).contains(&c))
}
/// Checks if the character is allowed inside of an XML name (the `NameChar` production).
pub fn is_name_char(c: char) -> bool {
    // NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
    match c {
        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => {
            true
        }
        _ => is_name_start_char(c),
    }
}

@ -20,7 +20,7 @@ rdf-star = ["oxrdf/rdf-star"]
[dependencies]
json-event-parser = "0.1"
oxrdf = { version = "0.2.0-alpha.1-dev", path="../oxrdf" }
quick-xml = "0.28"
quick-xml = "0.29"
[package.metadata.docs.rs]
all-features = true

@ -1,3 +1,9 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub use crate::optimizer::Optimizer;
pub mod algebra;

@ -1,5 +1,5 @@
use oxiri::IriParseError;
use rio_xml::RdfXmlError;
use oxrdfxml::RdfXmlError;
use std::error::Error;
use std::{fmt, io};

@ -3,14 +3,12 @@
pub use crate::io::error::{ParseError, SyntaxError};
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxiri::{Iri, IriParseError};
use oxiri::IriParseError;
use oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser};
use oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
use oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
use oxttl::trig::{FromReadTriGReader, TriGParser};
use oxttl::turtle::{FromReadTurtleReader, TurtleParser};
use rio_api::model as rio;
use rio_api::parser::TriplesParser;
use rio_xml::RdfXmlParser;
use std::collections::HashMap;
use std::io::BufRead;
@ -40,7 +38,7 @@ pub struct GraphParser {
enum GraphParserKind {
NTriples(NTriplesParser),
Turtle(TurtleParser),
RdfXml { base_iri: Option<Iri<String>> },
RdfXml(RdfXmlParser),
}
impl GraphParser {
@ -55,7 +53,7 @@ impl GraphParser {
GraphFormat::Turtle => {
GraphParserKind::Turtle(TurtleParser::new().with_quoted_triples())
}
GraphFormat::RdfXml => GraphParserKind::RdfXml { base_iri: None },
GraphFormat::RdfXml => GraphParserKind::RdfXml(RdfXmlParser::new()),
},
}
}
@ -80,9 +78,7 @@ impl GraphParser {
inner: match self.inner {
GraphParserKind::NTriples(p) => GraphParserKind::NTriples(p),
GraphParserKind::Turtle(p) => GraphParserKind::Turtle(p.with_base_iri(base_iri)?),
GraphParserKind::RdfXml { .. } => GraphParserKind::RdfXml {
base_iri: Some(Iri::parse(base_iri.into())?),
},
GraphParserKind::RdfXml(p) => GraphParserKind::RdfXml(p.with_base_iri(base_iri)?),
},
})
}
@ -96,11 +92,8 @@ impl GraphParser {
TripleReaderKind::NTriples(p.parse_from_read(reader))
}
GraphParserKind::Turtle(p) => TripleReaderKind::Turtle(p.parse_from_read(reader)),
GraphParserKind::RdfXml { base_iri } => {
TripleReaderKind::RdfXml(RdfXmlParser::new(reader, base_iri.clone()))
}
GraphParserKind::RdfXml(p) => TripleReaderKind::RdfXml(p.parse_from_read(reader)),
},
buffer: Vec::new(),
}
}
}
@ -124,48 +117,33 @@ impl GraphParser {
pub struct TripleReader<R: BufRead> {
mapper: BlankNodeMapper,
parser: TripleReaderKind<R>,
buffer: Vec<Triple>,
}
#[allow(clippy::large_enum_variant)]
enum TripleReaderKind<R: BufRead> {
NTriples(FromReadNTriplesReader<R>),
Turtle(FromReadTurtleReader<R>),
RdfXml(RdfXmlParser<R>),
RdfXml(FromReadRdfXmlReader<R>),
}
impl<R: BufRead> Iterator for TripleReader<R> {
type Item = Result<Triple, ParseError>;
fn next(&mut self) -> Option<Result<Triple, ParseError>> {
loop {
if let Some(r) = self.buffer.pop() {
return Some(Ok(r));
}
return Some(match &mut self.parser {
TripleReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::RdfXml(parser) => {
if parser.is_end() {
return None;
} else if let Err(e) = parser.parse_step(&mut |t| {
self.buffer.push(self.mapper.triple(RioMapper::triple(&t)));
Ok(())
}) {
Err(e)
} else {
continue;
}
}
});
}
Some(match &mut self.parser {
TripleReaderKind::NTriples(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::Turtle(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
TripleReaderKind::RdfXml(parser) => match parser.next()? {
Ok(triple) => Ok(self.mapper.triple(triple)),
Err(e) => Err(e.into()),
},
})
}
}
@ -291,55 +269,6 @@ impl<R: BufRead> Iterator for QuadReader<R> {
}
}
struct RioMapper;
impl<'a> RioMapper {
fn named_node(node: rio::NamedNode<'a>) -> NamedNode {
NamedNode::new_unchecked(node.iri)
}
fn blank_node(node: rio::BlankNode<'a>) -> BlankNode {
BlankNode::new_unchecked(node.id)
}
fn literal(literal: rio::Literal<'a>) -> Literal {
match literal {
rio::Literal::Simple { value } => Literal::new_simple_literal(value),
rio::Literal::LanguageTaggedString { value, language } => {
Literal::new_language_tagged_literal_unchecked(value, language)
}
rio::Literal::Typed { value, datatype } => {
Literal::new_typed_literal(value, Self::named_node(datatype))
}
}
}
fn subject(node: rio::Subject<'a>) -> Subject {
match node {
rio::Subject::NamedNode(node) => Self::named_node(node).into(),
rio::Subject::BlankNode(node) => Self::blank_node(node).into(),
rio::Subject::Triple(triple) => Self::triple(triple).into(),
}
}
fn term(node: rio::Term<'a>) -> Term {
match node {
rio::Term::NamedNode(node) => Self::named_node(node).into(),
rio::Term::BlankNode(node) => Self::blank_node(node).into(),
rio::Term::Literal(literal) => Self::literal(literal).into(),
rio::Term::Triple(triple) => Self::triple(triple).into(),
}
}
fn triple(triple: &rio::Triple<'a>) -> Triple {
Triple {
subject: Self::subject(triple.subject),
predicate: Self::named_node(triple.predicate),
object: Self::term(triple.object),
}
}
}
#[derive(Default)]
struct BlankNodeMapper {
bnode_map: HashMap<BlankNode, BlankNode>,

@ -2,13 +2,11 @@
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter};
use oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
use oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
use oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
use oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use rio_api::formatter::TriplesFormatter;
use rio_api::model as rio;
use rio_xml::RdfXmlFormatter;
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
@ -23,7 +21,7 @@ use std::io::{self, Write};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer)?;
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
@ -46,8 +44,8 @@ impl GraphSerializer {
}
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(&self, writer: W) -> io::Result<TripleWriter<W>> {
Ok(TripleWriter {
pub fn triple_writer<W: Write>(&self, writer: W) -> TripleWriter<W> {
TripleWriter {
formatter: match self.format {
GraphFormat::NTriples => {
TripleWriterKind::NTriples(NTriplesSerializer::new().serialize_to_write(writer))
@ -55,9 +53,11 @@ impl GraphSerializer {
GraphFormat::Turtle => {
TripleWriterKind::Turtle(TurtleSerializer::new().serialize_to_write(writer))
}
GraphFormat::RdfXml => TripleWriterKind::RdfXml(RdfXmlFormatter::new(writer)?),
GraphFormat::RdfXml => {
TripleWriterKind::RdfXml(RdfXmlSerializer::new().serialize_to_write(writer))
}
},
})
}
}
}
@ -71,7 +71,7 @@ impl GraphSerializer {
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer)?;
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
@ -90,7 +90,7 @@ pub struct TripleWriter<W: Write> {
enum TripleWriterKind<W: Write> {
NTriples(ToWriteNTriplesWriter<W>),
Turtle(ToWriteTurtleWriter<W>),
RdfXml(RdfXmlFormatter<W>),
RdfXml(ToWriteRdfXmlWriter<W>),
}
impl<W: Write> TripleWriter<W> {
@ -99,54 +99,7 @@ impl<W: Write> TripleWriter<W> {
match &mut self.formatter {
TripleWriterKind::NTriples(writer) => writer.write_triple(triple),
TripleWriterKind::Turtle(writer) => writer.write_triple(triple),
TripleWriterKind::RdfXml(formatter) => {
let triple = triple.into();
formatter.format(&rio::Triple {
subject: match triple.subject {
SubjectRef::NamedNode(node) => rio::NamedNode { iri: node.as_str() }.into(),
SubjectRef::BlankNode(node) => rio::BlankNode { id: node.as_str() }.into(),
SubjectRef::Triple(_) => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML does not support RDF-star yet",
))
}
},
predicate: rio::NamedNode {
iri: triple.predicate.as_str(),
},
object: match triple.object {
TermRef::NamedNode(node) => rio::NamedNode { iri: node.as_str() }.into(),
TermRef::BlankNode(node) => rio::BlankNode { id: node.as_str() }.into(),
TermRef::Literal(literal) => if literal.is_plain() {
if let Some(language) = literal.language() {
rio::Literal::LanguageTaggedString {
value: literal.value(),
language,
}
} else {
rio::Literal::Simple {
value: literal.value(),
}
}
} else {
rio::Literal::Typed {
value: literal.value(),
datatype: rio::NamedNode {
iri: literal.datatype().as_str(),
},
}
}
.into(),
TermRef::Triple(_) => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"RDF/XML does not support RDF-star yet",
))
}
},
})
}
TripleWriterKind::RdfXml(writer) => writer.write_triple(triple),
}
}
@ -155,7 +108,7 @@ impl<W: Write> TripleWriter<W> {
match self.formatter {
TripleWriterKind::NTriples(writer) => writer.finish().flush(),
TripleWriterKind::Turtle(writer) => writer.finish()?.flush(),
TripleWriterKind::RdfXml(formatter) => formatter.finish()?.flush(), //TODO: remove flush when the next version of Rio is going to be released
TripleWriterKind::RdfXml(formatter) => formatter.finish()?.flush(),
}
}
}

@ -115,7 +115,7 @@ impl QueryResults {
format: GraphFormat,
) -> Result<(), EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = GraphSerializer::from_format(format).triple_writer(write)?;
let mut writer = GraphSerializer::from_format(format).triple_writer(write);
for triple in triples {
writer.write(&triple?)?;
}

@ -616,7 +616,7 @@ impl Store {
format: GraphFormat,
from_graph_name: impl Into<GraphNameRef<'a>>,
) -> Result<(), SerializerError> {
let mut writer = GraphSerializer::from_format(format).triple_writer(writer)?;
let mut writer = GraphSerializer::from_format(format).triple_writer(writer);
for quad in self.quads_for_pattern(None, None, None, Some(from_graph_name.into())) {
writer.write(quad?.as_ref())?;
}

@ -125,9 +125,7 @@ pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_
PyWritable::from_data(output)
};
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
let mut writer = GraphSerializer::from_format(graph_format)
.triple_writer(output)
.map_err(map_io_err)?;
let mut writer = GraphSerializer::from_format(graph_format).triple_writer(output);
for i in input.iter()? {
writer
.write(&*i?.extract::<PyRef<PyTriple>>()?)

@ -512,9 +512,11 @@ pub fn main() -> anyhow::Result<()> {
}
writer.finish()?;
} else {
let stdout = stdout(); // Not needed in Rust 1.61
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(stdout.lock(), solutions.variables().to_vec())?;
.solutions_writer(
stdout().lock(),
solutions.variables().to_vec(),
)?;
for solution in solutions {
writer.write(&solution?)?;
}
@ -570,15 +572,14 @@ pub fn main() -> anyhow::Result<()> {
};
if let Some(results_file) = results_file {
let mut writer = GraphSerializer::from_format(format)
.triple_writer(BufWriter::new(File::create(results_file)?))?;
.triple_writer(BufWriter::new(File::create(results_file)?));
for triple in triples {
writer.write(triple?.as_ref())?;
}
writer.finish()?;
} else {
let stdout = stdout(); // Not needed in Rust 1.61
let mut writer = GraphSerializer::from_format(format)
.triple_writer(stdout.lock())?;
let mut writer =
GraphSerializer::from_format(format).triple_writer(stdout().lock());
for triple in triples {
writer.write(triple?.as_ref())?;
}
@ -926,7 +927,7 @@ fn handle_request(
ReadForWrite::build_response(
move |w| {
Ok((
GraphSerializer::from_format(format).triple_writer(w)?,
GraphSerializer::from_format(format).triple_writer(w),
triples,
))
},
@ -1232,7 +1233,7 @@ fn evaluate_sparql_query(
ReadForWrite::build_response(
move |w| {
Ok((
GraphSerializer::from_format(format).triple_writer(w)?,
GraphSerializer::from_format(format).triple_writer(w),
triples,
))
},

Loading…
Cancel
Save