parent
94986a0d28
commit
69d8ce6b4e
@ -0,0 +1,37 @@ |
||||
#![no_main] |
||||
|
||||
use libfuzzer_sys::fuzz_target; |
||||
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer}; |
||||
|
||||
fuzz_target!(|data: &[u8]| { |
||||
// We parse
|
||||
let mut triples = Vec::new(); |
||||
for triple in RdfXmlParser::new().parse_from_read(data) { |
||||
if let Ok(triple) = triple { |
||||
triples.push(triple); |
||||
} |
||||
} |
||||
|
||||
// We serialize
|
||||
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new()); |
||||
for triple in &triples { |
||||
writer.write_triple(triple).unwrap(); |
||||
} |
||||
let new_serialization = writer.finish().unwrap(); |
||||
|
||||
// We parse the serialization
|
||||
let new_triples = RdfXmlParser::new() |
||||
.parse_from_read(new_serialization.as_slice()) |
||||
.collect::<Result<Vec<_>, _>>() |
||||
.map_err(|e| { |
||||
format!( |
||||
"Error on {:?} from {triples:?} based on {:?}: {e}", |
||||
String::from_utf8_lossy(&new_serialization), |
||||
String::from_utf8_lossy(data) |
||||
) |
||||
}) |
||||
.unwrap(); |
||||
|
||||
// We check the roundtrip has not changed anything
|
||||
assert_eq!(new_triples, triples); |
||||
}); |
@ -0,0 +1,23 @@ |
||||
[package] |
||||
name = "oxrdfxml" |
||||
version = "0.1.0-alpha.1-dev" |
||||
authors = ["Tpt <thomas@pellissier-tanon.fr>"] |
||||
license = "MIT OR Apache-2.0" |
||||
readme = "README.md" |
||||
keywords = ["RDF/XML", "RDF"] |
||||
repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml" |
||||
homepage = "https://oxigraph.org/" |
||||
description = """ |
||||
Parser and serializer for the RDF/XML language |
||||
""" |
||||
edition = "2021" |
||||
rust-version = "1.65" |
||||
|
||||
[dependencies] |
||||
oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" } |
||||
oxilangtag = "0.1" |
||||
oxiri = "0.2" |
||||
quick-xml = "0.29" |
||||
|
||||
[package.metadata.docs.rs] |
||||
all-features = true |
@ -0,0 +1,52 @@ |
||||
OxRDF/XML |
||||
========= |
||||
|
||||
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml) |
||||
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml) |
||||
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml) |
||||
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) |
||||
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) |
||||
|
||||
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). |
||||
|
||||
Usage example counting the number of people in an RDF/XML file: |
||||
```rust |
||||
use oxrdf::{NamedNodeRef, vocab::rdf}; |
||||
use oxrdfxml::RdfXmlParser; |
||||
|
||||
let file = b"<?xml version=\"1.0\"?> |
||||
<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:schema=\"http://schema.org/\"> |
||||
<rdf:Description rdf:about=\"http://example.com/foo\"> |
||||
<rdf:type rdf:resource=\"http://schema.org/Person\" /> |
||||
<schema:name>Foo</schema:name> |
||||
</rdf:Description> |
||||
<schema:Person rdf:about=\"http://example.com/bar\" schema:name=\"Bar\" /> |
||||
</rdf:RDF>"; |
||||
|
||||
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap(); |
||||
let mut count = 0; |
||||
for triple in RdfXmlParser::new().parse_from_read(file.as_ref()) { |
||||
let triple = triple.unwrap(); |
||||
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() { |
||||
count += 1; |
||||
} |
||||
} |
||||
assert_eq!(2, count); |
||||
``` |
||||
|
||||
|
||||
## License |
||||
|
||||
This project is licensed under either of |
||||
|
||||
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or |
||||
`<http://www.apache.org/licenses/LICENSE-2.0>`) |
||||
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or |
||||
`<http://opensource.org/licenses/MIT>`) |
||||
|
||||
at your option. |
||||
|
||||
|
||||
### Contribution |
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. |
@ -0,0 +1,107 @@ |
||||
use oxilangtag::LanguageTagParseError; |
||||
use oxiri::IriParseError; |
||||
use std::error::Error; |
||||
use std::sync::Arc; |
||||
use std::{fmt, io}; |
||||
|
||||
/// Error that might be returned during parsing.
|
||||
///
|
||||
/// It might wrap an IO error or be a parsing error.
|
||||
#[derive(Debug)] |
||||
pub struct RdfXmlError { |
||||
pub(crate) kind: RdfXmlErrorKind, |
||||
} |
||||
|
||||
#[derive(Debug)] |
||||
pub(crate) enum RdfXmlErrorKind { |
||||
Xml(quick_xml::Error), |
||||
XmlAttribute(quick_xml::events::attributes::AttrError), |
||||
InvalidIri { |
||||
iri: String, |
||||
error: IriParseError, |
||||
}, |
||||
InvalidLanguageTag { |
||||
tag: String, |
||||
error: LanguageTagParseError, |
||||
}, |
||||
Other(String), |
||||
} |
||||
|
||||
impl RdfXmlError { |
||||
pub(crate) fn msg(msg: impl Into<String>) -> Self { |
||||
Self { |
||||
kind: RdfXmlErrorKind::Other(msg.into()), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl fmt::Display for RdfXmlError { |
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
||||
match &self.kind { |
||||
RdfXmlErrorKind::Xml(error) => error.fmt(f), |
||||
RdfXmlErrorKind::XmlAttribute(error) => error.fmt(f), |
||||
RdfXmlErrorKind::InvalidIri { iri, error } => { |
||||
write!(f, "error while parsing IRI '{}': {}", iri, error) |
||||
} |
||||
RdfXmlErrorKind::InvalidLanguageTag { tag, error } => { |
||||
write!(f, "error while parsing language tag '{}': {}", tag, error) |
||||
} |
||||
RdfXmlErrorKind::Other(message) => write!(f, "{}", message), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl Error for RdfXmlError { |
||||
fn source(&self) -> Option<&(dyn Error + 'static)> { |
||||
match &self.kind { |
||||
RdfXmlErrorKind::Xml(error) => Some(error), |
||||
RdfXmlErrorKind::XmlAttribute(error) => Some(error), |
||||
RdfXmlErrorKind::InvalidIri { error, .. } => Some(error), |
||||
RdfXmlErrorKind::InvalidLanguageTag { error, .. } => Some(error), |
||||
RdfXmlErrorKind::Other(_) => None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::Error> for RdfXmlError { |
||||
fn from(error: quick_xml::Error) -> Self { |
||||
Self { |
||||
kind: RdfXmlErrorKind::Xml(error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<quick_xml::events::attributes::AttrError> for RdfXmlError { |
||||
fn from(error: quick_xml::events::attributes::AttrError) -> Self { |
||||
Self { |
||||
kind: RdfXmlErrorKind::XmlAttribute(error), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<io::Error> for RdfXmlError { |
||||
fn from(error: io::Error) -> Self { |
||||
Self { |
||||
kind: RdfXmlErrorKind::Xml(quick_xml::Error::Io(Arc::new(error))), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl From<RdfXmlError> for io::Error { |
||||
fn from(error: RdfXmlError) -> Self { |
||||
match error.kind { |
||||
RdfXmlErrorKind::Xml(error) => match error { |
||||
quick_xml::Error::Io(error) => match Arc::try_unwrap(error) { |
||||
Ok(error) => error, |
||||
Err(error) => io::Error::new(error.kind(), error), |
||||
}, |
||||
quick_xml::Error::UnexpectedEof(error) => { |
||||
io::Error::new(io::ErrorKind::UnexpectedEof, error) |
||||
} |
||||
error => io::Error::new(io::ErrorKind::InvalidData, error), |
||||
}, |
||||
RdfXmlErrorKind::Other(error) => io::Error::new(io::ErrorKind::InvalidData, error), |
||||
_ => io::Error::new(io::ErrorKind::InvalidData, error), |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,14 @@ |
||||
#![doc = include_str!("../README.md")] |
||||
#![doc(test(attr(deny(warnings))))] |
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))] |
||||
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] |
||||
|
||||
mod error; |
||||
mod parser; |
||||
mod serializer; |
||||
mod utils; |
||||
|
||||
pub use crate::serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter}; |
||||
pub use error::RdfXmlError; |
||||
pub use parser::{FromReadRdfXmlReader, RdfXmlParser}; |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,229 @@ |
||||
use crate::utils::*; |
||||
use oxrdf::{Subject, SubjectRef, TermRef, TripleRef}; |
||||
use quick_xml::events::*; |
||||
use quick_xml::Writer; |
||||
use std::io; |
||||
use std::io::Write; |
||||
use std::sync::Arc; |
||||
|
||||
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<rdf:Description rdf:about=\"http://example.com#me\">\n\t\t<type xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" rdf:resource=\"http://schema.org/Person\"/>\n\t</rdf:Description>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[derive(Default)] |
||||
pub struct RdfXmlSerializer; |
||||
|
||||
impl RdfXmlSerializer { |
||||
/// Builds a new [`RdfXmlSerializer`].
|
||||
#[inline] |
||||
pub fn new() -> Self { |
||||
Self |
||||
} |
||||
|
||||
/// Writes a RdfXml file to a [`Write`] implementation.
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<rdf:Description rdf:about=\"http://example.com#me\">\n\t\t<type xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" rdf:resource=\"http://schema.org/Person\"/>\n\t</rdf:Description>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
#[allow(clippy::unused_self)] |
||||
pub fn serialize_to_write<W: Write>(&self, write: W) -> ToWriteRdfXmlWriter<W> { |
||||
ToWriteRdfXmlWriter { |
||||
writer: Writer::new_with_indent(write, b'\t', 1), |
||||
current_subject: None, |
||||
} |
||||
} |
||||
} |
||||
|
||||
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
|
||||
///
|
||||
/// ```
|
||||
/// use oxrdf::{NamedNodeRef, TripleRef};
|
||||
/// use oxrdfxml::RdfXmlSerializer;
|
||||
///
|
||||
/// let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
|
||||
/// writer.write_triple(TripleRef::new(
|
||||
/// NamedNodeRef::new("http://example.com#me")?,
|
||||
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
|
||||
/// NamedNodeRef::new("http://schema.org/Person")?,
|
||||
/// ))?;
|
||||
/// assert_eq!(
|
||||
/// b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<rdf:Description rdf:about=\"http://example.com#me\">\n\t\t<type xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" rdf:resource=\"http://schema.org/Person\"/>\n\t</rdf:Description>\n</rdf:RDF>",
|
||||
/// writer.finish()?.as_slice()
|
||||
/// );
|
||||
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
|
||||
/// ```
|
||||
pub struct ToWriteRdfXmlWriter<W: Write> { |
||||
writer: Writer<W>, |
||||
current_subject: Option<Subject>, |
||||
} |
||||
|
||||
impl<W: Write> ToWriteRdfXmlWriter<W> { |
||||
/// Writes an extra triple.
|
||||
#[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)] |
||||
pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> { |
||||
if self.current_subject.is_none() { |
||||
self.write_start()?; |
||||
} |
||||
|
||||
let triple = t.into(); |
||||
// We open a new rdf:Description if useful
|
||||
if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) { |
||||
if self.current_subject.is_some() { |
||||
self.writer |
||||
.write_event(Event::End(BytesEnd::new("rdf:Description"))) |
||||
.map_err(map_err)?; |
||||
} |
||||
|
||||
let mut description_open = BytesStart::new("rdf:Description"); |
||||
match triple.subject { |
||||
SubjectRef::NamedNode(node) => { |
||||
description_open.push_attribute(("rdf:about", node.as_str())) |
||||
} |
||||
SubjectRef::BlankNode(node) => { |
||||
description_open.push_attribute(("rdf:nodeID", node.as_str())) |
||||
} |
||||
_ => { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"RDF/XML only supports named or blank subject", |
||||
)) |
||||
} |
||||
} |
||||
self.writer |
||||
.write_event(Event::Start(description_open)) |
||||
.map_err(map_err)?; |
||||
} |
||||
|
||||
let (prop_prefix, prop_value) = split_iri(triple.predicate.as_str()); |
||||
let (prop_qname, prop_xmlns) = if prop_value.is_empty() { |
||||
("prop:", ("xmlns:prop", prop_prefix)) |
||||
} else { |
||||
(prop_value, ("xmlns", prop_prefix)) |
||||
}; |
||||
let property_element = self.writer.create_element(prop_qname); |
||||
let property_element = property_element.with_attribute(prop_xmlns); |
||||
|
||||
match triple.object { |
||||
TermRef::NamedNode(node) => property_element |
||||
.with_attribute(("rdf:resource", node.as_str())) |
||||
.write_empty(), |
||||
TermRef::BlankNode(node) => property_element |
||||
.with_attribute(("rdf:nodeID", node.as_str())) |
||||
.write_empty(), |
||||
TermRef::Literal(literal) => { |
||||
let property_element = if let Some(language) = literal.language() { |
||||
property_element.with_attribute(("xml:lang", language)) |
||||
} else if !literal.is_plain() { |
||||
property_element.with_attribute(("rdf:datatype", literal.datatype().as_str())) |
||||
} else { |
||||
property_element |
||||
}; |
||||
property_element.write_text_content(BytesText::new(literal.value())) |
||||
} |
||||
_ => { |
||||
return Err(io::Error::new( |
||||
io::ErrorKind::InvalidInput, |
||||
"RDF/XML only supports named, blank or literal object", |
||||
)) |
||||
} |
||||
} |
||||
.map_err(map_err)?; |
||||
self.current_subject = Some(triple.subject.into_owned()); |
||||
Ok(()) |
||||
} |
||||
|
||||
pub fn write_start(&mut self) -> io::Result<()> { |
||||
// We open the file
|
||||
self.writer |
||||
.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))) |
||||
.map_err(map_err)?; |
||||
let mut rdf_open = BytesStart::new("rdf:RDF"); |
||||
rdf_open.push_attribute(("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")); |
||||
self.writer |
||||
.write_event(Event::Start(rdf_open)) |
||||
.map_err(map_err) |
||||
} |
||||
|
||||
/// Ends the write process and returns the underlying [`Write`].
|
||||
pub fn finish(mut self) -> io::Result<W> { |
||||
if self.current_subject.is_some() { |
||||
self.writer |
||||
.write_event(Event::End(BytesEnd::new("rdf:Description"))) |
||||
.map_err(map_err)?; |
||||
} else { |
||||
self.write_start()?; |
||||
} |
||||
self.writer |
||||
.write_event(Event::End(BytesEnd::new("rdf:RDF"))) |
||||
.map_err(map_err)?; |
||||
Ok(self.writer.into_inner()) |
||||
} |
||||
} |
||||
|
||||
fn map_err(error: quick_xml::Error) -> io::Error { |
||||
if let quick_xml::Error::Io(error) = error { |
||||
match Arc::try_unwrap(error) { |
||||
Ok(error) => error, |
||||
Err(error) => io::Error::new(error.kind(), error), |
||||
} |
||||
} else { |
||||
io::Error::new(io::ErrorKind::Other, error) |
||||
} |
||||
} |
||||
|
||||
fn split_iri(iri: &str) -> (&str, &str) { |
||||
if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') { |
||||
if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':') |
||||
{ |
||||
( |
||||
&iri[..position_base + position_add], |
||||
&iri[position_base + position_add..], |
||||
) |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} else { |
||||
(iri, "") |
||||
} |
||||
} |
||||
|
||||
#[test]
fn test_split_iri() {
    // (input IRI, expected (namespace, local name))
    let cases = [
        ("http://schema.org/Person", ("http://schema.org/", "Person")),
        ("http://schema.org/", ("http://schema.org/", "")),
        ("http://schema.org#foo", ("http://schema.org#", "foo")),
        ("urn:isbn:foo", ("urn:isbn:", "foo")),
    ];
    for (iri, expected) in cases {
        assert_eq!(split_iri(iri), expected);
    }
}
@ -0,0 +1,26 @@ |
||||
/// Checks whether `c` matches the XML `NameStartChar` production:
///
/// ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
pub fn is_name_start_char(c: char) -> bool {
    match c {
        // ASCII part of the production
        ':' | '_' | 'A'..='Z' | 'a'..='z' => true,
        // Non-ASCII ranges of the production
        '\u{C0}'..='\u{D6}'
        | '\u{D8}'..='\u{F6}'
        | '\u{F8}'..='\u{2FF}'
        | '\u{370}'..='\u{37D}'
        | '\u{37F}'..='\u{1FFF}'
        | '\u{200C}'..='\u{200D}'
        | '\u{2070}'..='\u{218F}'
        | '\u{2C00}'..='\u{2FEF}'
        | '\u{3001}'..='\u{D7FF}'
        | '\u{F900}'..='\u{FDCF}'
        | '\u{FDF0}'..='\u{FFFD}'
        | '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
||||
|
||||
pub fn is_name_char(c: char) -> bool { |
||||
// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||
is_name_start_char(c) |
||||
|| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') |
||||
} |
Loading…
Reference in new issue