From f183196859f172a5c9af04e001db5ecea6e1ef92 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 13 Aug 2023 12:24:32 +0200 Subject: [PATCH] Adopt new I/O API for parsing --- fuzz/fuzz_targets/sparql_eval.rs | 8 +- js/src/store.rs | 52 ++++------ js/test/store.mjs | 2 +- lib/benches/store.rs | 18 +--- lib/src/io/error.rs | 134 ------------------------ lib/src/io/format.rs | 4 + lib/src/io/mod.rs | 3 +- lib/src/io/read.rs | 7 +- lib/src/lib.rs | 1 + lib/src/sparql/model.rs | 4 +- lib/src/storage/error.rs | 10 +- lib/src/store.rs | 162 +++++++++++++++++------------- lib/tests/store.rs | 20 ++-- python/src/io.rs | 86 ++++++---------- python/src/store.rs | 76 ++++++-------- python/tests/test_io.py | 35 ++++++- server/src/main.rs | 156 +++++++++------------------- testsuite/src/files.rs | 30 +++--- testsuite/src/manifest.rs | 4 +- testsuite/src/parser_evaluator.rs | 97 +++++------------- testsuite/src/report.rs | 16 +-- testsuite/src/sparql_evaluator.rs | 25 ++--- 22 files changed, 333 insertions(+), 617 deletions(-) delete mode 100644 lib/src/io/error.rs diff --git a/fuzz/fuzz_targets/sparql_eval.rs b/fuzz/fuzz_targets/sparql_eval.rs index 969bafe1..5b52f4bd 100644 --- a/fuzz/fuzz_targets/sparql_eval.rs +++ b/fuzz/fuzz_targets/sparql_eval.rs @@ -2,7 +2,7 @@ use lazy_static::lazy_static; use libfuzzer_sys::fuzz_target; -use oxigraph::io::DatasetFormat; +use oxigraph::io::RdfFormat; use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter}; use oxigraph::store::Store; @@ -10,11 +10,7 @@ lazy_static! { static ref STORE: Store = { let store = Store::new().unwrap(); store - .load_dataset( - sparql_smith::DATA_TRIG.as_bytes(), - DatasetFormat::TriG, - None, - ) + .load_dataset(sparql_smith::DATA_TRIG.as_bytes(), RdfFormat::TriG, None) .unwrap(); store }; diff --git a/js/src/store.rs b/js/src/store.rs index ef8673bc..2ec83cd6 100644 --- a/js/src/store.rs +++ b/js/src/store.rs @@ -4,7 +4,7 @@ use crate::format_err; use crate::model::*; use crate::utils::to_err; use js_sys::{Array, Map}; -use oxigraph::io::{DatasetFormat, GraphFormat, RdfFormat}; +use oxigraph::io::RdfFormat; use oxigraph::model::*; use oxigraph::sparql::QueryResults; use oxigraph::store::Store; @@ -148,6 +148,9 @@ impl JsStore { base_iri: &JsValue, to_graph_name: &JsValue, ) -> Result<(), JsValue> { + let Some(format) = RdfFormat::from_media_type(mime_type) else { + return Err(format_err!("Not supported MIME type: {mime_type}")); + }; let base_iri = if base_iri.is_null() || base_iri.is_undefined() { None } else if base_iri.is_string() { @@ -160,49 +163,28 @@ impl JsStore { )); }; - let to_graph_name = - if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? { - Some(graph_name.try_into()?) - } else { - None - }; - - if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { - self.store - .load_graph( - data.as_bytes(), - graph_format, - &to_graph_name.unwrap_or(GraphName::DefaultGraph), - base_iri.as_deref(), - ) - .map_err(to_err) - } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { - if to_graph_name.is_some() { - return Err(format_err!( - "The target graph name parameter is not available for dataset formats" - )); - } - self.store - .load_dataset(data.as_bytes(), dataset_format, base_iri.as_deref()) - .map_err(to_err) + if let Some(to_graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? { + self.store.load_graph( + data.as_bytes(), + format, + GraphName::try_from(to_graph_name)?, + base_iri.as_deref(), + ) } else { - Err(format_err!("Not supported MIME type: {mime_type}")) + self.store + .load_dataset(data.as_bytes(), format, base_iri.as_deref()) } + .map_err(to_err) } pub fn dump(&self, mime_type: &str, from_graph_name: &JsValue) -> Result { let Some(format) = RdfFormat::from_media_type(mime_type) else { return Err(format_err!("Not supported MIME type: {mime_type}")); }; - let from_graph_name = - if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? { - Some(GraphName::try_from(graph_name)?) - } else { - None - }; let mut buffer = Vec::new(); - if let Some(from_graph_name) = &from_graph_name { - self.store.dump_graph(&mut buffer, format, from_graph_name) + if let Some(from_graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? { + self.store + .dump_graph(&mut buffer, format, &GraphName::try_from(from_graph_name)?) } else { self.store.dump_dataset(&mut buffer, format) } diff --git a/js/test/store.mjs b/js/test/store.mjs index 2317c022..ed50cb40 100644 --- a/js/test/store.mjs +++ b/js/test/store.mjs @@ -186,7 +186,7 @@ describe("Store", function () { it("dump default graph content", function () { const store = new Store([dataModel.quad(ex, ex, ex, ex)]); - assert.strictEqual("", store.dump("application/n-triples")); + assert.strictEqual("", store.dump("application/n-triples", dataModel.defaultGraph())); }); }); }); diff --git a/lib/benches/store.rs b/lib/benches/store.rs index 17779b19..eaaf71e7 100644 --- a/lib/benches/store.rs +++ b/lib/benches/store.rs @@ -1,7 +1,7 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use oxhttp::model::{Method, Request, Status}; -use oxigraph::io::GraphFormat; -use oxigraph::model::GraphNameRef; +use oxigraph::io::RdfFormat; +use oxigraph::model::{GraphName, GraphNameRef}; use oxigraph::sparql::{Query, QueryResults, Update}; use oxigraph::store::Store; use rand::random; @@ -63,12 +63,7 @@ fn store_load(c: &mut Criterion) { fn do_load(store: &Store, data: &[u8]) { store - .load_graph( - data, - GraphFormat::NTriples, - GraphNameRef::DefaultGraph, - None, - ) + .load_graph(data, RdfFormat::NTriples, GraphName::DefaultGraph, None) .unwrap(); store.optimize().unwrap(); } @@ -76,12 +71,7 @@ fn do_load(store: &Store, data: &[u8]) { fn do_bulk_load(store: &Store, data: &[u8]) { store .bulk_loader() - .load_graph( - data, - GraphFormat::NTriples, - GraphNameRef::DefaultGraph, - None, - ) + .load_graph(data, RdfFormat::NTriples, GraphNameRef::DefaultGraph, None) .unwrap(); store.optimize().unwrap(); } diff --git a/lib/src/io/error.rs b/lib/src/io/error.rs deleted file mode 100644 index 7cbdc8ac..00000000 --- a/lib/src/io/error.rs +++ /dev/null @@ -1,134 +0,0 @@ -use oxiri::IriParseError; -use std::error::Error; -use std::{fmt, io}; - -/// Error returned during RDF format parsing. -#[derive(Debug)] -pub enum ParseError { - /// I/O error during parsing (file not found...). - Io(io::Error), - /// An error in the file syntax. - Syntax(SyntaxError), -} - -impl ParseError { - #[inline] - pub(crate) fn invalid_base_iri(iri: &str, error: IriParseError) -> Self { - Self::Syntax(SyntaxError { - inner: SyntaxErrorKind::InvalidBaseIri { - iri: iri.to_owned(), - error, - }, - }) - } -} - -impl fmt::Display for ParseError { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Io(e) => e.fmt(f), - Self::Syntax(e) => e.fmt(f), - } - } -} - -impl Error for ParseError { - #[inline] - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::Io(e) => Some(e), - Self::Syntax(e) => Some(e), - } - } -} - -impl From for SyntaxError { - #[inline] - fn from(error: oxrdfio::SyntaxError) -> Self { - SyntaxError { - inner: SyntaxErrorKind::IO(error), - } - } -} - -impl From for ParseError { - #[inline] - fn from(error: oxrdfio::ParseError) -> Self { - match error { - oxrdfio::ParseError::Syntax(e) => Self::Syntax(e.into()), - oxrdfio::ParseError::Io(e) => Self::Io(e), - } - } -} - -impl From for ParseError { - #[inline] - fn from(error: io::Error) -> Self { - Self::Io(error) - } -} - -impl From for ParseError { - #[inline] - fn from(error: SyntaxError) -> Self { - Self::Syntax(error) - } -} - -impl From for io::Error { - #[inline] - fn from(error: ParseError) -> Self { - match error { - ParseError::Io(error) => error, - ParseError::Syntax(error) => error.into(), - } - } -} - -/// An error in the syntax of the parsed file. -#[derive(Debug)] -pub struct SyntaxError { - inner: SyntaxErrorKind, -} - -#[derive(Debug)] -enum SyntaxErrorKind { - IO(oxrdfio::SyntaxError), - InvalidBaseIri { iri: String, error: IriParseError }, -} - -impl fmt::Display for SyntaxError { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.inner { - SyntaxErrorKind::IO(e) => e.fmt(f), - SyntaxErrorKind::InvalidBaseIri { iri, error } => { - write!(f, "Invalid base IRI '{iri}': {error}") - } - } - } -} - -impl Error for SyntaxError { - #[inline] - fn source(&self) -> Option<&(dyn Error + 'static)> { - match &self.inner { - SyntaxErrorKind::IO(e) => Some(e), - SyntaxErrorKind::InvalidBaseIri { .. } => None, - } - } -} - -impl From for io::Error { - #[inline] - fn from(error: SyntaxError) -> Self { - match error.inner { - SyntaxErrorKind::IO(error) => error.into(), - SyntaxErrorKind::InvalidBaseIri { iri, error } => Self::new( - io::ErrorKind::InvalidInput, - format!("Invalid IRI '{iri}': {error}"), - ), - } - } -} diff --git a/lib/src/io/format.rs b/lib/src/io/format.rs index 01e112ac..b100d392 100644 --- a/lib/src/io/format.rs +++ b/lib/src/io/format.rs @@ -1,3 +1,5 @@ +#![allow(deprecated)] + use oxrdfio::RdfFormat; /// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats. @@ -5,6 +7,7 @@ use oxrdfio::RdfFormat; /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[non_exhaustive] +#[deprecated(note = "Use RdfFormat instead")] pub enum GraphFormat { /// [N-Triples](https://www.w3.org/TR/n-triples/) NTriples, @@ -119,6 +122,7 @@ impl From for RdfFormat { /// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future. #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[non_exhaustive] +#[deprecated(note = "Use RdfFormat instead")] pub enum DatasetFormat { /// [N-Quads](https://www.w3.org/TR/n-quads/) NQuads, diff --git a/lib/src/io/mod.rs b/lib/src/io/mod.rs index 9d91c881..f183157d 100644 --- a/lib/src/io/mod.rs +++ b/lib/src/io/mod.rs @@ -1,11 +1,12 @@ //! Utilities to read and write RDF graphs and datasets. -mod error; mod format; pub mod read; pub mod write; +#[allow(deprecated)] pub use self::format::{DatasetFormat, GraphFormat}; +#[allow(deprecated)] pub use self::read::{DatasetParser, GraphParser}; #[allow(deprecated)] pub use self::write::{DatasetSerializer, GraphSerializer}; diff --git a/lib/src/io/read.rs b/lib/src/io/read.rs index fe414aa1..3e2f0e2a 100644 --- a/lib/src/io/read.rs +++ b/lib/src/io/read.rs @@ -1,10 +1,11 @@ +#![allow(deprecated)] + //! Utilities to read RDF graphs and datasets. -pub use crate::io::error::{ParseError, SyntaxError}; use crate::io::{DatasetFormat, GraphFormat}; use crate::model::*; use oxiri::IriParseError; -use oxrdfio::{FromReadQuadReader, RdfParser}; +use oxrdfio::{FromReadQuadReader, ParseError, RdfParser}; use std::io::Read; /// Parsers for RDF graph serialization formats. @@ -26,6 +27,7 @@ use std::io::Read; /// assert_eq!(triples[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` +#[deprecated(note = "Use RdfParser instead")] pub struct GraphParser { inner: RdfParser, } @@ -116,6 +118,7 @@ impl Iterator for TripleReader { /// assert_eq!(quads[0].subject.to_string(), ""); /// # std::io::Result::Ok(()) /// ``` +#[deprecated(note = "Use RdfParser instead")] pub struct DatasetParser { inner: RdfParser, } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 1ccbe197..e5a680d8 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -1,5 +1,6 @@ #![doc = include_str!("../README.md")] #![doc(test(attr(deny(warnings))))] +#![doc(test(attr(allow(deprecated))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index aa7c83fe..1cd64158 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -95,13 +95,13 @@ impl QueryResults { /// /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::{RdfFormat, GraphFormat}; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let graph = " .\n"; /// /// let store = Store::new()?; - /// store.load_graph(graph.as_bytes(), GraphFormat::NTriples, GraphNameRef::DefaultGraph, None)?; + /// store.load_graph(graph.as_bytes(), RdfFormat::NTriples, GraphName::DefaultGraph, None)?; /// /// let mut results = Vec::new(); /// store.query("CONSTRUCT WHERE { ?s ?p ?o }")?.write_graph(&mut results, RdfFormat::NTriples)?; diff --git a/lib/src/storage/error.rs b/lib/src/storage/error.rs index 754513eb..690b6bfd 100644 --- a/lib/src/storage/error.rs +++ b/lib/src/storage/error.rs @@ -1,4 +1,5 @@ -use crate::io::{read::ParseError, RdfFormat}; +use crate::io::{ParseError, RdfFormat}; +use oxiri::IriParseError; use std::error::Error; use std::fmt; use std::io; @@ -126,6 +127,8 @@ pub enum LoaderError { Parsing(ParseError), /// An error raised during the insertion in the store. Storage(StorageError), + /// The base IRI is invalid. + InvalidBaseIri { iri: String, error: IriParseError }, } impl fmt::Display for LoaderError { @@ -134,6 +137,7 @@ impl fmt::Display for LoaderError { match self { Self::Parsing(e) => e.fmt(f), Self::Storage(e) => e.fmt(f), + Self::InvalidBaseIri { iri, error } => write!(f, "Invalid base IRI '{iri}': {error}"), } } } @@ -144,6 +148,7 @@ impl Error for LoaderError { match self { Self::Parsing(e) => Some(e), Self::Storage(e) => Some(e), + Self::InvalidBaseIri { error, .. } => Some(error), } } } @@ -168,6 +173,9 @@ impl From for io::Error { match error { LoaderError::Storage(error) => error.into(), LoaderError::Parsing(error) => error.into(), + LoaderError::InvalidBaseIri { .. } => { + io::Error::new(io::ErrorKind::InvalidInput, error.to_string()) + } } } } diff --git a/lib/src/store.rs b/lib/src/store.rs index 3923bf80..0667f019 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -23,8 +23,9 @@ //! }; //! # Result::<_, Box>::Ok(()) //! ``` -use crate::io::read::ParseError; -use crate::io::{DatasetFormat, DatasetParser, GraphFormat, GraphParser, RdfFormat, RdfSerializer}; +#[cfg(not(target_family = "wasm"))] +use crate::io::ParseError; +use crate::io::{RdfFormat, RdfParser, RdfSerializer}; use crate::model::*; use crate::sparql::{ evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions, @@ -451,38 +452,43 @@ impl Store { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::GraphFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; /// /// // insertion /// let file = b" ."; - /// store.load_graph(file.as_ref(), GraphFormat::NTriples, GraphNameRef::DefaultGraph, None)?; + /// store.load_graph(file.as_ref(), RdfFormat::NTriples, GraphName::DefaultGraph, None)?; /// /// // we inspect the store contents /// let ex = NamedNodeRef::new("http://example.com")?; /// assert!(store.contains(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?); /// # Result::<_, Box>::Ok(()) /// ``` - pub fn load_graph<'a>( + pub fn load_graph( &self, - reader: impl Read, - format: GraphFormat, - to_graph_name: impl Into>, + read: impl Read, + format: impl Into, + to_graph_name: impl Into, base_iri: Option<&str>, ) -> Result<(), LoaderError> { - let mut parser = GraphParser::from_format(format); + let mut parser = RdfParser::from_format(format.into()) + .without_named_graphs() + .with_default_graph(to_graph_name) + .rename_blank_nodes(); if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; + .map_err(|e| LoaderError::InvalidBaseIri { + iri: base_iri.into(), + error: e, + })?; } - let quads = parser.read_triples(reader).collect::, _>>()?; - let to_graph_name = to_graph_name.into(); + let quads = parser.parse_read(read).collect::, _>>()?; self.storage.transaction(move |mut t| { for quad in &quads { - t.insert(quad.as_ref().in_graph(to_graph_name))?; + t.insert(quad.as_ref())?; } Ok(()) }) @@ -495,14 +501,14 @@ impl Store { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::DatasetFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; /// /// // insertion /// let file = b" ."; - /// store.load_dataset(file.as_ref(), DatasetFormat::NQuads, None)?; + /// store.load_dataset(file.as_ref(), RdfFormat::NQuads, None)?; /// /// // we inspect the store contents /// let ex = NamedNodeRef::new("http://example.com")?; @@ -511,20 +517,23 @@ impl Store { /// ``` pub fn load_dataset( &self, - reader: impl Read, - format: DatasetFormat, + read: impl Read, + format: impl Into, base_iri: Option<&str>, ) -> Result<(), LoaderError> { - let mut parser = DatasetParser::from_format(format); + let mut parser = RdfParser::from_format(format.into()).rename_blank_nodes(); if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; + .map_err(|e| LoaderError::InvalidBaseIri { + iri: base_iri.into(), + error: e, + })?; } - let quads = parser.read_quads(reader).collect::, _>>()?; + let quads = parser.parse_read(read).collect::, _>>()?; self.storage.transaction(move |mut t| { for quad in &quads { - t.insert(quad.into())?; + t.insert(quad.as_ref())?; } Ok(()) }) @@ -595,16 +604,16 @@ impl Store { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::GraphFormat; - /// use oxigraph::model::GraphNameRef; + /// use oxigraph::io::RdfFormat; + /// use oxigraph::model::*; /// /// let file = " .\n".as_bytes(); /// /// let store = Store::new()?; - /// store.load_graph(file, GraphFormat::NTriples, GraphNameRef::DefaultGraph, None)?; + /// store.load_graph(file, RdfFormat::NTriples, GraphName::DefaultGraph, None)?; /// /// let mut buffer = Vec::new(); - /// store.dump_graph(&mut buffer, GraphFormat::NTriples, GraphNameRef::DefaultGraph)?; + /// store.dump_graph(&mut buffer, RdfFormat::NTriples, GraphNameRef::DefaultGraph)?; /// assert_eq!(file, buffer.as_slice()); /// # std::io::Result::Ok(()) /// ``` @@ -626,15 +635,15 @@ impl Store { /// /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::DatasetFormat; + /// use oxigraph::io::RdfFormat; /// /// let file = " .\n".as_bytes(); /// /// let store = Store::new()?; - /// store.load_dataset(file, DatasetFormat::NQuads, None)?; + /// store.load_dataset(file, RdfFormat::NQuads, None)?; /// /// let mut buffer = Vec::new(); - /// store.dump_dataset(&mut buffer, DatasetFormat::NQuads)?; + /// store.dump_dataset(&mut buffer, RdfFormat::NQuads)?; /// assert_eq!(file, buffer.as_slice()); /// # std::io::Result::Ok(()) /// ``` @@ -841,14 +850,14 @@ impl Store { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::DatasetFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; /// /// // quads file insertion /// let file = b" ."; - /// store.bulk_loader().load_dataset(file.as_ref(), DatasetFormat::NQuads, None)?; + /// store.bulk_loader().load_dataset(file.as_ref(), RdfFormat::NQuads, None)?; /// /// // we inspect the store contents /// let ex = NamedNodeRef::new("http://example.com")?; @@ -1061,7 +1070,7 @@ impl<'a> Transaction<'a> { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::GraphFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; @@ -1069,7 +1078,7 @@ impl<'a> Transaction<'a> { /// // insertion /// let file = b" ."; /// store.transaction(|mut transaction| { - /// transaction.load_graph(file.as_ref(), GraphFormat::NTriples, GraphNameRef::DefaultGraph, None) + /// transaction.load_graph(file.as_ref(), RdfFormat::NTriples, GraphName::DefaultGraph, None) /// })?; /// /// // we inspect the store contents @@ -1077,23 +1086,27 @@ impl<'a> Transaction<'a> { /// assert!(store.contains(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?); /// # Result::<_,oxigraph::store::LoaderError>::Ok(()) /// ``` - pub fn load_graph<'b>( + pub fn load_graph( &mut self, - reader: impl Read, - format: GraphFormat, - to_graph_name: impl Into>, + read: impl Read, + format: impl Into, + to_graph_name: impl Into, base_iri: Option<&str>, ) -> Result<(), LoaderError> { - let mut parser = GraphParser::from_format(format); + let mut parser = RdfParser::from_format(format.into()) + .without_named_graphs() + .with_default_graph(to_graph_name) + .rename_blank_nodes(); if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; + .map_err(|e| LoaderError::InvalidBaseIri { + iri: base_iri.into(), + error: e, + })?; } - let to_graph_name = to_graph_name.into(); - for triple in parser.read_triples(reader) { - self.writer - .insert(triple?.as_ref().in_graph(to_graph_name))?; + for quad in parser.parse_read(read) { + self.writer.insert(quad?.as_ref())?; } Ok(()) } @@ -1103,7 +1116,7 @@ impl<'a> Transaction<'a> { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::DatasetFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; @@ -1111,7 +1124,7 @@ impl<'a> Transaction<'a> { /// // insertion /// let file = b" ."; /// store.transaction(|mut transaction| { - /// transaction.load_dataset(file.as_ref(), DatasetFormat::NQuads, None) + /// transaction.load_dataset(file.as_ref(), RdfFormat::NQuads, None) /// })?; /// /// // we inspect the store contents @@ -1121,17 +1134,20 @@ impl<'a> Transaction<'a> { /// ``` pub fn load_dataset( &mut self, - reader: impl Read, - format: DatasetFormat, + read: impl Read, + format: impl Into, base_iri: Option<&str>, ) -> Result<(), LoaderError> { - let mut parser = DatasetParser::from_format(format); + let mut parser = RdfParser::from_format(format.into()).rename_blank_nodes(); if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; + .map_err(|e| LoaderError::InvalidBaseIri { + iri: base_iri.into(), + error: e, + })?; } - for quad in parser.read_quads(reader) { + for quad in parser.parse_read(read) { self.writer.insert(quad?.as_ref())?; } Ok(()) @@ -1365,14 +1381,14 @@ impl Iterator for GraphNameIter { /// Usage example with loading a dataset: /// ``` /// use oxigraph::store::Store; -/// use oxigraph::io::DatasetFormat; +/// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; /// /// // quads file insertion /// let file = b" ."; -/// store.bulk_loader().load_dataset(file.as_ref(), DatasetFormat::NQuads, None)?; +/// store.bulk_loader().load_dataset(file.as_ref(), RdfFormat::NQuads, None)?; /// /// // we inspect the store contents /// let ex = NamedNodeRef::new("http://example.com")?; @@ -1448,14 +1464,14 @@ impl BulkLoader { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::DatasetFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; /// /// // insertion /// let file = b" ."; - /// store.bulk_loader().load_dataset(file.as_ref(), DatasetFormat::NQuads, None)?; + /// store.bulk_loader().load_dataset(file.as_ref(), RdfFormat::NQuads, None)?; /// /// // we inspect the store contents /// let ex = NamedNodeRef::new("http://example.com")?; @@ -1464,17 +1480,20 @@ impl BulkLoader { /// ``` pub fn load_dataset( &self, - reader: impl Read, - format: DatasetFormat, + read: impl Read, + format: impl Into, base_iri: Option<&str>, ) -> Result<(), LoaderError> { - let mut parser = DatasetParser::from_format(format); + let mut parser = RdfParser::from_format(format.into()).rename_blank_nodes(); if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; + .map_err(|e| LoaderError::InvalidBaseIri { + iri: base_iri.into(), + error: e, + })?; } - self.load_ok_quads(parser.read_quads(reader).filter_map(|r| match r { + self.load_ok_quads(parser.parse_read(read).filter_map(|r| match r { Ok(q) => Some(Ok(q)), Err(e) => { if let Some(callback) = &self.on_parse_error { @@ -1503,36 +1522,41 @@ impl BulkLoader { /// Usage example: /// ``` /// use oxigraph::store::Store; - /// use oxigraph::io::GraphFormat; + /// use oxigraph::io::RdfFormat; /// use oxigraph::model::*; /// /// let store = Store::new()?; /// /// // insertion /// let file = b" ."; - /// store.bulk_loader().load_graph(file.as_ref(), GraphFormat::NTriples, GraphNameRef::DefaultGraph, None)?; + /// store.bulk_loader().load_graph(file.as_ref(), RdfFormat::NTriples, GraphName::DefaultGraph, None)?; /// /// // we inspect the store contents /// let ex = NamedNodeRef::new("http://example.com")?; /// assert!(store.contains(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?); /// # Result::<_, Box>::Ok(()) /// ``` - pub fn load_graph<'a>( + pub fn load_graph( &self, - reader: impl Read, - format: GraphFormat, - to_graph_name: impl Into>, + read: impl Read, + format: impl Into, + to_graph_name: impl Into, base_iri: Option<&str>, ) -> Result<(), LoaderError> { - let mut parser = GraphParser::from_format(format); + let mut parser = RdfParser::from_format(format.into()) + .without_named_graphs() + .with_default_graph(to_graph_name) + .rename_blank_nodes(); if let Some(base_iri) = base_iri { parser = parser .with_base_iri(base_iri) - .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?; + .map_err(|e| LoaderError::InvalidBaseIri { + iri: base_iri.into(), + error: e, + })?; } - let to_graph_name = to_graph_name.into(); - self.load_ok_quads(parser.read_triples(reader).filter_map(|r| match r { - Ok(q) => Some(Ok(q.in_graph(to_graph_name.into_owned()))), + self.load_ok_quads(parser.parse_read(read).filter_map(|r| match r { + Ok(q) => Some(Ok(q)), Err(e) => { if let Some(callback) = &self.on_parse_error { if let Err(e) = callback(e) { diff --git a/lib/tests/store.rs b/lib/tests/store.rs index 750d74c8..1b28a03d 100644 --- a/lib/tests/store.rs +++ b/lib/tests/store.rs @@ -1,4 +1,4 @@ -use oxigraph::io::{DatasetFormat, GraphFormat, RdfFormat}; +use oxigraph::io::RdfFormat; use oxigraph::model::vocab::{rdf, xsd}; use oxigraph::model::*; use oxigraph::store::Store; @@ -109,7 +109,7 @@ fn test_load_graph() -> Result<(), Box> { let store = Store::new()?; store.load_graph( DATA.as_bytes(), - GraphFormat::Turtle, + RdfFormat::Turtle, GraphNameRef::DefaultGraph, None, )?; @@ -126,8 +126,8 @@ fn test_bulk_load_graph() -> Result<(), Box> { let store = Store::new()?; store.bulk_loader().load_graph( DATA.as_bytes(), - GraphFormat::Turtle, - GraphNameRef::DefaultGraph, + RdfFormat::Turtle, + GraphName::DefaultGraph, None, )?; for q in quads(GraphNameRef::DefaultGraph) { @@ -143,8 +143,8 @@ fn test_bulk_load_graph_lenient() -> Result<(), Box> { let store = Store::new()?; store.bulk_loader().on_parse_error(|_| Ok(())).load_graph( b" .\n .".as_slice(), - GraphFormat::NTriples, - GraphNameRef::DefaultGraph, + RdfFormat::NTriples, + GraphName::DefaultGraph, None, )?; assert_eq!(store.len()?, 1); @@ -161,7 +161,7 @@ fn test_bulk_load_graph_lenient() -> Result<(), Box> { #[test] fn test_load_dataset() -> Result<(), Box> { let store = Store::new()?; - store.load_dataset(GRAPH_DATA.as_bytes(), DatasetFormat::TriG, None)?; + store.load_dataset(GRAPH_DATA.as_bytes(), RdfFormat::TriG, None)?; for q in quads(NamedNodeRef::new_unchecked( "http://www.wikidata.org/wiki/Special:EntityData/Q90", )) { @@ -177,7 +177,7 @@ fn test_bulk_load_dataset() -> Result<(), Box> { let store = Store::new()?; store .bulk_loader() - .load_dataset(GRAPH_DATA.as_bytes(), DatasetFormat::TriG, None)?; + .load_dataset(GRAPH_DATA.as_bytes(), RdfFormat::TriG, None)?; let graph_name = NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"); for q in quads(graph_name) { @@ -194,8 +194,8 @@ fn test_load_graph_generates_new_blank_nodes() -> Result<(), Box> { for _ in 0..2 { store.load_graph( "_:a .".as_bytes(), - GraphFormat::NTriples, - GraphNameRef::DefaultGraph, + RdfFormat::NTriples, + GraphName::DefaultGraph, None, )?; } diff --git a/python/src/io.rs b/python/src/io.rs index 9b6d4075..f6661474 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -1,10 +1,7 @@ #![allow(clippy::needless_option_as_deref)] use crate::model::{PyQuad, PyTriple}; -use oxigraph::io::read::{ParseError, QuadReader, TripleReader}; -use oxigraph::io::{ - DatasetFormat, DatasetParser, GraphFormat, GraphParser, RdfFormat, RdfSerializer, -}; +use oxigraph::io::{FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer}; use oxigraph::model::QuadRef; use pyo3::exceptions::{PyIOError, PySyntaxError, PyValueError}; use pyo3::prelude::*; @@ -41,54 +38,54 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> { /// :type mime_type: str /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional +/// :param without_named_graphs: Sets that the parser must fail if parsing a named graph. +/// :type without_named_graphs: bool, optional +/// :param rename_blank_nodes: Renames the blank nodes ids from the ones set in the serialization to random ids. This allows to avoid id conflicts when merging graphs together. +/// :type rename_blank_nodes: bool, optional /// :return: an iterator of RDF triples or quads depending on the format. -/// :rtype: iterator(Triple) or iterator(Quad) +/// :rtype: iterator(Quad) /// :raises ValueError: if the MIME type is not supported. /// :raises SyntaxError: if the provided data is invalid. /// /// >>> input = io.BytesIO(b'

"1" .') /// >>> list(parse(input, "text/turtle", base_iri="http://example.com/")) -/// [ predicate= object=>>] +/// [ predicate= object=> graph_name=>] #[pyfunction] -#[pyo3(signature = (input, mime_type, *, base_iri = None))] +#[pyo3(signature = (input, mime_type, *, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] pub fn parse( input: PyObject, mime_type: &str, base_iri: Option<&str>, + without_named_graphs: bool, + rename_blank_nodes: bool, py: Python<'_>, ) -> PyResult { + let Some(format) = RdfFormat::from_media_type(mime_type) else { + return Err(PyValueError::new_err(format!( + "Not supported MIME type: {mime_type}" + ))); + }; let input = if let Ok(path) = input.extract::(py) { PyReadable::from_file(&path, py).map_err(map_io_err)? } else { PyReadable::from_data(input, py) }; - if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { - let mut parser = GraphParser::from_format(graph_format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - } - Ok(PyTripleReader { - inner: parser.read_triples(input), - } - .into_py(py)) - } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { - let mut parser = DatasetParser::from_format(dataset_format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - } - Ok(PyQuadReader { - inner: parser.read_quads(input), - } - .into_py(py)) - } else { - Err(PyValueError::new_err(format!( - "Not supported MIME type: {mime_type}" - ))) + let mut parser = RdfParser::from_format(format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| PyValueError::new_err(e.to_string()))?; } + if without_named_graphs { + parser = parser.without_named_graphs(); + } + if rename_blank_nodes { + parser = parser.rename_blank_nodes(); + } + Ok(PyQuadReader { + inner: parser.parse_read(input), + } + .into_py(py)) } /// Serializes an RDF graph or dataset. @@ -151,30 +148,9 @@ pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_ writer.finish().map_err(map_io_err) } -#[pyclass(name = "TripleReader", module = "pyoxigraph")] -pub struct PyTripleReader { - inner: TripleReader, -} - -#[pymethods] -impl PyTripleReader { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef { - slf - } - - fn __next__(&mut self, py: Python<'_>) -> PyResult> { - py.allow_threads(|| { - self.inner - .next() - .map(|q| Ok(q.map_err(map_parse_error)?.into())) - .transpose() - }) - } -} - #[pyclass(name = "QuadReader", module = "pyoxigraph")] pub struct PyQuadReader { - inner: QuadReader, + inner: FromReadQuadReader, } #[pymethods] diff --git a/python/src/store.rs b/python/src/store.rs index 692b485f..cfb10a3a 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -3,7 +3,7 @@ use crate::io::{allow_threads_unsafe, map_io_err, map_parse_error, PyReadable, PyWritable}; use crate::model::*; use crate::sparql::*; -use oxigraph::io::{DatasetFormat, GraphFormat, RdfFormat}; +use oxigraph::io::RdfFormat; use oxigraph::model::{GraphName, GraphNameRef}; use oxigraph::sparql::Update; use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store}; @@ -366,7 +366,7 @@ impl PyStore { /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. /// :type to_graph: NamedNode or BlankNode or DefaultGraph or None, optional /// :rtype: None - /// :raises ValueError: if the MIME type is not supported or the `to_graph` parameter is given with a quad file. + /// :raises ValueError: if the MIME type is not supported. /// :raises SyntaxError: if the provided data is invalid. /// :raises IOError: if an I/O error happens during a quad insertion. /// @@ -383,6 +383,11 @@ impl PyStore { to_graph: Option<&PyAny>, py: Python<'_>, ) -> PyResult<()> { + let Some(format) = RdfFormat::from_media_type(mime_type) else { + return Err(PyValueError::new_err(format!( + "Not supported MIME type: {mime_type}" + ))); + }; let to_graph_name = if let Some(graph_name) = to_graph { Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) } else { @@ -394,29 +399,13 @@ impl PyStore { PyReadable::from_data(input, py) }; py.allow_threads(|| { - if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { - self.inner - .load_graph( - input, - graph_format, - to_graph_name.as_ref().unwrap_or(&GraphName::DefaultGraph), - base_iri, - ) - .map_err(map_loader_error) - } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { - if to_graph_name.is_some() { - return Err(PyValueError::new_err( - "The target graph name parameter is not available for dataset formats", - )); - } + if let Some(to_graph_name) = to_graph_name { self.inner - .load_dataset(input, dataset_format, base_iri) - .map_err(map_loader_error) + .load_graph(input, format, to_graph_name, base_iri) } else { - Err(PyValueError::new_err(format!( - "Not supported MIME type: {mime_type}" - ))) + self.inner.load_dataset(input, format, base_iri) } + .map_err(map_loader_error) }) } @@ -448,7 +437,7 @@ impl PyStore { /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. /// :type to_graph: NamedNode or BlankNode or DefaultGraph or None, optional /// :rtype: None - /// :raises ValueError: if the MIME type is not supported or the `to_graph` parameter is given with a quad file. + /// :raises ValueError: if the MIME type is not supported. /// :raises SyntaxError: if the provided data is invalid. /// :raises IOError: if an I/O error happens during a quad insertion. /// @@ -465,6 +454,11 @@ impl PyStore { to_graph: Option<&PyAny>, py: Python<'_>, ) -> PyResult<()> { + let Some(format) = RdfFormat::from_media_type(mime_type) else { + return Err(PyValueError::new_err(format!( + "Not supported MIME type: {mime_type}" + ))); + }; let to_graph_name = if let Some(graph_name) = to_graph { Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) } else { @@ -476,31 +470,16 @@ impl PyStore { PyReadable::from_data(input, py) }; py.allow_threads(|| { - if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { + if let Some(to_graph_name) = to_graph_name { self.inner .bulk_loader() - .load_graph( - input, - graph_format, - &to_graph_name.unwrap_or(GraphName::DefaultGraph), - base_iri, - ) - .map_err(map_loader_error) - } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { - if to_graph_name.is_some() { - return Err(PyValueError::new_err( - "The target graph name parameter is not available for dataset formats", - )); - } + .load_graph(input, format, to_graph_name, base_iri) + } else { self.inner .bulk_loader() - .load_dataset(input, dataset_format, base_iri) - .map_err(map_loader_error) - } else { - Err(PyValueError::new_err(format!( - "Not supported MIME type: {mime_type}" - ))) + .load_dataset(input, format, base_iri) } + .map_err(map_loader_error) }) } @@ -542,11 +521,6 @@ impl PyStore { from_graph: Option<&PyAny>, py: Python<'_>, ) -> PyResult<()> { - let output = if let Ok(path) = output.extract::(py) { - PyWritable::from_file(&path, py).map_err(map_io_err)? - } else { - PyWritable::from_data(output) - }; let Some(format) = RdfFormat::from_media_type(mime_type) else { return Err(PyValueError::new_err(format!( "Not supported MIME type: {mime_type}" @@ -557,6 +531,11 @@ impl PyStore { } else { None }; + let output = if let Ok(path) = output.extract::(py) { + PyWritable::from_file(&path, py).map_err(map_io_err)? + } else { + PyWritable::from_data(output) + }; py.allow_threads(|| { if let Some(from_graph_name) = &from_graph_name { self.inner.dump_graph(output, format, from_graph_name) @@ -860,6 +839,7 @@ pub fn map_loader_error(error: LoaderError) -> PyErr { match error { LoaderError::Storage(error) => map_storage_error(error), LoaderError::Parsing(error) => map_parse_error(error), + LoaderError::InvalidBaseIri { .. } => PyValueError::new_err(error.to_string()), } } diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 2d291bc8..d3f535c4 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -2,9 +2,9 @@ import unittest from io import BytesIO, StringIO, UnsupportedOperation from tempfile import NamedTemporaryFile, TemporaryFile -from pyoxigraph import Literal, NamedNode, Quad, Triple, parse, serialize +from pyoxigraph import Literal, NamedNode, Quad, parse, serialize -EXAMPLE_TRIPLE = Triple( +EXAMPLE_TRIPLE = Quad( NamedNode("http://example.com/foo"), NamedNode("http://example.com/p"), Literal("éù"), @@ -83,11 +83,40 @@ class TestParse(unittest.TestCase): [EXAMPLE_QUAD], ) + def test_parse_without_named_graphs(self) -> None: + with self.assertRaises(SyntaxError) as _: + list( + parse( + StringIO(' {

"1" }'), + "application/trig", + base_iri="http://example.com/", + without_named_graphs=True, + ) + ) + + def test_parse_rename_blank_nodes(self) -> None: + self.assertNotEqual( + list( + parse( + StringIO('_:s "o" .'), + "application/n-triples", + rename_blank_nodes=True, + ) + ), + list( + parse( + StringIO('_:s "o" .'), + "application/n-triples", + rename_blank_nodes=True, + ) + ), + ) + class TestSerialize(unittest.TestCase): def test_serialize_to_bytes_io(self) -> None: output = BytesIO() - serialize([EXAMPLE_TRIPLE], output, "text/turtle") + serialize([EXAMPLE_TRIPLE.triple], output, "text/turtle") self.assertEqual( output.getvalue().decode(), ' "éù" .\n', diff --git a/server/src/main.rs b/server/src/main.rs index eb9b3116..5749d7f6 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -1,10 +1,10 @@ #![allow(clippy::print_stderr, clippy::cast_precision_loss, clippy::use_debug)] -use anyhow::{anyhow, bail, ensure, Context, Error}; +use anyhow::{anyhow, bail, ensure, Context}; use clap::{Parser, Subcommand}; use flate2::read::MultiGzDecoder; use oxhttp::model::{Body, HeaderName, HeaderValue, Method, Request, Response, Status}; use oxhttp::Server; -use oxigraph::io::{DatasetFormat, GraphFormat, RdfFormat, RdfSerializer}; +use oxigraph::io::{RdfFormat, RdfSerializer}; use oxigraph::model::{ GraphName, GraphNameRef, IriParseError, NamedNode, NamedNodeRef, NamedOrBlankNode, }; @@ -295,13 +295,13 @@ pub fn main() -> anyhow::Result<()> { Store::new() }?; let format = if let Some(format) = format { - Some(GraphOrDatasetFormat::from_str(&format)?) + Some(rdf_format_from_name(&format)?) } else { None }; let graph = if let Some(iri) = &graph { Some( - NamedNodeRef::new(iri) + NamedNode::new(iri) .with_context(|| format!("The target graph name {iri} is invalid"))?, ) } else { @@ -342,6 +342,7 @@ pub fn main() -> anyhow::Result<()> { .scope(|s| { for file in file { let store = store.clone(); + let graph = graph.clone(); s.spawn(move |_| { let f = file.clone(); let start = Instant::now(); @@ -379,10 +380,8 @@ pub fn main() -> anyhow::Result<()> { &loader, MultiGzDecoder::new(fp), format.unwrap_or_else(|| { - GraphOrDatasetFormat::from_path( - &file.with_extension(""), - ) - .unwrap() + rdf_format_from_path(&file.with_extension("")) + .unwrap() }), None, graph, @@ -392,7 +391,7 @@ pub fn main() -> anyhow::Result<()> { &loader, fp, format.unwrap_or_else(|| { - GraphOrDatasetFormat::from_path(&file).unwrap() + rdf_format_from_path(&file).unwrap() }), None, graph, @@ -432,7 +431,7 @@ pub fn main() -> anyhow::Result<()> { }; let graph = if let Some(graph) = &graph { Some(if graph.eq_ignore_ascii_case("default") { - GraphName::DefaultGraph + GraphNameRef::DefaultGraph } else { NamedNodeRef::new(graph) .with_context(|| format!("The target graph name {graph} is invalid"))? @@ -637,24 +636,15 @@ pub fn main() -> anyhow::Result<()> { fn bulk_load( loader: &BulkLoader, reader: impl Read, - format: GraphOrDatasetFormat, + format: RdfFormat, base_iri: Option<&str>, - to_graph_name: Option>, + to_graph_name: Option, ) -> anyhow::Result<()> { - match format { - GraphOrDatasetFormat::Graph(format) => loader.load_graph( - reader, - format, - to_graph_name.map_or(GraphNameRef::DefaultGraph, GraphNameRef::from), - base_iri, - )?, - GraphOrDatasetFormat::Dataset(format) => { - if to_graph_name.is_some() { - bail!("The --graph option is not allowed when loading a dataset format like NQuads or TriG"); - } - loader.load_dataset(reader, format, base_iri)? - } - } + if let Some(to_graph_name) = to_graph_name { + loader.load_graph(reader, format, to_graph_name, base_iri) + } else { + loader.load_dataset(reader, format, base_iri) + }?; Ok(()) } @@ -662,57 +652,17 @@ fn dump( store: &Store, writer: impl Write, format: RdfFormat, - to_graph_name: Option, + from_graph_name: Option>, ) -> anyhow::Result<()> { - ensure!(format.supports_datasets() || to_graph_name.is_some(), "The --graph option is required when writing a format not supporting datasets like NTriples, Turtle or RDF/XML"); - if let Some(to_graph_name) = &to_graph_name { - store.dump_graph(writer, format, to_graph_name) + ensure!(format.supports_datasets() || from_graph_name.is_some(), "The --graph option is required when writing a format not supporting datasets like NTriples, Turtle or RDF/XML"); + if let Some(from_graph_name) = from_graph_name { + store.dump_graph(writer, format, from_graph_name) } else { store.dump_dataset(writer, format) }?; Ok(()) } -#[derive(Copy, Clone)] -enum GraphOrDatasetFormat { - Graph(GraphFormat), - Dataset(DatasetFormat), -} - -impl GraphOrDatasetFormat { - fn from_path(path: &Path) -> anyhow::Result { - format_from_path(path, Self::from_extension) - } - - fn from_extension(name: &str) -> anyhow::Result { - Ok(match (GraphFormat::from_extension(name), DatasetFormat::from_extension(name)) { - (Some(g), Some(d)) => bail!("The file extension '{name}' can be resolved to both '{}' and '{}', not sure what to pick", g.file_extension(), d.file_extension()), - (Some(g), None) => Self::Graph(g), - (None, Some(d)) => Self::Dataset(d), - (None, None) => - bail!("The file extension '{name}' is unknown") - }) - } - - fn from_media_type(name: &str) -> anyhow::Result { - Ok( - match ( - GraphFormat::from_media_type(name), - DatasetFormat::from_media_type(name), - ) { - (Some(g), Some(d)) => bail!( - "The media type '{name}' can be resolved to both '{}' and '{}', not sure what to pick", - g.file_extension(), - d.file_extension() - ), - (Some(g), None) => Self::Graph(g), - (None, Some(d)) => Self::Dataset(d), - (None, None) => bail!("The media type '{name}' is unknown"), - }, - ) - } -} - fn format_from_path( path: &Path, from_extension: impl FnOnce(&str) -> anyhow::Result, @@ -731,20 +681,6 @@ fn format_from_path( } } -impl FromStr for GraphOrDatasetFormat { - type Err = Error; - - fn from_str(name: &str) -> anyhow::Result { - if let Ok(t) = Self::from_extension(name) { - return Ok(t); - } - if let Ok(t) = Self::from_media_type(name) { - return Ok(t); - } - bail!("The file format '{name}' is unknown") - } -} - fn rdf_format_from_path(path: &Path) -> anyhow::Result { format_from_path(path, |ext| { RdfFormat::from_extension(ext) @@ -978,7 +914,7 @@ fn handle_request( let content_type = content_type(request).ok_or_else(|| bad_request("No Content-Type given"))?; if let Some(target) = store_target(request)? { - let format = GraphFormat::from_media_type(&content_type) + let format = RdfFormat::from_media_type(&content_type) .ok_or_else(|| unsupported_media_type(&content_type))?; let new = !match &target { NamedGraphName::NamedNode(target) => { @@ -1002,7 +938,7 @@ fn handle_request( true } }; - web_load_graph(&store, request, format, GraphName::from(target).as_ref())?; + web_load_graph(&store, request, format, &GraphName::from(target))?; Ok(Response::builder(if new { Status::CREATED } else { @@ -1010,7 +946,7 @@ fn handle_request( }) .build()) } else { - let format = DatasetFormat::from_media_type(&content_type) + let format = RdfFormat::from_media_type(&content_type) .ok_or_else(|| unsupported_media_type(&content_type))?; store.clear().map_err(internal_server_error)?; web_load_dataset(&store, request, format)?; @@ -1054,10 +990,10 @@ fn handle_request( let content_type = content_type(request).ok_or_else(|| bad_request("No Content-Type given"))?; if let Some(target) = store_target(request)? { - let format = GraphFormat::from_media_type(&content_type) + let format = RdfFormat::from_media_type(&content_type) .ok_or_else(|| unsupported_media_type(&content_type))?; let new = assert_that_graph_exists(&store, &target).is_ok(); - web_load_graph(&store, request, format, GraphName::from(target).as_ref())?; + web_load_graph(&store, request, format, &GraphName::from(target))?; Ok(Response::builder(if new { Status::CREATED } else { @@ -1065,22 +1001,19 @@ fn handle_request( }) .build()) } else { - match GraphOrDatasetFormat::from_media_type(&content_type) - .map_err(|_| unsupported_media_type(&content_type))? - { - GraphOrDatasetFormat::Graph(format) => { - let graph = - resolve_with_base(request, &format!("/store/{:x}", random::()))?; - web_load_graph(&store, request, format, graph.as_ref().into())?; - Ok(Response::builder(Status::CREATED) - .with_header(HeaderName::LOCATION, graph.into_string()) - .unwrap() - .build()) - } - GraphOrDatasetFormat::Dataset(format) => { - web_load_dataset(&store, request, format)?; - Ok(Response::builder(Status::NO_CONTENT).build()) - } + let format = RdfFormat::from_media_type(&content_type) + .ok_or_else(|| unsupported_media_type(&content_type))?; + if format.supports_datasets() { + web_load_dataset(&store, request, format)?; + Ok(Response::builder(Status::NO_CONTENT).build()) + } else { + let graph = + resolve_with_base(request, &format!("/store/{:x}", random::()))?; + web_load_graph(&store, request, format, &graph.clone().into())?; + Ok(Response::builder(Status::CREATED) + .with_header(HeaderName::LOCATION, graph.as_str()) + .unwrap() + .build()) } } } @@ -1531,10 +1464,10 @@ fn content_type(request: &Request) -> Option { fn web_load_graph( store: &Store, request: &mut Request, - format: GraphFormat, - to_graph_name: GraphNameRef<'_>, + format: RdfFormat, + to_graph_name: &GraphName, ) -> Result<(), HttpError> { - let base_iri = if let GraphNameRef::NamedNode(graph_name) = to_graph_name { + let base_iri = if let GraphName::NamedNode(graph_name) = to_graph_name { Some(graph_name.as_str()) } else { None @@ -1543,11 +1476,11 @@ fn web_load_graph( web_bulk_loader(store, request).load_graph( request.body_mut(), format, - to_graph_name, + to_graph_name.clone(), base_iri, ) } else { - store.load_graph(request.body_mut(), format, to_graph_name, base_iri) + store.load_graph(request.body_mut(), format, to_graph_name.clone(), base_iri) } .map_err(loader_to_http_error) } @@ -1555,7 +1488,7 @@ fn web_load_graph( fn web_load_dataset( store: &Store, request: &mut Request, - format: DatasetFormat, + format: RdfFormat, ) -> Result<(), HttpError> { if url_query_parameter(request, "no_transaction").is_some() { web_bulk_loader(store, request).load_dataset(request.body_mut(), format, None) @@ -1616,6 +1549,7 @@ fn loader_to_http_error(e: LoaderError) -> HttpError { match e { LoaderError::Parsing(e) => bad_request(e), LoaderError::Storage(e) => internal_server_error(e), + LoaderError::InvalidBaseIri { .. } => bad_request(e), } } diff --git a/testsuite/src/files.rs b/testsuite/src/files.rs index 7e4f3b17..fc7f5a20 100644 --- a/testsuite/src/files.rs +++ b/testsuite/src/files.rs @@ -1,5 +1,5 @@ use anyhow::{anyhow, bail, Context, Result}; -use oxigraph::io::{DatasetFormat, DatasetParser, GraphFormat, GraphParser}; +use oxigraph::io::{RdfFormat, RdfParser}; use oxigraph::model::{Dataset, Graph}; use oxttl::n3::N3Quad; use oxttl::N3Parser; @@ -33,14 +33,14 @@ pub fn read_file_to_string(url: &str) -> Result { pub fn load_to_graph( url: &str, graph: &mut Graph, - format: GraphFormat, + format: RdfFormat, ignore_errors: bool, ) -> Result<()> { - let parser = GraphParser::from_format(format).with_base_iri(url)?; - for t in parser.read_triples(read_file(url)?) { + let parser = RdfParser::from_format(format).with_base_iri(url)?; + for t in parser.parse_read(read_file(url)?) { match t { Ok(t) => { - graph.insert(&t); + graph.insert(&t.into()); } Err(e) => { if !ignore_errors { @@ -52,26 +52,20 @@ pub fn load_to_graph( Ok(()) } -pub fn load_graph(url: &str, format: GraphFormat, ignore_errors: bool) -> Result { +pub fn load_graph(url: &str, format: RdfFormat, ignore_errors: bool) -> Result { let mut graph = Graph::new(); load_to_graph(url, &mut graph, format, ignore_errors)?; Ok(graph) } -pub fn guess_graph_format(url: &str) -> Result { - url.rsplit_once('.') - .and_then(|(_, extension)| GraphFormat::from_extension(extension)) - .ok_or_else(|| anyhow!("Serialization type not found for {url}")) -} - pub fn load_to_dataset( url: &str, dataset: &mut Dataset, - format: DatasetFormat, + format: RdfFormat, ignore_errors: bool, ) -> Result<()> { - let parser = DatasetParser::from_format(format).with_base_iri(url)?; - for q in parser.read_quads(read_file(url)?) { + let parser = RdfParser::from_format(format).with_base_iri(url)?; + for q in parser.parse_read(read_file(url)?) { match q { Ok(q) => { dataset.insert(&q); @@ -86,15 +80,15 @@ pub fn load_to_dataset( Ok(()) } -pub fn load_dataset(url: &str, format: DatasetFormat, ignore_errors: bool) -> Result { +pub fn load_dataset(url: &str, format: RdfFormat, ignore_errors: bool) -> Result { let mut dataset = Dataset::new(); load_to_dataset(url, &mut dataset, format, ignore_errors)?; Ok(dataset) } -pub fn guess_dataset_format(url: &str) -> Result { +pub fn guess_rdf_format(url: &str) -> Result { url.rsplit_once('.') - .and_then(|(_, extension)| DatasetFormat::from_extension(extension)) + .and_then(|(_, extension)| RdfFormat::from_extension(extension)) .ok_or_else(|| anyhow!("Serialization type not found for {url}")) } diff --git a/testsuite/src/manifest.rs b/testsuite/src/manifest.rs index f854b95e..fd450fe2 100644 --- a/testsuite/src/manifest.rs +++ b/testsuite/src/manifest.rs @@ -1,4 +1,4 @@ -use crate::files::{guess_graph_format, load_to_graph}; +use crate::files::{guess_rdf_format, load_to_graph}; use crate::vocab::*; use anyhow::{bail, Result}; use oxigraph::model::vocab::*; @@ -275,7 +275,7 @@ impl TestManifest { return Ok(None); }; self.graph.clear(); - load_to_graph(&url, &mut self.graph, guess_graph_format(&url)?, false)?; + load_to_graph(&url, &mut self.graph, guess_rdf_format(&url)?, false)?; let manifests = self .graph diff --git a/testsuite/src/parser_evaluator.rs b/testsuite/src/parser_evaluator.rs index 975c2e9b..dbf660da 100644 --- a/testsuite/src/parser_evaluator.rs +++ b/testsuite/src/parser_evaluator.rs @@ -1,27 +1,27 @@ use crate::evaluator::TestEvaluator; -use crate::files::{guess_dataset_format, guess_graph_format, load_dataset, load_graph, load_n3}; +use crate::files::{guess_rdf_format, load_dataset, load_n3}; use crate::manifest::Test; -use crate::report::{dataset_diff, graph_diff}; +use crate::report::dataset_diff; use anyhow::{anyhow, bail, Result}; -use oxigraph::io::{DatasetFormat, GraphFormat}; +use oxigraph::io::RdfFormat; use oxigraph::model::{BlankNode, Dataset, Quad}; use oxttl::n3::{N3Quad, N3Term}; pub fn register_parser_tests(evaluator: &mut TestEvaluator) { evaluator.register( "http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax", - |t| evaluate_positive_graph_syntax_test(t, GraphFormat::NTriples), + |t| evaluate_positive_syntax_test(t, RdfFormat::NTriples), ); evaluator.register( "http://www.w3.org/ns/rdftest#TestNQuadsPositiveSyntax", - |t| evaluate_positive_dataset_syntax_test(t, DatasetFormat::NQuads), + |t| evaluate_positive_syntax_test(t, RdfFormat::NQuads), ); evaluator.register( "http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax", - |t| evaluate_positive_graph_syntax_test(t, GraphFormat::Turtle), + |t| evaluate_positive_syntax_test(t, RdfFormat::Turtle), ); evaluator.register("http://www.w3.org/ns/rdftest#TestTrigPositiveSyntax", |t| { - evaluate_positive_dataset_syntax_test(t, DatasetFormat::TriG) + evaluate_positive_syntax_test(t, RdfFormat::TriG) }); evaluator.register( "https://w3c.github.io/N3/tests/test.n3#TestN3PositiveSyntax", @@ -29,47 +29,47 @@ pub fn register_parser_tests(evaluator: &mut TestEvaluator) { ); evaluator.register( "http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax", - |t| evaluate_negative_graph_syntax_test(t, GraphFormat::NTriples), + |t| evaluate_negative_syntax_test(t, RdfFormat::NTriples), ); evaluator.register( "http://www.w3.org/ns/rdftest#TestNQuadsNegativeSyntax", - |t| evaluate_negative_dataset_syntax_test(t, DatasetFormat::NQuads), + |t| evaluate_negative_syntax_test(t, RdfFormat::NQuads), ); evaluator.register( "http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax", - |t| evaluate_negative_graph_syntax_test(t, GraphFormat::Turtle), + |t| evaluate_negative_syntax_test(t, RdfFormat::Turtle), ); evaluator.register("http://www.w3.org/ns/rdftest#TestTrigNegativeSyntax", |t| { - evaluate_negative_dataset_syntax_test(t, DatasetFormat::TriG) + evaluate_negative_syntax_test(t, RdfFormat::TriG) }); evaluator.register("http://www.w3.org/ns/rdftest#TestXMLNegativeSyntax", |t| { - evaluate_negative_graph_syntax_test(t, GraphFormat::RdfXml) + evaluate_negative_syntax_test(t, RdfFormat::RdfXml) }); evaluator.register( "https://w3c.github.io/N3/tests/test.n3#TestN3NegativeSyntax", evaluate_negative_n3_syntax_test, ); evaluator.register("http://www.w3.org/ns/rdftest#TestTurtleEval", |t| { - evaluate_graph_eval_test(t, GraphFormat::Turtle, false) + evaluate_eval_test(t, RdfFormat::Turtle, false) }); evaluator.register("http://www.w3.org/ns/rdftest#TestTrigEval", |t| { - evaluate_dataset_eval_test(t, DatasetFormat::TriG, false) + evaluate_eval_test(t, RdfFormat::TriG, false) }); evaluator.register("http://www.w3.org/ns/rdftest#TestXMLEval", |t| { - evaluate_graph_eval_test(t, GraphFormat::RdfXml, false) + evaluate_eval_test(t, RdfFormat::RdfXml, false) }); evaluator.register("https://w3c.github.io/N3/tests/test.n3#TestN3Eval", |t| { evaluate_n3_eval_test(t, false) }); evaluator.register("http://www.w3.org/ns/rdftest#TestTurtleNegativeEval", |t| { - evaluate_negative_graph_syntax_test(t, GraphFormat::Turtle) + evaluate_negative_syntax_test(t, RdfFormat::Turtle) }); evaluator.register("http://www.w3.org/ns/rdftest#TestTrigNegativeEval", |t| { - evaluate_negative_dataset_syntax_test(t, DatasetFormat::TriG) + evaluate_negative_syntax_test(t, RdfFormat::TriG) }); evaluator.register( "https://w3c.github.io/rdf-canon/tests/vocab#RDFC10EvalTest", - |t| evaluate_positive_dataset_syntax_test(t, DatasetFormat::NQuads), //TODO: not a proper implementation! + |t| evaluate_positive_syntax_test(t, RdfFormat::NQuads), //TODO: not a proper implementation! ); evaluator.register( "https://w3c.github.io/rdf-canon/tests/vocab#RDFC10NegativeEvalTest", @@ -81,11 +81,11 @@ pub fn register_parser_tests(evaluator: &mut TestEvaluator) { ); evaluator.register( "https://github.com/oxigraph/oxigraph/tests#TestNTripleRecovery", - |t| evaluate_graph_eval_test(t, GraphFormat::NTriples, true), + |t| evaluate_eval_test(t, RdfFormat::NTriples, true), ); evaluator.register( "https://github.com/oxigraph/oxigraph/tests#TestTurtleRecovery", - |t| evaluate_graph_eval_test(t, GraphFormat::Turtle, true), + |t| evaluate_eval_test(t, RdfFormat::Turtle, true), ); evaluator.register( "https://github.com/oxigraph/oxigraph/tests#TestN3Recovery", @@ -93,16 +93,7 @@ pub fn register_parser_tests(evaluator: &mut TestEvaluator) { ); } -fn evaluate_positive_graph_syntax_test(test: &Test, format: GraphFormat) -> Result<()> { - let action = test - .action - .as_deref() - .ok_or_else(|| anyhow!("No action found for test {test}"))?; - load_graph(action, format, false).map_err(|e| anyhow!("Parse error: {e}"))?; - Ok(()) -} - -fn evaluate_positive_dataset_syntax_test(test: &Test, format: DatasetFormat) -> Result<()> { +fn evaluate_positive_syntax_test(test: &Test, format: RdfFormat) -> Result<()> { let action = test .action .as_deref() @@ -120,18 +111,7 @@ fn evaluate_positive_n3_syntax_test(test: &Test) -> Result<()> { Ok(()) } -fn evaluate_negative_graph_syntax_test(test: &Test, format: GraphFormat) -> Result<()> { - let action = test - .action - .as_deref() - .ok_or_else(|| anyhow!("No action found for test {test}"))?; - match load_graph(action, format, false) { - Ok(_) => bail!("File parsed without errors even if it should not"), - Err(_) => Ok(()), - } -} - -fn evaluate_negative_dataset_syntax_test(test: &Test, format: DatasetFormat) -> Result<()> { +fn evaluate_negative_syntax_test(test: &Test, format: RdfFormat) -> Result<()> { let action = test .action .as_deref() @@ -153,36 +133,7 @@ fn evaluate_negative_n3_syntax_test(test: &Test) -> Result<()> { } } -fn evaluate_graph_eval_test(test: &Test, format: GraphFormat, ignore_errors: bool) -> Result<()> { - let action = test - .action - .as_deref() - .ok_or_else(|| anyhow!("No action found for test {test}"))?; - let mut actual_graph = load_graph(action, format, ignore_errors) - .map_err(|e| anyhow!("Parse error on file {action}: {e}"))?; - actual_graph.canonicalize(); - let results = test - .result - .as_ref() - .ok_or_else(|| anyhow!("No tests result found"))?; - let mut expected_graph = load_graph(results, guess_graph_format(results)?, false) - .map_err(|e| anyhow!("Parse error on file {results}: {e}"))?; - expected_graph.canonicalize(); - if expected_graph == actual_graph { - Ok(()) - } else { - bail!( - "The two files are not isomorphic. Diff:\n{}", - graph_diff(&expected_graph, &actual_graph) - ) - } -} - -fn evaluate_dataset_eval_test( - test: &Test, - format: DatasetFormat, - ignore_errors: bool, -) -> Result<()> { +fn evaluate_eval_test(test: &Test, format: RdfFormat, ignore_errors: bool) -> Result<()> { let action = test .action .as_deref() @@ -194,7 +145,7 @@ fn evaluate_dataset_eval_test( .result .as_ref() .ok_or_else(|| anyhow!("No tests result found"))?; - let mut expected_dataset = load_dataset(results, guess_dataset_format(results)?, false) + let mut expected_dataset = load_dataset(results, guess_rdf_format(results)?, false) .map_err(|e| anyhow!("Parse error on file {results}: {e}"))?; expected_dataset.canonicalize(); if expected_dataset == actual_dataset { diff --git a/testsuite/src/report.rs b/testsuite/src/report.rs index ba24ee2c..be27110a 100644 --- a/testsuite/src/report.rs +++ b/testsuite/src/report.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use oxigraph::model::{Dataset, Graph, NamedNode}; +use oxigraph::model::{Dataset, NamedNode}; use std::fmt::Write; use text_diff::{diff, Difference}; use time::format_description::well_known::Rfc3339; @@ -26,20 +26,6 @@ fn normalize_dataset_text(store: &Dataset) -> String { quads.join("\n") } -pub(super) fn graph_diff(expected: &Graph, actual: &Graph) -> String { - format_diff( - &normalize_graph_text(expected), - &normalize_graph_text(actual), - "triples", - ) -} - -fn normalize_graph_text(store: &Graph) -> String { - let mut triples: Vec<_> = store.iter().map(|q| q.to_string()).collect(); - triples.sort(); - triples.join("\n") -} - pub(super) fn format_diff(expected: &str, actual: &str, kind: &str) -> String { let (_, changeset) = diff(expected, actual, "\n"); let mut ret = String::new(); diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index ab2b6643..f440b6ad 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -158,7 +158,7 @@ fn evaluate_evaluation_test(test: &Test) -> Result<()> { load_dataset_to_store(data, &store)?; } for (name, value) in &test.graph_data { - load_graph_to_store(value, &store, name)?; + load_graph_to_store(value, &store, name.clone())?; } let query_file = test .query @@ -251,7 +251,7 @@ fn evaluate_update_evaluation_test(test: &Test) -> Result<()> { load_dataset_to_store(data, &store)?; } for (name, value) in &test.graph_data { - load_graph_to_store(value, &store, name)?; + load_graph_to_store(value, &store, name.clone())?; } let result_store = Store::new()?; @@ -259,7 +259,7 @@ fn evaluate_update_evaluation_test(test: &Test) -> Result<()> { load_dataset_to_store(data, &result_store)?; } for (name, value) in &test.result_graph_data { - load_graph_to_store(value, &result_store, name)?; + load_graph_to_store(value, &result_store, name.clone())?; } let update_file = test @@ -301,7 +301,7 @@ fn load_sparql_query_result(url: &str) -> Result { false, ) } else { - StaticQueryResults::from_graph(&load_graph(url, guess_graph_format(url)?, false)?) + StaticQueryResults::from_graph(&load_graph(url, guess_rdf_format(url)?, false)?) } } @@ -698,14 +698,14 @@ fn solutions_to_string(solutions: Vec>, ordered: bool) -> lines.join("\n") } -fn load_graph_to_store<'a>( +fn load_graph_to_store( url: &str, store: &Store, - to_graph_name: impl Into>, + to_graph_name: impl Into, ) -> Result<()> { store.load_graph( read_file(url)?, - guess_graph_format(url)?, + guess_rdf_format(url)?, to_graph_name, Some(url), )?; @@ -713,16 +713,7 @@ fn load_graph_to_store<'a>( } fn load_dataset_to_store(url: &str, store: &Store) -> Result<()> { - if let Ok(format) = guess_dataset_format(url) { - store.load_dataset(read_file(url)?, format, Some(url)) - } else { - store.load_graph( - read_file(url)?, - guess_graph_format(url)?, - GraphNameRef::DefaultGraph, - Some(url), - ) - }?; + store.load_dataset(read_file(url)?, guess_rdf_format(url)?, Some(url))?; Ok(()) }