From ddf589ea14a7a57173ad00d0203a5feb85252878 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 17 Nov 2023 17:55:33 +0100 Subject: [PATCH] Python: Introduces enums for RDF and SPARQL result formats --- python/docs/io.rst | 11 +- python/docs/model.rst | 13 +- python/docs/sparql.rst | 17 +- python/docs/store.rst | 3 +- python/generate_stubs.py | 11 ++ python/src/io.rs | 336 +++++++++++++++++++++++++++---------- python/src/lib.rs | 2 + python/src/model.rs | 2 +- python/src/sparql.rs | 273 ++++++++++++++++++++++++------ python/src/store.rs | 80 ++++----- python/tests/test_io.py | 50 +++--- python/tests/test_store.py | 40 +++-- 12 files changed, 597 insertions(+), 241 deletions(-) diff --git a/python/docs/io.rst b/python/docs/io.rst index 8be36f43..edf3fba0 100644 --- a/python/docs/io.rst +++ b/python/docs/io.rst @@ -1,14 +1,21 @@ RDF Parsing and Serialization ============================= +.. py:currentmodule:: pyoxigraph Oxigraph provides functions to parse and serialize RDF files: Parsing """"""" -.. autofunction:: pyoxigraph.parse +.. autofunction:: parse Serialization """"""""""""" -.. autofunction:: pyoxigraph.serialize +.. autofunction:: serialize + + +Formats +""""""" +.. autoclass:: RdfFormat + :members: diff --git a/python/docs/model.rst b/python/docs/model.rst index 1950e853..28a7279c 100644 --- a/python/docs/model.rst +++ b/python/docs/model.rst @@ -1,37 +1,38 @@ RDF Model ========= +.. py:currentmodule:: pyoxigraph Oxigraph provides python classes to represents basic RDF concepts: `IRIs `_ """"""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: pyoxigraph.NamedNode +.. autoclass:: NamedNode :members: `Blank Nodes `_ """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: pyoxigraph.BlankNode +.. autoclass:: BlankNode :members: `Literals `_ """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: pyoxigraph.Literal +.. autoclass:: Literal :members: `Triples `_ """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: pyoxigraph.Triple +.. autoclass:: Triple :members: Quads (`triples `_ in a `RDF dataset `_) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: pyoxigraph.Quad +.. autoclass:: Quad :members: -.. autoclass:: pyoxigraph.DefaultGraph +.. autoclass:: DefaultGraph :members: diff --git a/python/docs/sparql.rst b/python/docs/sparql.rst index 1e87d179..824a42cb 100644 --- a/python/docs/sparql.rst +++ b/python/docs/sparql.rst @@ -1,32 +1,33 @@ SPARQL utility objects -============================= +====================== +.. py:currentmodule:: pyoxigraph Oxigraph provides also some utilities related to SPARQL queries: - Variable """""""" -.. autoclass:: pyoxigraph.Variable +.. autoclass:: Variable :members: ``SELECT`` solutions """""""""""""""""""" -.. autoclass:: pyoxigraph.QuerySolutions +.. autoclass:: QuerySolutions :members: -.. autoclass:: pyoxigraph.QuerySolution +.. autoclass:: QuerySolution :members: ``ASK`` results """"""""""""""" -.. autoclass:: pyoxigraph.QueryBoolean +.. autoclass:: QueryBoolean :members: ``CONSTRUCT`` results """"""""""""""""""""" -.. autoclass:: pyoxigraph.QueryTriples +.. autoclass:: QueryTriples :members: Query results parsing """"""""""""""""""""" -.. autoclass:: pyoxigraph.parse_query_results +.. autofunction:: parse_query_results +.. autoclass:: QueryResultsFormat :members: diff --git a/python/docs/store.rst b/python/docs/store.rst index 2fea9ab3..b6af6ef5 100644 --- a/python/docs/store.rst +++ b/python/docs/store.rst @@ -1,5 +1,6 @@ RDF Store ========= +.. py:currentmodule:: pyoxigraph -.. autoclass:: pyoxigraph.Store +.. autoclass:: Store :members: diff --git a/python/generate_stubs.py b/python/generate_stubs.py index 7d0469ec..5626c8de 100644 --- a/python/generate_stubs.py +++ b/python/generate_stubs.py @@ -141,6 +141,17 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i simple=1, ) ) + elif member_value is not None: + constants.append( + ast.AnnAssign( + target=ast.Name(id=member_name, ctx=ast.Store()), + annotation=concatenated_path_to_type( + member_value.__class__.__name__, element_path, types_to_import + ), + value=ast.Ellipsis(), + simple=1, + ) + ) else: logging.warning(f"Unsupported member {member_name} of class {'.'.join(element_path)}") diff --git a/python/src/io.rs b/python/src/io.rs index 8e24a24d..a87b7982 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -1,9 +1,8 @@ #![allow(clippy::needless_option_as_deref)] -use crate::model::{PyQuad, PyTriple}; +use crate::model::{hash, PyQuad, PyTriple}; use oxigraph::io::{FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer}; use oxigraph::model::QuadRef; -use oxigraph::sparql::results::QueryResultsFormat; use pyo3::exceptions::{PySyntaxError, PyValueError}; use pyo3::intern; use pyo3::prelude::*; @@ -19,12 +18,12 @@ use std::sync::OnceLock; /// /// It currently supports the following formats: /// -/// * `N-Triples `_ (``application/n-triples`` or ``nt``) -/// * `N-Quads `_ (``application/n-quads`` or ``nq``) -/// * `Turtle `_ (``text/turtle`` or ``ttl``) -/// * `TriG `_ (``application/trig`` or ``trig``) -/// * `N3 `_ (``text/n3`` or ``n3``) -/// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``) +/// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) +/// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) +/// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) +/// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) +/// * `N3 `_ (:py:attr:`RdfFormat.N3`) +/// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) /// /// It supports also some media type and extension aliases. /// For example, ``application/turtle`` could also be used for `Turtle `_ @@ -32,8 +31,8 @@ use std::sync::OnceLock; /// /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional -/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. -/// :type format: str or None, optional +/// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. +/// :type format: RdfFormat or None, optional /// :param path: The file path to read from. Replaces the ``input`` parameter. /// :type path: str or os.PathLike[str] or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. @@ -48,13 +47,13 @@ use std::sync::OnceLock; /// :raises SyntaxError: if the provided data is invalid. /// :raises OSError: if a system error happens while reading the file. /// -/// >>> list(parse(input=b'

"1" .', format="text/turtle", base_iri="http://example.com/")) +/// >>> list(parse(input=b'

"1" .', format=RdfFormat.TURTLE, base_iri="http://example.com/")) /// [ predicate= object=> graph_name=>] #[pyfunction] #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] pub fn parse( input: Option, - format: Option<&str>, + format: Option, path: Option, base_iri: Option<&str>, without_named_graphs: bool, @@ -62,7 +61,7 @@ pub fn parse( py: Python<'_>, ) -> PyResult { let input = PyReadable::from_args(&path, input, py)?; - let format = parse_format(format, path.as_deref())?; + let format = lookup_rdf_format(format, path.as_deref())?; let mut parser = RdfParser::from_format(format); if let Some(base_iri) = base_iri { parser = parser @@ -86,12 +85,12 @@ pub fn parse( /// /// It currently supports the following formats: /// -/// * `N-Triples `_ (``application/n-triples`` or ``nt``) -/// * `N-Quads `_ (``application/n-quads`` or ``nq``) -/// * `Turtle `_ (``text/turtle`` or ``ttl``) -/// * `TriG `_ (``application/trig`` or ``trig``) -/// * `N3 `_ (``text/n3`` or ``n3``) -/// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``) +/// * `canonical `_ `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) +/// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) +/// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) +/// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) +/// * `N3 `_ (:py:attr:`RdfFormat.N3`) +/// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) /// /// It supports also some media type and extension aliases. /// For example, ``application/turtle`` could also be used for `Turtle `_ @@ -101,31 +100,32 @@ pub fn parse( /// :type input: collections.abc.Iterable[Triple] or collections.abc.Iterable[Quad] /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional -/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. -/// :type format: str or None, optional -/// :return: py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set. +/// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. +/// :type format: RdfFormat or None, optional +/// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set. /// :rtype: bytes or None /// :raises ValueError: if the format is not supported. /// :raises TypeError: if a triple is given during a quad format serialization or reverse. /// :raises OSError: if a system error happens while writing the file. /// -/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], format="ttl") +/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], format=RdfFormat.TURTLE) /// b' "1" .\n' /// /// >>> output = io.BytesIO() -/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, "text/turtle") +/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, RdfFormat.TURTLE) /// >>> output.getvalue() /// b' "1" .\n' #[pyfunction] #[pyo3(signature = (input, output = None, format = None))] pub fn serialize<'a>( input: &PyAny, - output: Option<&PyAny>, - format: Option<&str>, + output: Option, + format: Option, py: Python<'a>, ) -> PyResult> { PyWritable::do_write( - |output, format| { + |output, file_path| { + let format = lookup_rdf_format(format, file_path.as_deref())?; let mut writer = RdfSerializer::from_format(format).serialize_to_write(output); for i in input.iter()? { let i = i?; @@ -145,7 +145,6 @@ pub fn serialize<'a>( Ok(writer.finish()?) }, output, - format, py, ) } @@ -174,6 +173,193 @@ impl PyQuadReader { } } +/// RDF serialization formats. +/// +/// The following formats are supported: +/// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) +/// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) +/// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) +/// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) +/// * `N3 `_ (:py:attr:`RdfFormat.N3`) +/// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) +#[pyclass(name = "RdfFormat", module = "pyoxigraph")] +#[derive(Clone)] +pub struct PyRdfFormat { + inner: RdfFormat, +} + +#[pymethods] +impl PyRdfFormat { + /// `N3 `_ + #[classattr] + const N3: Self = Self { + inner: RdfFormat::N3, + }; + + /// `N-Quads `_ + #[classattr] + const N_QUADS: Self = Self { + inner: RdfFormat::NQuads, + }; + + /// `N-Triples `_ + #[classattr] + const N_TRIPLES: Self = Self { + inner: RdfFormat::NTriples, + }; + + /// `RDF/XML `_ + #[classattr] + const RDF_XML: Self = Self { + inner: RdfFormat::RdfXml, + }; + + /// `TriG `_ + #[classattr] + const TRIG: Self = Self { + inner: RdfFormat::TriG, + }; + + /// `Turtle `_ + #[classattr] + const TURTLE: Self = Self { + inner: RdfFormat::Turtle, + }; + + /// :return: the format canonical IRI according to the `Unique URIs for file formats registry `_. + /// :rtype: str + /// + /// >>> RdfFormat.N_TRIPLES.iri + /// 'http://www.w3.org/ns/formats/N-Triples' + #[getter] + fn iri(&self) -> &'static str { + self.inner.iri() + } + + /// :return: the format `IANA media type `_. + /// :rtype: str + /// + /// >>> RdfFormat.N_TRIPLES.media_type + /// 'application/n-triples' + #[getter] + fn media_type(&self) -> &'static str { + self.inner.media_type() + } + + /// :return: the format `IANA-registered `_ file extension. + /// :rtype: str + /// + /// >>> RdfFormat.N_TRIPLES.file_extension + /// 'nt' + #[getter] + pub fn file_extension(&self) -> &'static str { + self.inner.file_extension() + } + + /// :return: the format name. + /// :rtype: str + /// + /// >>> RdfFormat.N_TRIPLES.name + /// 'N-Triples' + #[getter] + pub const fn name(&self) -> &'static str { + self.inner.name() + } + + /// :return: if the formats supports `RDF datasets `_ and not only `RDF graphs `_. + /// :rtype: bool + /// + /// >>> RdfFormat.N_TRIPLES.supports_datasets + /// False + /// >>> RdfFormat.N_QUADS.supports_datasets + /// True + #[getter] + pub fn supports_datasets(&self) -> bool { + self.inner.supports_datasets() + } + + /// :return: if the formats supports `RDF-star quoted triples `_. + /// :rtype: bool + /// + /// >>> RdfFormat.N_TRIPLES.supports_rdf_star + /// True + /// >>> RdfFormat.RDF_XML.supports_rdf_star + /// False + #[getter] + pub const fn supports_rdf_star(&self) -> bool { + self.inner.supports_rdf_star() + } + + /// Looks for a known format from a media type. + /// + /// It supports some media type aliases. + /// For example, "application/xml" is going to return RDF/XML even if it is not its canonical media type. + /// + /// :param media_type: the media type. + /// :type media_type: str + /// :return: :py:class:`RdfFormat` if the media type is known or :py:const:`None` if not. + /// :rtype: RdfFormat or None + /// + /// >>> RdfFormat.from_media_type("text/turtle; charset=utf-8") + /// + #[staticmethod] + pub fn from_media_type(media_type: &str) -> Option { + Some(Self { + inner: RdfFormat::from_media_type(media_type)?, + }) + } + + /// Looks for a known format from an extension. + /// + /// It supports some aliases. + /// + /// :param extension: the extension. + /// :type extension: str + /// :return: :py:class:`RdfFormat` if the extension is known or :py:const:`None` if not. + /// :rtype: RdfFormat or None + /// + /// >>> RdfFormat.from_extension("nt") + /// + #[staticmethod] + pub fn from_extension(extension: &str) -> Option { + Some(Self { + inner: RdfFormat::from_extension(extension)?, + }) + } + + fn __str__(&self) -> &'static str { + self.inner.name() + } + + fn __repr__(&self) -> String { + format!("", self.inner.name()) + } + + fn __hash__(&self) -> u64 { + hash(&self.inner) + } + + fn __eq__(&self, other: &Self) -> bool { + self.inner == other.inner + } + + fn __ne__(&self, other: &Self) -> bool { + self.inner != other.inner + } + + /// :rtype: RdfFormat + fn __copy__(slf: PyRef<'_, Self>) -> PyRef { + slf + } + + /// :type memo: typing.Any + /// :rtype: RdfFormat + #[allow(unused_variables)] + fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ PyAny) -> PyRef<'a, Self> { + slf + } +} + pub enum PyReadable { Bytes(Cursor>), Io(PyIo), @@ -233,24 +419,20 @@ pub enum PyWritable { } impl PyWritable { - pub fn do_write<'a, F: Format>( - write: impl FnOnce(BufWriter, F) -> PyResult>, - output: Option<&PyAny>, - format: Option<&str>, - py: Python<'a>, - ) -> PyResult> { - let file_path = output.and_then(|output| output.extract::().ok()); - let format = parse_format::(format, file_path.as_deref())?; - let output = if let Some(output) = output { - if let Some(file_path) = &file_path { - Self::File(py.allow_threads(|| File::create(file_path))?) - } else { - Self::Io(PyIo(output.into())) - } - } else { - PyWritable::Bytes(Vec::new()) + pub fn do_write( + write: impl FnOnce(BufWriter, Option) -> PyResult>, + output: Option, + py: Python<'_>, + ) -> PyResult> { + let (output, file_path) = match output { + Some(PyWritableOutput::Path(file_path)) => ( + Self::File(py.allow_threads(|| File::create(&file_path))?), + Some(file_path), + ), + Some(PyWritableOutput::Io(object)) => (Self::Io(PyIo(object)), None), + None => (Self::Bytes(Vec::new()), None), }; - let writer = write(BufWriter::new(output), format)?; + let writer = write(BufWriter::new(output), file_path)?; py.allow_threads(|| writer.into_inner())?.close(py) } @@ -290,6 +472,12 @@ impl Write for PyWritable { } } +#[derive(FromPyObject)] +pub enum PyWritableOutput { + Path(PathBuf), + Io(PyObject), +} + pub struct PyIo(PyObject); impl Read for PyIo { @@ -331,57 +519,23 @@ impl Write for PyIo { } } -pub trait Format: Sized { - fn from_media_type(media_type: &str) -> Option; - fn from_extension(extension: &str) -> Option; -} - -impl Format for RdfFormat { - fn from_media_type(media_type: &str) -> Option { - Self::from_media_type(media_type) - } - - fn from_extension(extension: &str) -> Option { - Self::from_extension(extension) - } -} - -impl Format for QueryResultsFormat { - fn from_media_type(media_type: &str) -> Option { - Self::from_media_type(media_type) +pub fn lookup_rdf_format(format: Option, path: Option<&Path>) -> PyResult { + if let Some(format) = format { + return Ok(format.inner); } - - fn from_extension(extension: &str) -> Option { - Self::from_extension(extension) - } -} - -pub fn parse_format(format: Option<&str>, path: Option<&Path>) -> PyResult { - let format = if let Some(format) = format { - format - } else if let Some(path) = path { - if let Some(ext) = path.extension().and_then(OsStr::to_str) { - ext - } else { - return Err(PyValueError::new_err(format!( - "The file name {} has no extension to guess a file format from", - path.display() - ))); - } - } else { + let Some(path) = path else { return Err(PyValueError::new_err( "The format parameter is required when a file path is not given", )); }; - if format.contains('/') { - F::from_media_type(format).ok_or_else(|| { - PyValueError::new_err(format!("Not supported RDF format media type: {format}")) - }) - } else { - F::from_extension(format).ok_or_else(|| { - PyValueError::new_err(format!("Not supported RDF format extension: {format}")) - }) - } + let Some(ext) = path.extension().and_then(OsStr::to_str) else { + return Err(PyValueError::new_err(format!( + "The file name {} has no extension to guess a file format from", + path.display() + ))); + }; + RdfFormat::from_extension(ext) + .ok_or_else(|| PyValueError::new_err(format!("Not supported RDF format extension: {ext}"))) } pub fn map_parse_error(error: ParseError, file_path: Option) -> PyErr { diff --git a/python/src/lib.rs b/python/src/lib.rs index d1b76022..051e2718 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -35,6 +35,8 @@ fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> { module.add_class::()?; module.add_class::()?; module.add_class::()?; + module.add_class::()?; + module.add_class::()?; module.add_wrapped(wrap_pyfunction!(parse))?; module.add_wrapped(wrap_pyfunction!(parse_query_results))?; module.add_wrapped(wrap_pyfunction!(serialize))?; diff --git a/python/src/model.rs b/python/src/model.rs index ffd3f0e1..db3f0258 100644 --- a/python/src/model.rs +++ b/python/src/model.rs @@ -1276,7 +1276,7 @@ fn eq_compare_other_type(op: CompareOp) -> PyResult { } } -fn hash(t: &impl Hash) -> u64 { +pub(crate) fn hash(t: &impl Hash) -> u64 { let mut s = DefaultHasher::new(); t.hash(&mut s); s.finish() diff --git a/python/src/sparql.rs b/python/src/sparql.rs index 863bda9b..36506e02 100644 --- a/python/src/sparql.rs +++ b/python/src/sparql.rs @@ -4,8 +4,8 @@ use crate::store::map_storage_error; use oxigraph::io::RdfSerializer; use oxigraph::model::Term; use oxigraph::sparql::results::{ - FromReadQueryResultsReader, FromReadSolutionsReader, ParseError, QueryResultsParser, - QueryResultsSerializer, + FromReadQueryResultsReader, FromReadSolutionsReader, ParseError, QueryResultsFormat, + QueryResultsParser, QueryResultsSerializer, }; use oxigraph::sparql::{ EvaluationError, Query, QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter, @@ -15,8 +15,9 @@ use pyo3::basic::CompareOp; use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyBytes; +use std::ffi::OsStr; use std::io; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::vec::IntoIter; pub fn parse_query( @@ -214,18 +215,18 @@ impl PyQuerySolutions { /// /// It currently supports the following formats: /// - /// * `XML `_ (``application/sparql-results+xml`` or ``srx``) - /// * `JSON `_ (``application/sparql-results+json`` or ``srj``) - /// * `CSV `_ (``text/csv`` or ``csv``) - /// * `TSV `_ (``text/tab-separated-values`` or ``tsv``) + /// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) + /// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) + /// * `CSV `_ (:py:attr:`QueryResultsFormat.CSV`) + /// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) /// /// It supports also some media type and extension aliases. /// For example, ``application/json`` could also be used for `JSON `_. /// /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: str or None, optional + /// :param format: the format of the query results serialization. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: QueryResultsFormat or None, optional /// :rtype: bytes or None /// :raises ValueError: if the format is not supported. /// :raises OSError: if a system error happens while writing the file. @@ -233,17 +234,18 @@ impl PyQuerySolutions { /// >>> store = Store() /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) /// >>> results = store.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }") - /// >>> results.serialize(format="json") + /// >>> results.serialize(format=QueryResultsFormat.JSON) /// b'{"head":{"vars":["s","p","o"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"},"p":{"type":"uri","value":"http://example.com/p"},"o":{"type":"literal","value":"1"}}]}}' - #[pyo3(signature = (output = None, /, format = None))] + #[pyo3(signature = (output = None, format = None))] fn serialize<'a>( &mut self, - output: Option<&PyAny>, - format: Option<&str>, + output: Option, + format: Option, py: Python<'a>, ) -> PyResult> { PyWritable::do_write( - |output, format| { + |output, file_path| { + let format = lookup_query_results_format(format, file_path.as_deref())?; let mut writer = QueryResultsSerializer::from_format(format) .serialize_solutions_to_write( output, @@ -272,7 +274,6 @@ impl PyQuerySolutions { Ok(writer.finish()?) }, output, - format, py, ) } @@ -314,18 +315,18 @@ impl PyQueryBoolean { /// /// It currently supports the following formats: /// - /// * `XML `_ (``application/sparql-results+xml`` or ``srx``) - /// * `JSON `_ (``application/sparql-results+json`` or ``srj``) - /// * `CSV `_ (``text/csv`` or ``csv``) - /// * `TSV `_ (``text/tab-separated-values`` or ``tsv``) + /// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) + /// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) + /// * `CSV `_ (:py:attr:`QueryResultsFormat.CSV`) + /// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) /// /// It supports also some media type and extension aliases. /// For example, ``application/json`` could also be used for `JSON `_. /// /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: str or None, optional + /// :param format: the format of the query results serialization. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: QueryResultsFormat or None, optional /// :rtype: bytes or None /// :raises ValueError: if the format is not supported. /// :raises OSError: if a system error happens while writing the file. @@ -333,24 +334,24 @@ impl PyQueryBoolean { /// >>> store = Store() /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) /// >>> results = store.query("ASK { ?s ?p ?o }") - /// >>> results.serialize(format="json") + /// >>> results.serialize(format=QueryResultsFormat.JSON) /// b'{"head":{},"boolean":true}' - #[pyo3(signature = (output = None, /, format = None))] + #[pyo3(signature = (output = None, format = None))] fn serialize<'a>( &mut self, - output: Option<&PyAny>, - format: Option<&str>, + output: Option, + format: Option, py: Python<'a>, ) -> PyResult> { PyWritable::do_write( - |output, format| { + |output, file_path| { + let format = lookup_query_results_format(format, file_path.as_deref())?; py.allow_threads(|| { Ok(QueryResultsSerializer::from_format(format) .serialize_boolean_to_write(output, self.inner)?) }) }, output, - format, py, ) } @@ -389,12 +390,12 @@ impl PyQueryTriples { /// /// It currently supports the following formats: /// - /// * `N-Triples `_ (``application/n-triples`` or ``nt``) - /// * `N-Quads `_ (``application/n-quads`` or ``nq``) - /// * `Turtle `_ (``text/turtle`` or ``ttl``) - /// * `TriG `_ (``application/trig`` or ``trig``) - /// * `N3 `_ (``text/n3`` or ``n3``) - /// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``) + /// * `canonical `_ `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) + /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) + /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) + /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) + /// * `N3 `_ (:py:attr:`RdfFormat.N3`) + /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) /// /// It supports also some media type and extension aliases. /// For example, ``application/turtle`` could also be used for `Turtle `_ @@ -402,8 +403,8 @@ impl PyQueryTriples { /// /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: str or None, optional + /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: RdfFormat or None, optional /// :rtype: bytes or None /// :raises ValueError: if the format is not supported. /// :raises OSError: if a system error happens while writing the file. @@ -411,17 +412,18 @@ impl PyQueryTriples { /// >>> store = Store() /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) /// >>> results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }") - /// >>> results.serialize(format="nt") + /// >>> results.serialize(format=RdfFormat.N_TRIPLES) /// b' "1" .\n' - #[pyo3(signature = (output = None, /, format = None))] + #[pyo3(signature = (output = None, format = None))] fn serialize<'a>( &mut self, - output: Option<&PyAny>, - format: Option<&str>, + output: Option, + format: Option, py: Python<'a>, ) -> PyResult> { PyWritable::do_write( - |output, format| { + |output, file_path| { + let format = lookup_rdf_format(format, file_path.as_deref())?; let mut writer = RdfSerializer::from_format(format).serialize_to_write(output); for triple in &mut self.inner { writer.write_triple(&triple.map_err(map_evaluation_error)?)?; @@ -429,7 +431,6 @@ impl PyQueryTriples { Ok(writer.finish()?) }, output, - format, py, ) } @@ -450,18 +451,17 @@ impl PyQueryTriples { /// /// It currently supports the following formats: /// -/// * `XML `_ (``application/sparql-results+xml`` or ``srx``) -/// * `JSON `_ (``application/sparql-results+json`` or ``srj``) -/// * `CSV `_ (``text/csv`` or ``csv``) -/// * `TSV `_ (``text/tab-separated-values`` or ``tsv``) +/// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) +/// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) +/// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) /// /// It supports also some media type and extension aliases. /// For example, ``application/json`` could also be used for `JSON `_. /// /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional -/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. -/// :type format: str or None, optional +/// :param format: the format of the query results serialization. If :py:const:`None`, the format is guessed from the file name extension. +/// :type format: QueryResultsFormat or None, optional /// :param path: The file path to read from. Replaces the ``input`` parameter. /// :type path: str or os.PathLike[str] or None, optional /// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`. @@ -470,21 +470,21 @@ impl PyQueryTriples { /// :raises SyntaxError: if the provided data is invalid. /// :raises OSError: if a system error happens while reading the file. /// -/// >>> list(parse_query_results('?s\t?p\t?o\n\t\t1\n', "text/tsv")) +/// >>> list(parse_query_results('?s\t?p\t?o\n\t\t1\n', QueryResultsFormat.TSV)) /// [ p= o=>>] /// -/// >>> parse_query_results('{"head":{},"boolean":true}', "application/sparql-results+json") +/// >>> parse_query_results('{"head":{},"boolean":true}', QueryResultsFormat.JSON) /// #[pyfunction] #[pyo3(signature = (input = None, format = None, *, path = None))] pub fn parse_query_results( input: Option, - format: Option<&str>, + format: Option, path: Option, py: Python<'_>, ) -> PyResult { let input = PyReadable::from_args(&path, input, py)?; - let format = parse_format(format, path.as_deref())?; + let format = lookup_query_results_format(format, path.as_deref())?; let results = QueryResultsParser::from_format(format) .parse_read(input) .map_err(|e| map_query_results_parse_error(e, path.clone()))?; @@ -500,6 +500,177 @@ pub fn parse_query_results( }) } +/// `SPARQL query `_ results serialization formats. +/// +/// The following formats are supported: +/// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) +/// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) +/// * `CSV `_ (:py:attr:`QueryResultsFormat.CSV`) +/// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) +#[pyclass(name = "QueryResultsFormat", module = "pyoxigraph")] +#[derive(Clone)] +pub struct PyQueryResultsFormat { + inner: QueryResultsFormat, +} + +#[pymethods] +impl PyQueryResultsFormat { + /// `SPARQL Query Results XML Format `_ + #[classattr] + const XML: Self = Self { + inner: QueryResultsFormat::Xml, + }; + + /// `SPARQL Query Results JSON Format `_ + #[classattr] + const JSON: Self = Self { + inner: QueryResultsFormat::Json, + }; + + /// `SPARQL Query Results CSV Format `_ + #[classattr] + const CSV: Self = Self { + inner: QueryResultsFormat::Csv, + }; + + /// `SPARQL Query Results TSV Format `_ + #[classattr] + const TSV: Self = Self { + inner: QueryResultsFormat::Tsv, + }; + + /// :return: the format canonical IRI according to the `Unique URIs for file formats registry `_. + /// :rtype: str + /// + /// >>> QueryResultsFormat.JSON.iri + /// 'http://www.w3.org/ns/formats/SPARQL_Results_JSON' + #[getter] + fn iri(&self) -> &'static str { + self.inner.iri() + } + + /// :return: the format `IANA media type `_. + /// :rtype: str + /// + /// >>> QueryResultsFormat.JSON.media_type + /// 'application/sparql-results+json' + #[getter] + fn media_type(&self) -> &'static str { + self.inner.media_type() + } + + /// :return: the format `IANA-registered `_ file extension. + /// :rtype: str + /// + /// >>> QueryResultsFormat.JSON.file_extension + /// 'srj' + #[getter] + fn file_extension(&self) -> &'static str { + self.inner.file_extension() + } + + /// :return: the format name. + /// :rtype: str + /// + /// >>> QueryResultsFormat.JSON.name + /// 'SPARQL Results in JSON' + #[getter] + pub const fn name(&self) -> &'static str { + self.inner.name() + } + + /// Looks for a known format from a media type. + /// + /// It supports some media type aliases. + /// For example, "application/xml" is going to return :py:const:`QueryResultsFormat.XML` even if it is not its canonical media type. + /// + /// :param media_type: the media type. + /// :type media_type: str + /// :return: :py:class:`QueryResultsFormat` if the media type is known or :py:const:`None` if not. + /// :rtype: QueryResultsFormat or None + /// + /// >>> QueryResultsFormat.from_media_type("application/sparql-results+json; charset=utf-8") + /// + #[staticmethod] + fn from_media_type(media_type: &str) -> Option { + Some(Self { + inner: QueryResultsFormat::from_media_type(media_type)?, + }) + } + + /// Looks for a known format from an extension. + /// + /// It supports some aliases. + /// + /// :param extension: the extension. + /// :type extension: str + /// :return: :py:class:`QueryResultsFormat` if the extension is known or :py:const:`None` if not. + /// :rtype: QueryResultsFormat or None + /// + /// >>> QueryResultsFormat.from_extension("json") + /// + #[staticmethod] + fn from_extension(extension: &str) -> Option { + Some(Self { + inner: QueryResultsFormat::from_extension(extension)?, + }) + } + + fn __str__(&self) -> &'static str { + self.inner.name() + } + + fn __repr__(&self) -> String { + format!("", self.inner.name()) + } + + fn __hash__(&self) -> u64 { + hash(&self.inner) + } + + fn __eq__(&self, other: &Self) -> bool { + self.inner == other.inner + } + + fn __ne__(&self, other: &Self) -> bool { + self.inner != other.inner + } + + /// :rtype: QueryResultsFormat + fn __copy__(slf: PyRef<'_, Self>) -> PyRef { + slf + } + + /// :type memo: typing.Any + /// :rtype: QueryResultsFormat + #[allow(unused_variables)] + fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ PyAny) -> PyRef<'a, Self> { + slf + } +} + +pub fn lookup_query_results_format( + format: Option, + path: Option<&Path>, +) -> PyResult { + if let Some(format) = format { + return Ok(format.inner); + } + let Some(path) = path else { + return Err(PyValueError::new_err( + "The format parameter is required when a file path is not given", + )); + }; + let Some(ext) = path.extension().and_then(OsStr::to_str) else { + return Err(PyValueError::new_err(format!( + "The file name {} has no extension to guess a file format from", + path.display() + ))); + }; + QueryResultsFormat::from_extension(ext) + .ok_or_else(|| PyValueError::new_err(format!("Not supported RDF format extension: {ext}"))) +} + pub fn map_evaluation_error(error: EvaluationError) -> PyErr { match error { EvaluationError::Parsing(error) => PySyntaxError::new_err(error.to_string()), diff --git a/python/src/store.rs b/python/src/store.rs index 4340d03e..f9e0b800 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -1,11 +1,11 @@ #![allow(clippy::needless_option_as_deref)] use crate::io::{ - allow_threads_unsafe, map_parse_error, parse_format, PyReadable, PyReadableInput, PyWritable, + allow_threads_unsafe, lookup_rdf_format, map_parse_error, PyRdfFormat, PyReadable, + PyReadableInput, PyWritable, PyWritableOutput, }; use crate::model::*; use crate::sparql::*; -use oxigraph::io::RdfFormat; use oxigraph::model::{GraphName, GraphNameRef}; use oxigraph::sparql::Update; use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store}; @@ -351,12 +351,12 @@ impl PyStore { /// /// It currently supports the following formats: /// - /// * `N-Triples `_ (``application/n-triples`` or ``nt``) - /// * `N-Quads `_ (``application/n-quads`` or ``nq``) - /// * `Turtle `_ (``text/turtle`` or ``ttl``) - /// * `TriG `_ (``application/trig`` or ``trig``) - /// * `N3 `_ (``text/n3`` or ``n3``) - /// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``) + /// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) + /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) + /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) + /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) + /// * `N3 `_ (:py:attr:`RdfFormat.N3`) + /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) /// /// It supports also some media type and extension aliases. /// For example, ``application/turtle`` could also be used for `Turtle `_ @@ -364,8 +364,8 @@ impl PyStore { /// /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: str or None, optional + /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: RdfFormat or None, optional /// :param path: The file path to read from. Replaces the ``input`` parameter. /// :type path: str or os.PathLike[str] or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. @@ -378,14 +378,14 @@ impl PyStore { /// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file. /// /// >>> store = Store() - /// >>> store.load(input='

"1" .', format="text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) + /// >>> store.load(input='

"1" .', format=RdfFormat.TURTLE, base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))] fn load( &self, input: Option, - format: Option<&str>, + format: Option, path: Option, base_iri: Option<&str>, to_graph: Option<&PyAny>, @@ -397,7 +397,7 @@ impl PyStore { None }; let input = PyReadable::from_args(&path, input, py)?; - let format: RdfFormat = parse_format(format, path.as_deref())?; + let format = lookup_rdf_format(format, path.as_deref())?; py.allow_threads(|| { if let Some(to_graph_name) = to_graph_name { self.inner @@ -418,12 +418,12 @@ impl PyStore { /// /// It currently supports the following formats: /// - /// * `N-Triples `_ (``application/n-triples`` or ``nt``) - /// * `N-Quads `_ (``application/n-quads`` or ``nq``) - /// * `Turtle `_ (``text/turtle`` or ``ttl``) - /// * `TriG `_ (``application/trig`` or ``trig``) - /// * `N3 `_ (``text/n3`` or ``n3``) - /// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``) + /// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) + /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) + /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) + /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) + /// * `N3 `_ (:py:attr:`RdfFormat.N3`) + /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) /// /// It supports also some media type and extension aliases. /// For example, ``application/turtle`` could also be used for `Turtle `_ @@ -431,7 +431,7 @@ impl PyStore { /// /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. + /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. /// :type format: str or None, optional /// :param path: The file path to read from. Replaces the ``input`` parameter. /// :type path: str or os.PathLike[str] or None, optional @@ -445,14 +445,14 @@ impl PyStore { /// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file. /// /// >>> store = Store() - /// >>> store.bulk_load(input=b'

"1" .', format="text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) + /// >>> store.bulk_load(input=b'

"1" .', format=RdfFormat.TURTLE, base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))] fn bulk_load( &self, input: Option, - format: Option<&str>, + format: Option, path: Option, base_iri: Option<&str>, to_graph: Option<&PyAny>, @@ -464,7 +464,7 @@ impl PyStore { None }; let input = PyReadable::from_args(&path, input, py)?; - let format: RdfFormat = parse_format(format, path.as_deref())?; + let format = lookup_rdf_format(format, path.as_deref())?; py.allow_threads(|| { if let Some(to_graph_name) = to_graph_name { self.inner @@ -483,12 +483,12 @@ impl PyStore { /// /// It currently supports the following formats: /// - /// * `N-Triples `_ (``application/n-triples`` or ``nt``) - /// * `N-Quads `_ (``application/n-quads`` or ``nq``) - /// * `Turtle `_ (``text/turtle`` or ``ttl``) - /// * `TriG `_ (``application/trig`` or ``trig``) - /// * `N3 `_ (``text/n3`` or ``n3``) - /// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``) + /// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) + /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) + /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) + /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) + /// * `N3 `_ (:py:attr:`RdfFormat.N3`) + /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) /// /// It supports also some media type and extension aliases. /// For example, ``application/turtle`` could also be used for `Turtle `_ @@ -496,31 +496,31 @@ impl PyStore { /// /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: str or None, optional + /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: RdfFormat or None, optional /// :param from_graph: the store graph from which dump the triples. Required if the serialization format does not support named graphs. If it does supports named graphs the full dataset is written. /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional - /// :return: py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set. + /// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set. /// :rtype: bytes or None /// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs. /// :raises OSError: if an error happens during a quad lookup or file writing. /// /// >>> store = Store() /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> store.dump(format="trig") + /// >>> store.dump(format=RdfFormat.TRIG) /// b' "1" .\n' /// /// >>> store = Store() /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) /// >>> output = io.BytesIO() - /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) + /// >>> store.dump(output, RdfFormat.TURTLE, from_graph=NamedNode("http://example.com/g")) /// >>> output.getvalue() /// b' "1" .\n' - #[pyo3(signature = (output = None, /, format = None, *, from_graph = None))] + #[pyo3(signature = (output = None, format = None, *, from_graph = None))] fn dump<'a>( &self, - output: Option<&PyAny>, - format: Option<&str>, + output: Option, + format: Option, from_graph: Option<&PyAny>, py: Python<'a>, ) -> PyResult> { @@ -529,9 +529,10 @@ impl PyStore { } else { None }; - PyWritable::do_write::( - |output, format| { + PyWritable::do_write( + |output, file_path| { py.allow_threads(|| { + let format = lookup_rdf_format(format, file_path.as_deref())?; if let Some(from_graph_name) = &from_graph_name { self.inner.dump_graph(output, format, from_graph_name) } else { @@ -541,7 +542,6 @@ impl PyStore { }) }, output, - format, py, ) } diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 9c8d4047..fe137eff 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -8,7 +8,9 @@ from pyoxigraph import ( NamedNode, Quad, QueryBoolean, + QueryResultsFormat, QuerySolutions, + RdfFormat, parse, parse_query_results, serialize, @@ -39,14 +41,14 @@ class TestParse(unittest.TestCase): def test_parse_not_existing_file(self) -> None: with self.assertRaises(IOError) as _: - parse(path="/tmp/not-existing-oxigraph-file.ttl", format="text/turtle") + parse(path="/tmp/not-existing-oxigraph-file.ttl", format=RdfFormat.TURTLE) def test_parse_str(self) -> None: self.assertEqual( list( parse( '

"éù" .', - "text/turtle", + RdfFormat.TURTLE, base_iri="http://example.com/", ) ), @@ -58,7 +60,7 @@ class TestParse(unittest.TestCase): list( parse( '

"éù" .'.encode(), - "text/turtle", + RdfFormat.TURTLE, base_iri="http://example.com/", ) ), @@ -70,7 +72,7 @@ class TestParse(unittest.TestCase): list( parse( StringIO('

"éù" .'), - "text/turtle", + RdfFormat.TURTLE, base_iri="http://example.com/", ) ), @@ -82,7 +84,7 @@ class TestParse(unittest.TestCase): list( parse( StringIO('

"éù" .\n' * 1024), - "text/turtle", + RdfFormat.TURTLE, base_iri="http://example.com/", ) ), @@ -94,7 +96,7 @@ class TestParse(unittest.TestCase): list( parse( BytesIO('

"éù" .'.encode()), - "text/turtle", + RdfFormat.TURTLE, base_iri="http://example.com/", ) ), @@ -103,14 +105,14 @@ class TestParse(unittest.TestCase): def test_parse_io_error(self) -> None: with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: - list(parse(fp, "nt")) + list(parse(fp, RdfFormat.N_TRIPLES)) def test_parse_quad(self) -> None: self.assertEqual( list( parse( ' {

"1" }', - "application/trig", + RdfFormat.TRIG, base_iri="http://example.com/", ) ), @@ -123,7 +125,7 @@ class TestParse(unittest.TestCase): fp.write(b' "p" "1"') fp.flush() with self.assertRaises(SyntaxError) as ctx: - list(parse(path=fp.name, format="text/turtle")) + list(parse(path=fp.name, format=RdfFormat.TURTLE)) self.assertEqual(ctx.exception.filename, fp.name) self.assertEqual(ctx.exception.lineno, 2) self.assertEqual(ctx.exception.offset, 7) @@ -136,7 +138,7 @@ class TestParse(unittest.TestCase): list( parse( ' {

"1" }', - "application/trig", + RdfFormat.TRIG, base_iri="http://example.com/", without_named_graphs=True, ) @@ -147,14 +149,14 @@ class TestParse(unittest.TestCase): list( parse( '_:s "o" .', - "application/n-triples", + RdfFormat.N_TRIPLES, rename_blank_nodes=True, ) ), list( parse( '_:s "o" .', - "application/n-triples", + RdfFormat.N_TRIPLES, rename_blank_nodes=True, ) ), @@ -164,13 +166,13 @@ class TestParse(unittest.TestCase): class TestSerialize(unittest.TestCase): def test_serialize_to_bytes(self) -> None: self.assertEqual( - (serialize([EXAMPLE_TRIPLE.triple], None, "text/turtle") or b"").decode(), + (serialize([EXAMPLE_TRIPLE.triple], None, RdfFormat.TURTLE) or b"").decode(), ' "éù" .\n', ) def test_serialize_to_bytes_io(self) -> None: output = BytesIO() - serialize([EXAMPLE_TRIPLE.triple], output, "text/turtle") + serialize([EXAMPLE_TRIPLE.triple], output, RdfFormat.TURTLE) self.assertEqual( output.getvalue().decode(), ' "éù" .\n', @@ -186,11 +188,11 @@ class TestSerialize(unittest.TestCase): def test_serialize_io_error(self) -> None: with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("rb") as fp: - serialize([EXAMPLE_TRIPLE], fp, "text/turtle") + serialize([EXAMPLE_TRIPLE], fp, RdfFormat.TURTLE) def test_serialize_quad(self) -> None: output = BytesIO() - serialize([EXAMPLE_QUAD], output, "application/trig") + serialize([EXAMPLE_QUAD], output, RdfFormat.TRIG) self.assertEqual( output.getvalue(), b' {\n\t "1" .\n}\n', @@ -210,38 +212,38 @@ class TestParseQuerySolutions(unittest.TestCase): def test_parse_not_existing_file(self) -> None: with self.assertRaises(IOError) as _: - parse_query_results(path="/tmp/not-existing-oxigraph-file.ttl", format="application/json") + parse_query_results(path="/tmp/not-existing-oxigraph-file.ttl", format=QueryResultsFormat.JSON) def test_parse_str(self) -> None: - result = parse_query_results("true", "tsv") + result = parse_query_results("true", QueryResultsFormat.TSV) self.assertIsInstance(result, QueryBoolean) self.assertTrue(result) def test_parse_bytes(self) -> None: - result = parse_query_results(b"false", "tsv") + result = parse_query_results(b"false", QueryResultsFormat.TSV) self.assertIsInstance(result, QueryBoolean) self.assertFalse(result) def test_parse_str_io(self) -> None: - result = parse_query_results("true", "tsv") + result = parse_query_results("true", QueryResultsFormat.TSV) self.assertIsInstance(result, QueryBoolean) self.assertTrue(result) def test_parse_bytes_io(self) -> None: - result = parse_query_results(BytesIO(b"false"), "tsv") + result = parse_query_results(BytesIO(b"false"), QueryResultsFormat.TSV) self.assertIsInstance(result, QueryBoolean) self.assertFalse(result) def test_parse_io_error(self) -> None: with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: - parse_query_results(fp, "srx") + parse_query_results(fp, QueryResultsFormat.XML) def test_parse_syntax_error_json(self) -> None: with NamedTemporaryFile() as fp: fp.write(b"{]") fp.flush() with self.assertRaises(SyntaxError) as ctx: - list(parse_query_results(path=fp.name, format="srj")) # type: ignore[arg-type] + list(parse_query_results(path=fp.name, format=QueryResultsFormat.JSON)) # type: ignore[arg-type] self.assertEqual(ctx.exception.filename, fp.name) self.assertEqual(ctx.exception.lineno, 1) self.assertEqual(ctx.exception.offset, 2) @@ -255,7 +257,7 @@ class TestParseQuerySolutions(unittest.TestCase): fp.write(b"1\t\n") fp.flush() with self.assertRaises(SyntaxError) as ctx: - list(parse_query_results(path=fp.name, format="tsv")) # type: ignore[arg-type] + list(parse_query_results(path=fp.name, format=QueryResultsFormat.TSV)) # type: ignore[arg-type] self.assertEqual(ctx.exception.filename, fp.name) self.assertEqual(ctx.exception.lineno, 2) self.assertEqual(ctx.exception.offset, 3) diff --git a/python/tests/test_store.py b/python/tests/test_store.py index 001755fd..4f47b5f0 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -9,9 +9,12 @@ from pyoxigraph import ( DefaultGraph, NamedNode, Quad, + QueryBoolean, + QueryResultsFormat, QuerySolution, QuerySolutions, QueryTriples, + RdfFormat, Store, Triple, Variable, @@ -190,9 +193,10 @@ class TestStore(unittest.TestCase): def test_select_query_dump(self) -> None: store = Store() store.add(Quad(foo, bar, baz)) - results = store.query("SELECT ?s WHERE { ?s ?p ?o }") + results: QuerySolutions = store.query("SELECT ?s WHERE { ?s ?p ?o }") # type: ignore[assignment] + self.assertIsInstance(results, QuerySolutions) output = BytesIO() - results.serialize(output, "csv") + results.serialize(output, QueryResultsFormat.CSV) self.assertEqual( output.getvalue().decode(), "s\r\nhttp://foo\r\n", @@ -201,9 +205,10 @@ class TestStore(unittest.TestCase): def test_ask_query_dump(self) -> None: store = Store() store.add(Quad(foo, bar, baz)) - results = store.query("ASK { ?s ?p ?o }") + results: QueryBoolean = store.query("ASK { ?s ?p ?o }") # type: ignore[assignment] + self.assertIsInstance(results, QueryBoolean) output = BytesIO() - results.serialize(output, "csv") + results.serialize(output, QueryResultsFormat.CSV) self.assertEqual( output.getvalue().decode(), "true", @@ -212,9 +217,10 @@ class TestStore(unittest.TestCase): def test_construct_query_dump(self) -> None: store = Store() store.add(Quad(foo, bar, baz)) - results = store.query("CONSTRUCT WHERE { ?s ?p ?o }") + results: QueryTriples = store.query("CONSTRUCT WHERE { ?s ?p ?o }") # type: ignore[assignment] + self.assertIsInstance(results, QueryTriples) output = BytesIO() - results.serialize(output, "nt") + results.serialize(output, RdfFormat.N_TRIPLES) self.assertEqual( output.getvalue().decode(), " .\n", @@ -254,7 +260,7 @@ class TestStore(unittest.TestCase): store = Store() store.load( b" .", - "application/n-triples", + RdfFormat.N_TRIPLES, ) self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) @@ -262,7 +268,7 @@ class TestStore(unittest.TestCase): store = Store() store.load( " .", - "application/n-triples", + RdfFormat.N_TRIPLES, to_graph=graph, ) self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) @@ -271,7 +277,7 @@ class TestStore(unittest.TestCase): store = Store() store.load( BytesIO(b" <> ."), - "text/turtle", + RdfFormat.TURTLE, base_iri="http://baz", ) self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) @@ -280,7 +286,7 @@ class TestStore(unittest.TestCase): store = Store() store.load( StringIO(" ."), - "nq", + RdfFormat.N_QUADS, ) self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) @@ -288,7 +294,7 @@ class TestStore(unittest.TestCase): store = Store() store.load( " { <> . }", - "application/trig", + RdfFormat.TRIG, base_iri="http://baz", ) self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) @@ -303,13 +309,13 @@ class TestStore(unittest.TestCase): def test_load_with_io_error(self) -> None: with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: - Store().load(fp, "application/n-triples") + Store().load(fp, RdfFormat.N_TRIPLES) def test_dump_ntriples(self) -> None: store = Store() store.add(Quad(foo, bar, baz, graph)) output = BytesIO() - store.dump(output, "application/n-triples", from_graph=graph) + store.dump(output, RdfFormat.N_TRIPLES, from_graph=graph) self.assertEqual( output.getvalue(), b" .\n", @@ -319,7 +325,7 @@ class TestStore(unittest.TestCase): store = Store() store.add(Quad(foo, bar, baz, graph)) self.assertEqual( - store.dump(format="nq"), + store.dump(format=RdfFormat.N_QUADS), b" .\n", ) @@ -328,7 +334,7 @@ class TestStore(unittest.TestCase): store.add(Quad(foo, bar, baz, graph)) store.add(Quad(foo, bar, baz)) output = BytesIO() - store.dump(output, "application/trig") + store.dump(output, RdfFormat.TRIG) self.assertEqual( output.getvalue(), b" .\n" @@ -340,7 +346,7 @@ class TestStore(unittest.TestCase): store = Store() store.add(Quad(foo, bar, baz, graph)) file_name = Path(fp.name) - store.dump(file_name, "nq") + store.dump(file_name, RdfFormat.N_QUADS) self.assertEqual( file_name.read_text(), " .\n", @@ -350,7 +356,7 @@ class TestStore(unittest.TestCase): store = Store() store.add(Quad(foo, bar, bar)) with self.assertRaises(OSError) as _, TemporaryFile("rb") as fp: - store.dump(fp, "application/trig") + store.dump(fp, RdfFormat.TRIG) def test_write_in_read(self) -> None: store = Store()