From 261f9c64a59a7d547b2d384e8d57d944065ed290 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 13 Oct 2023 17:51:02 +0200 Subject: [PATCH] Python: I/O adds a `path` parameter to read/write from/to a file --- oxrocksdb-sys/rocksdb | 2 +- python/src/io.rs | 69 +++++++++++++++++++++++++++----------- python/src/store.rs | 56 +++++++++++++++---------------- python/tests/test_io.py | 38 +++++++++++++++++---- python/tests/test_store.py | 12 +++---- 5 files changed, 114 insertions(+), 63 deletions(-) diff --git a/oxrocksdb-sys/rocksdb b/oxrocksdb-sys/rocksdb index 5f2d6f0c..1ce22dd6 160000 --- a/oxrocksdb-sys/rocksdb +++ b/oxrocksdb-sys/rocksdb @@ -1 +1 @@ -Subproject commit 5f2d6f0cba9858130be48ae129dd9c9dcafe0f97 +Subproject commit 1ce22dd6376b124d17eff7d96e0809d2f4b4ae70 diff --git a/python/src/io.rs b/python/src/io.rs index 19c0b631..44cc48e9 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -30,10 +30,12 @@ use std::sync::OnceLock; /// For example, ``application/turtle`` could also be used for `Turtle `_ /// and ``application/xml`` or ``xml`` for `RDF/XML `_. /// -/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. -/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str] +/// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. +/// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. /// :type format: str or None, optional +/// :param path: The file path to read from. Replaces the ``input`` parameter. +/// :type path: str or os.PathLike[str] or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional /// :param without_named_graphs: Sets that the parser must fail when parsing a named graph. @@ -46,26 +48,21 @@ use std::sync::OnceLock; /// :raises SyntaxError: if the provided data is invalid. /// :raises OSError: if a system error happens while reading the file. /// -/// >>> input = io.BytesIO(b'

"1" .') -/// >>> list(parse(input, "text/turtle", base_iri="http://example.com/")) +/// >>> list(parse(input=b'

"1" .', format="text/turtle", base_iri="http://example.com/")) /// [ predicate= object=> graph_name=>] #[pyfunction] -#[pyo3(signature = (input, /, format = None, *, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] +#[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] pub fn parse( - input: &PyAny, + input: Option, format: Option<&str>, + path: Option, base_iri: Option<&str>, without_named_graphs: bool, rename_blank_nodes: bool, py: Python<'_>, ) -> PyResult { - let file_path = input.extract::().ok(); - let format = parse_format(format, file_path.as_deref())?; - let input = if let Some(file_path) = &file_path { - PyReadable::from_file(file_path, py)? - } else { - PyReadable::from_data(input) - }; + let input = PyReadable::from_args(&path, input, py)?; + let format = parse_format(format, path.as_deref())?; let mut parser = RdfParser::from_format(format); if let Some(base_iri) = base_iri { parser = parser @@ -80,7 +77,7 @@ pub fn parse( } Ok(PyQuadReader { inner: parser.parse_read(input), - file_path, + file_path: path, } .into_py(py)) } @@ -120,7 +117,7 @@ pub fn parse( /// >>> output.getvalue() /// b' "1" .\n' #[pyfunction] -#[pyo3(signature = (input, output = None, /, format = None))] +#[pyo3(signature = (input, output = None, format = None))] pub fn serialize<'a>( input: &PyAny, output: Option<&PyAny>, @@ -167,13 +164,12 @@ impl PyQuadReader { fn __next__(&mut self, py: Python<'_>) -> PyResult> { py.allow_threads(|| { - self.inner + Ok(self + .inner .next() - .map(|q| { - Ok(q.map_err(|e| map_parse_error(e, self.file_path.clone()))? - .into()) - }) .transpose() + .map_err(|e| map_parse_error(e, self.file_path.clone()))? + .map(PyQuad::from)) }) } } @@ -185,6 +181,22 @@ pub enum PyReadable { } impl PyReadable { + pub fn from_args( + path: &Option, + input: Option, + py: Python<'_>, + ) -> PyResult { + match (path, input) { + (Some(_), Some(_)) => Err(PyValueError::new_err( + "input and file_path can't be both set at the same time", + )), + (Some(path), None) => Ok(PyReadable::from_file(path, py)?), + (None, Some(input)) => Ok(input.into()), + (None, None) => Err(PyValueError::new_err( + "Either input or file_path must be set", + )), + } + } pub fn from_file(file: &Path, py: Python<'_>) -> io::Result { Ok(Self::File(py.allow_threads(|| File::open(file))?)) } @@ -210,6 +222,23 @@ impl Read for PyReadable { } } +#[derive(FromPyObject)] +pub enum PyReadableInput { + String(String), + Bytes(Vec), + Io(PyObject), +} + +impl From for PyReadable { + fn from(input: PyReadableInput) -> Self { + match input { + PyReadableInput::String(string) => Self::Bytes(Cursor::new(string.into_bytes())), + PyReadableInput::Bytes(bytes) => Self::Bytes(Cursor::new(bytes)), + PyReadableInput::Io(io) => Self::Io(PyIo(io)), + } + } +} + pub enum PyWritable { Bytes(Vec), Io(PyIo), diff --git a/python/src/store.rs b/python/src/store.rs index 54fad079..4340d03e 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -1,6 +1,8 @@ #![allow(clippy::needless_option_as_deref)] -use crate::io::{allow_threads_unsafe, map_parse_error, parse_format, PyReadable, PyWritable}; +use crate::io::{ + allow_threads_unsafe, map_parse_error, parse_format, PyReadable, PyReadableInput, PyWritable, +}; use crate::model::*; use crate::sparql::*; use oxigraph::io::RdfFormat; @@ -360,10 +362,12 @@ impl PyStore { /// For example, ``application/turtle`` could also be used for `Turtle `_ /// and ``application/xml`` or ``xml`` for `RDF/XML `_. /// - /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. - /// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str] - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. + /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. + /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional + /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. /// :type format: str or None, optional + /// :param path: The file path to read from. Replaces the ``input`` parameter. + /// :type path: str or os.PathLike[str] or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. @@ -374,14 +378,15 @@ impl PyStore { /// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file. /// /// >>> store = Store() - /// >>> store.load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) + /// >>> store.load(input='

"1" .', format="text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, /, format = None, *, base_iri = None, to_graph = None))] + #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))] fn load( &self, - input: &PyAny, + input: Option, format: Option<&str>, + path: Option, base_iri: Option<&str>, to_graph: Option<&PyAny>, py: Python<'_>, @@ -391,13 +396,8 @@ impl PyStore { } else { None }; - let file_path = input.extract::().ok(); - let format = parse_format::(format, file_path.as_deref())?; - let input = if let Some(file_path) = &file_path { - PyReadable::from_file(file_path, py)? - } else { - PyReadable::from_data(input) - }; + let input = PyReadable::from_args(&path, input, py)?; + let format: RdfFormat = parse_format(format, path.as_deref())?; py.allow_threads(|| { if let Some(to_graph_name) = to_graph_name { self.inner @@ -405,7 +405,7 @@ impl PyStore { } else { self.inner.load_dataset(input, format, base_iri) } - .map_err(|e| map_loader_error(e, file_path)) + .map_err(|e| map_loader_error(e, path)) }) } @@ -429,10 +429,12 @@ impl PyStore { /// For example, ``application/turtle`` could also be used for `Turtle `_ /// and ``application/xml`` or ``xml`` for `RDF/XML `_. /// - /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. - /// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str] - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. + /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. + /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional + /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. /// :type format: str or None, optional + /// :param path: The file path to read from. Replaces the ``input`` parameter. + /// :type path: str or os.PathLike[str] or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. @@ -443,14 +445,15 @@ impl PyStore { /// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file. /// /// >>> store = Store() - /// >>> store.bulk_load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) + /// >>> store.bulk_load(input=b'

"1" .', format="text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, /, format = None, *, base_iri = None, to_graph = None))] + #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))] fn bulk_load( &self, - input: &PyAny, + input: Option, format: Option<&str>, + path: Option, base_iri: Option<&str>, to_graph: Option<&PyAny>, py: Python<'_>, @@ -460,13 +463,8 @@ impl PyStore { } else { None }; - let file_path = input.extract::().ok(); - let format = parse_format::(format, file_path.as_deref())?; - let input = if let Some(file_path) = &file_path { - PyReadable::from_file(file_path, py)? - } else { - PyReadable::from_data(input) - }; + let input = PyReadable::from_args(&path, input, py)?; + let format: RdfFormat = parse_format(format, path.as_deref())?; py.allow_threads(|| { if let Some(to_graph_name) = to_graph_name { self.inner @@ -477,7 +475,7 @@ impl PyStore { .bulk_loader() .load_dataset(input, format, base_iri) } - .map_err(|e| map_loader_error(e, file_path)) + .map_err(|e| map_loader_error(e, path)) }) } diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 65223049..596c6fd3 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -33,13 +33,37 @@ class TestParse(unittest.TestCase): fp.write('

"éù" .'.encode()) fp.flush() self.assertEqual( - list(parse(fp.name, base_iri="http://example.com/")), + list(parse(path=fp.name, base_iri="http://example.com/")), [EXAMPLE_TRIPLE], ) def test_parse_not_existing_file(self) -> None: with self.assertRaises(IOError) as _: - parse("/tmp/not-existing-oxigraph-file.ttl", "text/turtle") + parse(path="/tmp/not-existing-oxigraph-file.ttl", format="text/turtle") + + def test_parse_str(self) -> None: + self.assertEqual( + list( + parse( + '

"éù" .', + "text/turtle", + base_iri="http://example.com/", + ) + ), + [EXAMPLE_TRIPLE], + ) + + def test_parse_bytes(self) -> None: + self.assertEqual( + list( + parse( + '

"éù" .'.encode(), + "text/turtle", + base_iri="http://example.com/", + ) + ), + [EXAMPLE_TRIPLE], + ) def test_parse_str_io(self) -> None: self.assertEqual( @@ -85,7 +109,7 @@ class TestParse(unittest.TestCase): self.assertEqual( list( parse( - StringIO(' {

"1" }'), + ' {

"1" }', "application/trig", base_iri="http://example.com/", ) @@ -99,7 +123,7 @@ class TestParse(unittest.TestCase): fp.write(b' "p" "1"') fp.flush() with self.assertRaises(SyntaxError) as ctx: - list(parse(fp.name, "text/turtle")) + list(parse(path=fp.name, format="text/turtle")) self.assertEqual(ctx.exception.filename, fp.name) self.assertEqual(ctx.exception.lineno, 2) self.assertEqual(ctx.exception.offset, 7) @@ -111,7 +135,7 @@ class TestParse(unittest.TestCase): with self.assertRaises(SyntaxError) as _: list( parse( - StringIO(' {

"1" }'), + ' {

"1" }', "application/trig", base_iri="http://example.com/", without_named_graphs=True, @@ -122,14 +146,14 @@ class TestParse(unittest.TestCase): self.assertNotEqual( list( parse( - StringIO('_:s "o" .'), + '_:s "o" .', "application/n-triples", rename_blank_nodes=True, ) ), list( parse( - StringIO('_:s "o" .'), + '_:s "o" .', "application/n-triples", rename_blank_nodes=True, ) diff --git a/python/tests/test_store.py b/python/tests/test_store.py index b9fc1be8..001755fd 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -1,5 +1,5 @@ import unittest -from io import BytesIO, UnsupportedOperation +from io import BytesIO, StringIO, UnsupportedOperation from pathlib import Path from tempfile import NamedTemporaryFile, TemporaryDirectory, TemporaryFile from typing import Any @@ -253,7 +253,7 @@ class TestStore(unittest.TestCase): def test_load_ntriples_to_default_graph(self) -> None: store = Store() store.load( - BytesIO(b" ."), + b" .", "application/n-triples", ) self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) @@ -261,7 +261,7 @@ class TestStore(unittest.TestCase): def test_load_ntriples_to_named_graph(self) -> None: store = Store() store.load( - BytesIO(b" ."), + " .", "application/n-triples", to_graph=graph, ) @@ -279,7 +279,7 @@ class TestStore(unittest.TestCase): def test_load_nquads(self) -> None: store = Store() store.load( - BytesIO(b" ."), + StringIO(" ."), "nq", ) self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) @@ -287,7 +287,7 @@ class TestStore(unittest.TestCase): def test_load_trig_with_base_iri(self) -> None: store = Store() store.load( - BytesIO(b" { <> . }"), + " { <> . }", "application/trig", base_iri="http://baz", ) @@ -298,7 +298,7 @@ class TestStore(unittest.TestCase): fp.write(b" .") fp.flush() store = Store() - store.load(fp.name) + store.load(path=fp.name) self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) def test_load_with_io_error(self) -> None: