diff --git a/python/src/io.rs b/python/src/io.rs index 9bd957c6..97ca9b3d 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -9,6 +9,7 @@ use pyo3::types::PyBytes; use pyo3::{intern, wrap_pyfunction}; use std::cmp::max; use std::error::Error; +use std::ffi::OsStr; use std::fs::File; use std::io::{self, BufWriter, Cursor, Read, Write}; use std::path::{Path, PathBuf}; @@ -33,10 +34,10 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> { /// For example, ``application/turtle`` could also be used for `Turtle `_ /// and ``application/xml`` or ``xml`` for `RDF/XML `_. /// -/// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. +/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: io(bytes) or io(str) or str or pathlib.Path -/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. -/// :type format: str +/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. +/// :type format: str or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional /// :param without_named_graphs: Sets that the parser must fail if parsing a named graph. @@ -52,21 +53,21 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> { /// >>> list(parse(input, "text/turtle", base_iri="http://example.com/")) /// [ predicate= object=> graph_name=>] #[pyfunction] -#[pyo3(signature = (input, format, *, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] +#[pyo3(signature = (input, /, format = None, *, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] pub fn parse( - input: PyObject, - format: &str, + input: &PyAny, + format: Option<&str>, base_iri: Option<&str>, without_named_graphs: bool, rename_blank_nodes: bool, py: Python<'_>, ) -> PyResult { - let format = rdf_format(format)?; - let file_path = input.extract::(py).ok(); + let file_path = input.extract::().ok(); + let format = rdf_format(format, file_path.as_deref())?; let input = if let Some(file_path) = &file_path { PyReadable::from_file(file_path, py).map_err(map_io_err)? } else { - PyReadable::from_data(input, py) + PyReadable::from_data(input) }; let mut parser = RdfParser::from_format(format); if let Some(base_iri) = base_iri { @@ -106,8 +107,8 @@ pub fn parse( /// :type input: iterable(Triple) or iterable(Quad) /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. /// :type output: io(bytes) or str or pathlib.Path -/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. -/// :type format: str +/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. +/// :type format: str or None, optional /// :rtype: None /// :raises ValueError: if the format is not supported. /// :raises TypeError: if a triple is given during a quad format serialization or reverse. @@ -117,10 +118,16 @@ pub fn parse( /// >>> output.getvalue() /// b' "1" .\n' #[pyfunction] -pub fn serialize(input: &PyAny, output: PyObject, format: &str, py: Python<'_>) -> PyResult<()> { - let format = rdf_format(format)?; - let output = if let Ok(path) = output.extract::(py) { - PyWritable::from_file(&path, py).map_err(map_io_err)? +pub fn serialize( + input: &PyAny, + output: &PyAny, + format: Option<&str>, + py: Python<'_>, +) -> PyResult<()> { + let file_path = output.extract::().ok(); + let format = rdf_format(format, file_path.as_deref())?; + let output = if let Some(file_path) = &file_path { + PyWritable::from_file(file_path, py).map_err(map_io_err)? } else { PyWritable::from_data(output) }; @@ -186,13 +193,13 @@ impl PyReadable { Ok(Self::File(py.allow_threads(|| File::open(file))?)) } - pub fn from_data(data: PyObject, py: Python<'_>) -> Self { - if let Ok(bytes) = data.extract::>(py) { + pub fn from_data(data: &PyAny) -> Self { + if let Ok(bytes) = data.extract::>() { Self::Bytes(Cursor::new(bytes)) - } else if let Ok(string) = data.extract::(py) { + } else if let Ok(string) = data.extract::() { Self::Bytes(Cursor::new(string.into_bytes())) } else { - Self::Io(PyIo(data)) + Self::Io(PyIo(data.into())) } } } @@ -217,8 +224,8 @@ impl PyWritable { Ok(Self::File(py.allow_threads(|| File::create(file))?)) } - pub fn from_data(data: PyObject) -> Self { - Self::Io(PyIo(data)) + pub fn from_data(data: &PyAny) -> Self { + Self::Io(PyIo(data.into())) } pub fn close(mut self) -> io::Result<()> { @@ -293,7 +300,23 @@ impl Write for PyIo { } } -pub fn rdf_format(format: &str) -> PyResult { +pub fn rdf_format(format: Option<&str>, path: Option<&Path>) -> PyResult { + let format = if let Some(format) = format { + format + } else if let Some(path) = path { + if let Some(ext) = path.extension().and_then(OsStr::to_str) { + ext + } else { + return Err(PyValueError::new_err(format!( + "The file name {} has no extension to guess a file format from", + path.display() + ))); + } + } else { + return Err(PyValueError::new_err( + "The format parameter is required when a file path is not given", + )); + }; if format.contains('/') { RdfFormat::from_media_type(format).ok_or_else(|| { PyValueError::new_err(format!("Not supported RDF format media type: {format}")) diff --git a/python/src/store.rs b/python/src/store.rs index 70dda4cd..42dbc533 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -360,10 +360,10 @@ impl PyStore { /// For example, ``application/turtle`` could also be used for `Turtle `_ /// and ``application/xml`` or ``xml`` for `RDF/XML `_. /// - /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. + /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: io(bytes) or io(str) or str or pathlib.Path - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. - /// :type format: str + /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: str or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. @@ -377,26 +377,26 @@ impl PyStore { /// >>> store.load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, format, *, base_iri = None, to_graph = None))] + #[pyo3(signature = (input, /, format = None, *, base_iri = None, to_graph = None))] fn load( &self, - input: PyObject, - format: &str, + input: &PyAny, + format: Option<&str>, base_iri: Option<&str>, to_graph: Option<&PyAny>, py: Python<'_>, ) -> PyResult<()> { - let format = rdf_format(format)?; let to_graph_name = if let Some(graph_name) = to_graph { Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) } else { None }; - let file_path = input.extract::(py).ok(); + let file_path = input.extract::().ok(); + let format = rdf_format(format, file_path.as_deref())?; let input = if let Some(file_path) = &file_path { PyReadable::from_file(file_path, py).map_err(map_io_err)? } else { - PyReadable::from_data(input, py) + PyReadable::from_data(input) }; py.allow_threads(|| { if let Some(to_graph_name) = to_graph_name { @@ -429,10 +429,10 @@ impl PyStore { /// For example, ``application/turtle`` could also be used for `Turtle `_ /// and ``application/xml`` or ``xml`` for `RDF/XML `_. /// - /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. + /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :type input: io(bytes) or io(str) or str or pathlib.Path - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. - /// :type format: str + /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: str or None, optional /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :type base_iri: str or None, optional /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. @@ -446,26 +446,26 @@ impl PyStore { /// >>> store.bulk_load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, format, *, base_iri = None, to_graph = None))] + #[pyo3(signature = (input, /, format = None, *, base_iri = None, to_graph = None))] fn bulk_load( &self, - input: PyObject, - format: &str, + input: &PyAny, + format: Option<&str>, base_iri: Option<&str>, to_graph: Option<&PyAny>, py: Python<'_>, ) -> PyResult<()> { - let format = rdf_format(format)?; let to_graph_name = if let Some(graph_name) = to_graph { Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) } else { None }; - let file_path = input.extract::(py).ok(); + let file_path = input.extract::().ok(); + let format = rdf_format(format, file_path.as_deref())?; let input = if let Some(file_path) = &file_path { PyReadable::from_file(file_path, py).map_err(map_io_err)? } else { - PyReadable::from_data(input, py) + PyReadable::from_data(input) }; py.allow_threads(|| { if let Some(to_graph_name) = to_graph_name { @@ -498,8 +498,8 @@ impl PyStore { /// /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. /// :type output: io(bytes) or str or pathlib.Path - /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. - /// :type format: str + /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. + /// :type format: str or None, optional /// :param from_graph: the store graph from which dump the triples. Required if the serialization format does not support named graphs. If it does supports named graphs the full dataset is written. /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional /// :rtype: None @@ -512,22 +512,23 @@ impl PyStore { /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) /// >>> output.getvalue() /// b' "1" .\n' - #[pyo3(signature = (output, format, *, from_graph = None))] + #[pyo3(signature = (output, /, format = None, *, from_graph = None))] fn dump( &self, - output: PyObject, - format: &str, + output: &PyAny, + format: Option<&str>, from_graph: Option<&PyAny>, py: Python<'_>, ) -> PyResult<()> { - let format = rdf_format(format)?; let from_graph_name = if let Some(graph_name) = from_graph { Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) } else { None }; - let output = if let Ok(path) = output.extract::(py) { - PyWritable::from_file(&path, py).map_err(map_io_err)? + let file_path = output.extract::().ok(); + let format = rdf_format(format, file_path.as_deref())?; + let output = if let Some(file_path) = &file_path { + PyWritable::from_file(file_path, py).map_err(map_io_err)? } else { PyWritable::from_data(output) }; diff --git a/python/tests/test_io.py b/python/tests/test_io.py index d70179e7..006fc436 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -20,11 +20,11 @@ EXAMPLE_QUAD = Quad( class TestParse(unittest.TestCase): def test_parse_file(self) -> None: - with NamedTemporaryFile() as fp: + with NamedTemporaryFile(suffix=".ttl") as fp: fp.write('

"éù" .'.encode()) fp.flush() self.assertEqual( - list(parse(fp.name, "text/turtle", base_iri="http://example.com/")), + list(parse(fp.name, base_iri="http://example.com/")), [EXAMPLE_TRIPLE], ) @@ -138,8 +138,8 @@ class TestSerialize(unittest.TestCase): ) def test_serialize_to_file(self) -> None: - with NamedTemporaryFile() as fp: - serialize([EXAMPLE_TRIPLE], fp.name, "text/turtle") + with NamedTemporaryFile(suffix=".ttl") as fp: + serialize([EXAMPLE_TRIPLE], fp.name) self.assertEqual( fp.read().decode(), ' "éù" .\n', diff --git a/python/tests/test_store.py b/python/tests/test_store.py index a3ed8fe0..56f30b4b 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -265,13 +265,12 @@ class TestStore(unittest.TestCase): self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) def test_load_file(self) -> None: - with NamedTemporaryFile(delete=False) as fp: - file_name = Path(fp.name) + with NamedTemporaryFile(suffix=".nq") as fp: fp.write(b" .") - store = Store() - store.load(file_name, "nq") - file_name.unlink() - self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) + fp.flush() + store = Store() + store.load(fp.name) + self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) def test_load_with_io_error(self) -> None: with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: @@ -311,14 +310,14 @@ class TestStore(unittest.TestCase): def test_dump_file(self) -> None: with NamedTemporaryFile(delete=False) as fp: + store = Store() + store.add(Quad(foo, bar, baz, graph)) file_name = Path(fp.name) - store = Store() - store.add(Quad(foo, bar, baz, graph)) - store.dump(file_name, "nq") - self.assertEqual( - file_name.read_text(), - " .\n", - ) + store.dump(file_name, "nq") + self.assertEqual( + file_name.read_text(), + " .\n", + ) def test_dump_with_io_error(self) -> None: store = Store()