Python: makes serialization method output bytes if no output is specified

pull/631/head
Tpt 1 year ago committed by Thomas Tanon
parent 1b511ed018
commit a8f98a0056
  1. 56
      python/src/io.rs
  2. 41
      python/src/store.rs
  3. 6
      python/tests/test_io.py
  4. 4
      python/tests/test_store.py

@ -105,31 +105,39 @@ pub fn parse(
/// ///
/// :param input: the RDF triples and quads to serialize. /// :param input: the RDF triples and quads to serialize.
/// :type input: iterable(Triple) or iterable(Quad) /// :type input: iterable(Triple) or iterable(Quad)
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path /// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension. /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional /// :type format: str or None, optional
/// :rtype: None /// :rtype: bytes or None
/// :raises ValueError: if the format is not supported. /// :raises ValueError: if the format is not supported.
/// :raises TypeError: if a triple is given during a quad format serialization or vice versa. /// :raises TypeError: if a triple is given during a quad format serialization or vice versa.
/// ///
/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], format="ttl")
/// b'<http://example.com> <http://example.com/p> "1" .\n'
///
/// >>> output = io.BytesIO() /// >>> output = io.BytesIO()
/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, "text/turtle") /// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, "text/turtle")
/// >>> output.getvalue() /// >>> output.getvalue()
/// b'<http://example.com> <http://example.com/p> "1" .\n' /// b'<http://example.com> <http://example.com/p> "1" .\n'
#[pyfunction] #[pyfunction]
pub fn serialize( #[pyo3(signature = (input, output = None, /, format = None))]
pub fn serialize<'a>(
input: &PyAny, input: &PyAny,
output: &PyAny, output: Option<&PyAny>,
format: Option<&str>, format: Option<&str>,
py: Python<'_>, py: Python<'a>,
) -> PyResult<()> { ) -> PyResult<Option<&'a PyBytes>> {
let file_path = output.extract::<PathBuf>().ok(); let file_path = output.and_then(|output| output.extract::<PathBuf>().ok());
let format = rdf_format(format, file_path.as_deref())?; let format = rdf_format(format, file_path.as_deref())?;
let output = if let Some(file_path) = &file_path { let output = if let Some(output) = output {
PyWritable::from_file(file_path, py).map_err(map_io_err)? if let Some(file_path) = &file_path {
PyWritable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyWritable::from_data(output)
}
} else { } else {
PyWritable::from_data(output) PyWritable::Bytes(Vec::new())
}; };
let mut writer = RdfSerializer::from_format(format).serialize_to_write(BufWriter::new(output)); let mut writer = RdfSerializer::from_format(format).serialize_to_write(BufWriter::new(output));
for i in input.iter()? { for i in input.iter()? {
@ -153,8 +161,7 @@ pub fn serialize(
.map_err(map_io_err)? .map_err(map_io_err)?
.into_inner() .into_inner()
.map_err(|e| map_io_err(e.into_error()))? .map_err(|e| map_io_err(e.into_error()))?
.close() .close(py)
.map_err(map_io_err)
} }
#[pyclass(name = "QuadReader", module = "pyoxigraph")] #[pyclass(name = "QuadReader", module = "pyoxigraph")]
@ -215,6 +222,7 @@ impl Read for PyReadable {
} }
pub enum PyWritable { pub enum PyWritable {
Bytes(Vec<u8>),
Io(PyIo), Io(PyIo),
File(File), File(File),
} }
@ -228,18 +236,29 @@ impl PyWritable {
Self::Io(PyIo(data.into())) Self::Io(PyIo(data.into()))
} }
pub fn close(mut self) -> io::Result<()> { pub fn close(self, py: Python<'_>) -> PyResult<Option<&PyBytes>> {
self.flush()?; match self {
if let Self::File(file) = self { Self::Bytes(bytes) => Ok(Some(PyBytes::new(py, &bytes))),
file.sync_all()?; Self::File(mut file) => {
py.allow_threads(|| {
file.flush()?;
file.sync_all()
})
.map_err(map_io_err)?;
Ok(None)
}
Self::Io(mut io) => {
py.allow_threads(|| io.flush()).map_err(map_io_err)?;
Ok(None)
}
} }
Ok(())
} }
} }
impl Write for PyWritable { impl Write for PyWritable {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match self { match self {
Self::Bytes(bytes) => bytes.write(buf),
Self::Io(io) => io.write(buf), Self::Io(io) => io.write(buf),
Self::File(file) => file.write(buf), Self::File(file) => file.write(buf),
} }
@ -247,6 +266,7 @@ impl Write for PyWritable {
fn flush(&mut self) -> io::Result<()> { fn flush(&mut self) -> io::Result<()> {
match self { match self {
Self::Bytes(_) => Ok(()),
Self::Io(io) => io.flush(), Self::Io(io) => io.flush(),
Self::File(file) => file.flush(), Self::File(file) => file.flush(),
} }

@ -10,6 +10,7 @@ use oxigraph::sparql::Update;
use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store}; use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store};
use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes;
use std::io::BufWriter; use std::io::BufWriter;
use std::path::PathBuf; use std::path::PathBuf;
@ -496,41 +497,50 @@ impl PyStore {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_ /// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path /// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension. /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional /// :type format: str or None, optional
/// :param from_graph: the store graph from which to dump the triples. Required if the serialization format does not support named graphs. If it does support named graphs, the full dataset is written. /// :param from_graph: the store graph from which to dump the triples. Required if the serialization format does not support named graphs. If it does support named graphs, the full dataset is written.
/// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional
/// :rtype: None /// :rtype: bytes or None
/// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs. /// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs.
/// :raises OSError: if an error happens during a quad lookup /// :raises OSError: if an error happens during a quad lookup
/// ///
/// >>> store = Store() /// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> store.dump(format="trig")
/// b'<http://example.com> <http://example.com/p> "1" .\n'
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')))
/// >>> output = io.BytesIO() /// >>> output = io.BytesIO()
/// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g"))
/// >>> output.getvalue() /// >>> output.getvalue()
/// b'<http://example.com> <http://example.com/p> "1" .\n' /// b'<http://example.com> <http://example.com/p> "1" .\n'
#[pyo3(signature = (output, /, format = None, *, from_graph = None))] #[pyo3(signature = (output = None, /, format = None, *, from_graph = None))]
fn dump( fn dump<'a>(
&self, &self,
output: &PyAny, output: Option<&PyAny>,
format: Option<&str>, format: Option<&str>,
from_graph: Option<&PyAny>, from_graph: Option<&PyAny>,
py: Python<'_>, py: Python<'a>,
) -> PyResult<()> { ) -> PyResult<Option<&'a PyBytes>> {
let from_graph_name = if let Some(graph_name) = from_graph { let from_graph_name = if let Some(graph_name) = from_graph {
Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?))
} else { } else {
None None
}; };
let file_path = output.extract::<PathBuf>().ok(); let file_path = output.and_then(|output| output.extract::<PathBuf>().ok());
let format = rdf_format(format, file_path.as_deref())?; let format = rdf_format(format, file_path.as_deref())?;
let output = if let Some(file_path) = &file_path { let output = if let Some(output) = output {
PyWritable::from_file(file_path, py).map_err(map_io_err)? if let Some(file_path) = &file_path {
PyWritable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyWritable::from_data(output)
}
} else { } else {
PyWritable::from_data(output) PyWritable::Bytes(Vec::new())
}; };
py.allow_threads(|| { py.allow_threads(|| {
let output = BufWriter::new(output); let output = BufWriter::new(output);
@ -541,10 +551,9 @@ impl PyStore {
} }
.map_err(map_serializer_error)? .map_err(map_serializer_error)?
.into_inner() .into_inner()
.map_err(|e| map_io_err(e.into_error()))? .map_err(|e| map_io_err(e.into_error()))
.close() })?
.map_err(map_io_err) .close(py)
})
} }
/// Returns an iterator over all the store named graphs. /// Returns an iterator over all the store named graphs.

@ -129,6 +129,12 @@ class TestParse(unittest.TestCase):
class TestSerialize(unittest.TestCase): class TestSerialize(unittest.TestCase):
def test_serialize_to_bytes(self) -> None:
self.assertEqual(
serialize([EXAMPLE_TRIPLE.triple], None, "text/turtle").decode(),
'<http://example.com/foo> <http://example.com/p> "éù" .\n',
)
def test_serialize_to_bytes_io(self) -> None: def test_serialize_to_bytes_io(self) -> None:
output = BytesIO() output = BytesIO()
serialize([EXAMPLE_TRIPLE.triple], output, "text/turtle") serialize([EXAMPLE_TRIPLE.triple], output, "text/turtle")

@ -289,10 +289,8 @@ class TestStore(unittest.TestCase):
def test_dump_nquads(self) -> None: def test_dump_nquads(self) -> None:
store = Store() store = Store()
store.add(Quad(foo, bar, baz, graph)) store.add(Quad(foo, bar, baz, graph))
output = BytesIO()
store.dump(output, "nq")
self.assertEqual( self.assertEqual(
output.getvalue(), store.dump(format="nq"),
b"<http://foo> <http://bar> <http://baz> <http://graph> .\n", b"<http://foo> <http://bar> <http://baz> <http://graph> .\n",
) )

Loading…
Cancel
Save