Python: Allows passing a file path to I/O methods

pull/190/head
Tpt 3 years ago
parent 4327f8e729
commit 66cfb3ebf6
  1. 63
      python/src/io.rs
  2. 21
      python/src/store.rs
  3. 24
      python/tests/test_store.py

@ -9,6 +9,7 @@ use pyo3::exceptions::{PyIOError, PySyntaxError, PyValueError};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction; use pyo3::wrap_pyfunction;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Read, Write}; use std::io::{self, BufReader, BufWriter, Read, Write};
pub fn add_to_module(module: &PyModule) -> PyResult<()> { pub fn add_to_module(module: &PyModule) -> PyResult<()> {
@ -30,8 +31,8 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_ /// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param input: The binary I/O object to read from. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase /// :type input: io.RawIOBase or io.BufferedIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -52,7 +53,7 @@ pub fn parse(
base_iri: Option<&str>, base_iri: Option<&str>,
py: Python<'_>, py: Python<'_>,
) -> PyResult<PyObject> { ) -> PyResult<PyObject> {
let input = BufReader::new(PyFileLike::new(input)); let input = PyFileLike::open(input, py).map_err(map_io_err)?;
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
let mut parser = GraphParser::from_format(graph_format); let mut parser = GraphParser::from_format(graph_format);
if let Some(base_iri) = base_iri { if let Some(base_iri) = base_iri {
@ -99,8 +100,8 @@ pub fn parse(
/// ///
/// :param input: the RDF triples and quads to serialize. /// :param input: the RDF triples and quads to serialize.
/// :type input: iter(Triple) or iter(Quad) /// :type input: iter(Triple) or iter(Quad)
/// :param output: The binary I/O object to write to. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'wb')``. /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``.
/// :type output: io.RawIOBase or io.BufferedIOBase /// :type output: io.RawIOBase or io.BufferedIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :raises ValueError: if the MIME type is not supported. /// :raises ValueError: if the MIME type is not supported.
@ -112,8 +113,8 @@ pub fn parse(
/// b'<http://example.com> <http://example.com/p> "1" .\n' /// b'<http://example.com> <http://example.com/p> "1" .\n'
#[pyfunction] #[pyfunction]
#[pyo3(text_signature = "(input, output, /, mime_type, *, base_iri = None)")] #[pyo3(text_signature = "(input, output, /, mime_type, *, base_iri = None)")]
pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str) -> PyResult<()> { pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_>) -> PyResult<()> {
let output = BufWriter::new(PyFileLike::new(output)); let output = PyFileLike::create(output, py).map_err(map_io_err)?;
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
let mut writer = GraphSerializer::from_format(graph_format) let mut writer = GraphSerializer::from_format(graph_format)
.triple_writer(output) .triple_writer(output)
@ -186,49 +187,73 @@ impl PyQuadReader {
} }
} }
pub struct PyFileLike { pub(crate) enum PyFileLike {
inner: PyObject, Io(PyObject),
File(File),
} }
impl PyFileLike { impl PyFileLike {
pub fn new(inner: PyObject) -> Self { pub fn open(inner: PyObject, py: Python<'_>) -> io::Result<BufReader<Self>> {
Self { inner } Ok(BufReader::new(match inner.extract::<&str>(py) {
Ok(path) => Self::File(py.allow_threads(|| File::open(path))?),
Err(_) => Self::Io(inner),
}))
}
pub fn create(inner: PyObject, py: Python<'_>) -> io::Result<BufWriter<Self>> {
Ok(BufWriter::new(match inner.extract::<&str>(py) {
Ok(path) => Self::File(py.allow_threads(|| File::create(path))?),
Err(_) => Self::Io(inner),
}))
} }
} }
impl Read for PyFileLike { impl Read for PyFileLike {
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> { fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
match self {
Self::Io(io) => {
let gil = Python::acquire_gil(); let gil = Python::acquire_gil();
let py = gil.python(); let py = gil.python();
let read = self let read = io
.inner
.call_method(py, "read", (buf.len(),), None) .call_method(py, "read", (buf.len(),), None)
.map_err(to_io_err)?; .map_err(to_io_err)?;
let bytes: &PyBytes = read.cast_as(py).map_err(to_io_err)?; let bytes: &[u8] = read.extract(py).map_err(to_io_err)?;
buf.write_all(bytes.as_bytes())?; buf.write_all(bytes)?;
Ok(bytes.len()?) Ok(bytes.len())
}
Self::File(file) => file.read(buf),
}
} }
} }
impl Write for PyFileLike { impl Write for PyFileLike {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match self {
Self::Io(io) => {
let gil = Python::acquire_gil(); let gil = Python::acquire_gil();
let py = gil.python(); let py = gil.python();
usize::extract( usize::extract(
self.inner io.call_method(py, "write", (PyBytes::new(py, buf),), None)
.call_method(py, "write", (PyBytes::new(py, buf),), None)
.map_err(to_io_err)? .map_err(to_io_err)?
.as_ref(py), .as_ref(py),
) )
.map_err(to_io_err) .map_err(to_io_err)
} }
Self::File(file) => file.write(buf),
}
}
fn flush(&mut self) -> io::Result<()> { fn flush(&mut self) -> io::Result<()> {
match self {
Self::Io(io) => {
let gil = Python::acquire_gil(); let gil = Python::acquire_gil();
let py = gil.python(); let py = gil.python();
self.inner.call_method(py, "flush", (), None)?; io.call_method(py, "flush", (), None)?;
Ok(()) Ok(())
} }
Self::File(file) => file.flush(),
}
}
} }
fn to_io_err(error: impl Into<PyErr>) -> io::Error { fn to_io_err(error: impl Into<PyErr>) -> io::Error {

@ -1,6 +1,6 @@
#![allow(clippy::needless_option_as_deref)] #![allow(clippy::needless_option_as_deref)]
use crate::io::{allow_threads_unsafe, map_parse_error, PyFileLike}; use crate::io::{allow_threads_unsafe, map_io_err, map_parse_error, PyFileLike};
use crate::model::*; use crate::model::*;
use crate::sparql::*; use crate::sparql::*;
use oxigraph::io::{DatasetFormat, GraphFormat}; use oxigraph::io::{DatasetFormat, GraphFormat};
@ -10,7 +10,6 @@ use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store};
use pyo3::exceptions::{PyIOError, PyRuntimeError, PyValueError}; use pyo3::exceptions::{PyIOError, PyRuntimeError, PyValueError};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::{Py, PyRef}; use pyo3::{Py, PyRef};
use std::io::{BufReader, BufWriter};
/// RDF store. /// RDF store.
/// ///
@ -263,8 +262,8 @@ impl PyStore {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_ /// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param input: The binary I/O object to read from. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase /// :type input: io.RawIOBase or io.BufferedIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -294,8 +293,8 @@ impl PyStore {
} else { } else {
None None
}; };
let input = PyFileLike::open(input, py).map_err(map_io_err)?;
py.allow_threads(|| { py.allow_threads(|| {
let input = BufReader::new(PyFileLike::new(input));
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.inner self.inner
.load_graph( .load_graph(
@ -342,8 +341,8 @@ impl PyStore {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_ /// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param input: The binary I/O object to read from. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase /// :type input: io.RawIOBase or io.BufferedIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -373,8 +372,8 @@ impl PyStore {
} else { } else {
None None
}; };
let input = PyFileLike::open(input, py).map_err(map_io_err)?;
py.allow_threads(|| { py.allow_threads(|| {
let input = BufReader::new(PyFileLike::new(input));
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.inner self.inner
.bulk_load_graph( .bulk_load_graph(
@ -416,8 +415,8 @@ impl PyStore {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_ /// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param output: The binary I/O object to write to. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'wb')``. /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase /// :type output: io.RawIOBase or io.BufferedIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param from_graph: if a triple-based format is requested, the store graph from which to dump the triples. By default, the default graph is used. /// :param from_graph: if a triple-based format is requested, the store graph from which to dump the triples. By default, the default graph is used.
@ -445,8 +444,8 @@ impl PyStore {
} else { } else {
None None
}; };
let output = PyFileLike::create(output, py).map_err(map_io_err)?;
py.allow_threads(|| { py.allow_threads(|| {
let output = BufWriter::new(PyFileLike::new(output));
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.inner self.inner
.dump_graph( .dump_graph(

@ -1,7 +1,9 @@
import os
import unittest import unittest
from io import BytesIO, RawIOBase from io import BytesIO, RawIOBase
from pyoxigraph import * from pyoxigraph import *
from tempfile import NamedTemporaryFile
foo = NamedNode("http://foo") foo = NamedNode("http://foo")
bar = NamedNode("http://bar") bar = NamedNode("http://bar")
@ -221,6 +223,15 @@ class TestStore(unittest.TestCase):
) )
self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) self.assertEqual(set(store), {Quad(foo, bar, baz, graph)})
def test_load_file(self):
with NamedTemporaryFile(delete=False) as fp:
file_name = fp.name
fp.write(b"<http://foo> <http://bar> <http://baz> <http://graph>.")
store = Store()
store.load(file_name, mime_type="application/n-quads")
os.remove(file_name)
self.assertEqual(set(store), {Quad(foo, bar, baz, graph)})
def test_load_with_io_error(self): def test_load_with_io_error(self):
class BadIO(RawIOBase): class BadIO(RawIOBase):
pass pass
@ -247,6 +258,19 @@ class TestStore(unittest.TestCase):
b"<http://foo> <http://bar> <http://baz> <http://graph> .\n", b"<http://foo> <http://bar> <http://baz> <http://graph> .\n",
) )
def test_dump_file(self):
with NamedTemporaryFile(delete=False) as fp:
file_name = fp.name
store = Store()
store.add(Quad(foo, bar, baz, graph))
store.dump(file_name, "application/n-quads")
with open(file_name, 'rt') as fp:
file_content = fp.read()
self.assertEqual(
file_content,
"<http://foo> <http://bar> <http://baz> <http://graph> .\n",
)
def test_dump_with_io_error(self): def test_dump_with_io_error(self):
class BadIO(RawIOBase): class BadIO(RawIOBase):
pass pass

Loading…
Cancel
Save