Makes Python I/O abstraction easier to use

Adds support for reading from io.TextIOBase

Adds hidden support for reading from str and bytes

Improves test coverage
pull/238/head
Tpt 3 years ago committed by Thomas Tanon
parent e686e8ce8f
commit 241bd763fa
  1. 152
      python/src/io.rs
  2. 27
      python/src/store.rs
  3. 72
      python/tests/test_io.py

@ -10,7 +10,7 @@ use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction; use pyo3::wrap_pyfunction;
use std::fs::File; use std::fs::File;
use std::io::{self, BufReader, BufWriter, Read, Write}; use std::io::{self, BufRead, BufReader, BufWriter, Cursor, Read, Write};
pub fn add_to_module(module: &PyModule) -> PyResult<()> { pub fn add_to_module(module: &PyModule) -> PyResult<()> {
module.add_wrapped(wrap_pyfunction!(parse))?; module.add_wrapped(wrap_pyfunction!(parse))?;
@ -32,7 +32,7 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> {
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase or str /// :type input: io.RawIOBase or io.BufferedIOBase or io.TextIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -53,7 +53,12 @@ pub fn parse(
base_iri: Option<&str>, base_iri: Option<&str>,
py: Python<'_>, py: Python<'_>,
) -> PyResult<PyObject> { ) -> PyResult<PyObject> {
let input = PyFileLike::open(input, py).map_err(map_io_err)?; let input = if let Ok(path) = input.extract::<&str>(py) {
PyReadable::from_file(path, py)
} else {
PyReadable::from_data(input, py)
}
.map_err(map_io_err)?;
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
let mut parser = GraphParser::from_format(graph_format); let mut parser = GraphParser::from_format(graph_format);
if let Some(base_iri) = base_iri { if let Some(base_iri) = base_iri {
@ -114,7 +119,12 @@ pub fn parse(
#[pyfunction] #[pyfunction]
#[pyo3(text_signature = "(input, output, /, mime_type, *, base_iri = None)")] #[pyo3(text_signature = "(input, output, /, mime_type, *, base_iri = None)")]
pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_>) -> PyResult<()> { pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_>) -> PyResult<()> {
let output = PyFileLike::create(output, py).map_err(map_io_err)?; let output = if let Ok(path) = output.extract::<&str>(py) {
PyWritable::from_file(path, py)
} else {
PyWritable::from_data(output)
}
.map_err(map_io_err)?;
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
let mut writer = GraphSerializer::from_format(graph_format) let mut writer = GraphSerializer::from_format(graph_format)
.triple_writer(output) .triple_writer(output)
@ -147,7 +157,7 @@ pub fn serialize(input: &PyAny, output: PyObject, mime_type: &str, py: Python<'_
#[pyclass(name = "TripleReader", module = "oxigraph")] #[pyclass(name = "TripleReader", module = "oxigraph")]
pub struct PyTripleReader { pub struct PyTripleReader {
inner: TripleReader<BufReader<PyFileLike>>, inner: TripleReader<PyReadable>,
} }
#[pymethods] #[pymethods]
@ -168,7 +178,7 @@ impl PyTripleReader {
#[pyclass(name = "QuadReader", module = "oxigraph")] #[pyclass(name = "QuadReader", module = "oxigraph")]
pub struct PyQuadReader { pub struct PyQuadReader {
inner: QuadReader<BufReader<PyFileLike>>, inner: QuadReader<PyReadable>,
} }
#[pymethods] #[pymethods]
@ -187,73 +197,127 @@ impl PyQuadReader {
} }
} }
pub(crate) enum PyFileLike { pub(crate) enum PyReadable {
Io(PyObject), Bytes(Cursor<Vec<u8>>),
File(File), Io(BufReader<PyIo>),
File(BufReader<File>),
} }
impl PyFileLike { impl PyReadable {
pub fn open(inner: PyObject, py: Python<'_>) -> io::Result<BufReader<Self>> { pub fn from_file(file: &str, py: Python<'_>) -> io::Result<Self> {
Ok(BufReader::new(match inner.extract::<&str>(py) { Ok(Self::File(BufReader::new(
Ok(path) => Self::File(py.allow_threads(|| File::open(path))?), py.allow_threads(|| File::open(file))?,
Err(_) => Self::Io(inner), )))
}))
} }
pub fn create(inner: PyObject, py: Python<'_>) -> io::Result<BufWriter<Self>> { pub fn from_data(data: PyObject, py: Python<'_>) -> io::Result<Self> {
Ok(BufWriter::new(match inner.extract::<&str>(py) { Ok(if let Ok(bytes) = data.extract::<Vec<u8>>(py) {
Ok(path) => Self::File(py.allow_threads(|| File::create(path))?), Self::Bytes(Cursor::new(bytes))
Err(_) => Self::Io(inner), } else if let Ok(string) = data.extract::<String>(py) {
})) Self::Bytes(Cursor::new(string.into_bytes()))
} else {
Self::Io(BufReader::new(PyIo(data)))
})
} }
} }
impl Read for PyFileLike { impl Read for PyReadable {
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> { fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
match self {
Self::Bytes(bytes) => bytes.read(buf),
Self::Io(io) => io.read(buf),
Self::File(file) => file.read(buf),
}
}
}
impl BufRead for PyReadable {
/// Returns the currently buffered bytes of whichever source this value holds.
///
/// Every variant already wraps a `BufRead` implementation, so the call is
/// simply forwarded to it.
fn fill_buf(&mut self) -> io::Result<&[u8]> {
    // Select the active reader once, then forward through the trait object.
    let source: &mut dyn BufRead = match self {
        Self::Bytes(cursor) => cursor,
        Self::Io(py_reader) => py_reader,
        Self::File(file_reader) => file_reader,
    };
    source.fill_buf()
}
fn consume(&mut self, amt: usize) {
match self { match self {
Self::Io(io) => { Self::Bytes(bytes) => bytes.consume(amt),
Self::Io(io) => io.consume(amt),
Self::File(file) => file.consume(amt),
}
}
}
/// Destination that serialized output is written to.
///
/// Both variants are wrapped in a `BufWriter`, so writes are buffered before
/// they reach the underlying sink.
pub(crate) enum PyWritable {
/// A Python object wrapped in `PyIo`.
Io(BufWriter<PyIo>),
/// A file on the local filesystem.
File(BufWriter<File>),
}
impl PyWritable {
    /// Creates the file at path `file` and returns a buffered writer over it.
    ///
    /// The `File::create` call runs inside `py.allow_threads`; any I/O error
    /// it reports is returned to the caller unchanged.
    pub fn from_file(file: &str, py: Python<'_>) -> io::Result<Self> {
        let handle = py.allow_threads(|| File::create(file))?;
        Ok(Self::File(BufWriter::new(handle)))
    }

    /// Wraps the Python object `data` in a `PyIo` behind a buffered writer.
    ///
    /// This constructor always returns `Ok`.
    pub fn from_data(data: PyObject) -> io::Result<Self> {
        let sink = BufWriter::new(PyIo(data));
        Ok(Self::Io(sink))
    }
}
impl Write for PyWritable {
    /// Forwards `buf` to the buffered writer of the active variant.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let sink: &mut dyn Write = match self {
            Self::Io(py_writer) => py_writer,
            Self::File(file_writer) => file_writer,
        };
        sink.write(buf)
    }

    /// Flushes the buffered writer of the active variant.
    fn flush(&mut self) -> io::Result<()> {
        let sink: &mut dyn Write = match self {
            Self::Io(py_writer) => py_writer,
            Self::File(file_writer) => file_writer,
        };
        sink.flush()
    }
}
/// Newtype around a Python object so it can be used through `std::io` traits.
///
/// The `Read` and `Write` impls in this file acquire the GIL and call the
/// object's `read`, `write` and `flush` methods.
pub(crate) struct PyIo(PyObject);
impl Read for PyIo {
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
let gil = Python::acquire_gil(); let gil = Python::acquire_gil();
let py = gil.python(); let py = gil.python();
let read = io let read = self
.0
.call_method(py, "read", (buf.len(),), None) .call_method(py, "read", (buf.len(),), None)
.map_err(to_io_err)?; .map_err(to_io_err)?;
let bytes: &[u8] = read.extract(py).map_err(to_io_err)?; let bytes = read
.extract::<&[u8]>(py)
.or_else(|e| read.extract::<&str>(py).map(|s| s.as_bytes()).or(Err(e)))
.map_err(to_io_err)?;
buf.write_all(bytes)?; buf.write_all(bytes)?;
Ok(bytes.len()) Ok(bytes.len())
} }
Self::File(file) => file.read(buf),
}
}
} }
impl Write for PyFileLike { impl Write for PyIo {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match self {
Self::Io(io) => {
let gil = Python::acquire_gil(); let gil = Python::acquire_gil();
let py = gil.python(); let py = gil.python();
usize::extract( self.0
io.call_method(py, "write", (PyBytes::new(py, buf),), None) .call_method(py, "write", (PyBytes::new(py, buf),), None)
.map_err(to_io_err)? .map_err(to_io_err)?
.as_ref(py), .extract::<usize>(py)
)
.map_err(to_io_err) .map_err(to_io_err)
} }
Self::File(file) => file.write(buf),
}
}
fn flush(&mut self) -> io::Result<()> { fn flush(&mut self) -> io::Result<()> {
match self {
Self::Io(io) => {
let gil = Python::acquire_gil(); let gil = Python::acquire_gil();
let py = gil.python(); let py = gil.python();
io.call_method(py, "flush", (), None)?; self.0.call_method(py, "flush", (), None)?;
Ok(()) Ok(())
} }
Self::File(file) => file.flush(),
}
}
} }
fn to_io_err(error: impl Into<PyErr>) -> io::Error { fn to_io_err(error: impl Into<PyErr>) -> io::Error {

@ -1,6 +1,6 @@
#![allow(clippy::needless_option_as_deref)] #![allow(clippy::needless_option_as_deref)]
use crate::io::{allow_threads_unsafe, map_io_err, map_parse_error, PyFileLike}; use crate::io::{allow_threads_unsafe, map_io_err, map_parse_error, PyReadable, PyWritable};
use crate::model::*; use crate::model::*;
use crate::sparql::*; use crate::sparql::*;
use oxigraph::io::{DatasetFormat, GraphFormat}; use oxigraph::io::{DatasetFormat, GraphFormat};
@ -263,7 +263,7 @@ impl PyStore {
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase or str /// :type input: io.RawIOBase or io.BufferedIOBase or io.TextIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -293,7 +293,12 @@ impl PyStore {
} else { } else {
None None
}; };
let input = PyFileLike::open(input, py).map_err(map_io_err)?; let input = if let Ok(path) = input.extract::<&str>(py) {
PyReadable::from_file(path, py)
} else {
PyReadable::from_data(input, py)
}
.map_err(map_io_err)?;
py.allow_threads(|| { py.allow_threads(|| {
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.inner self.inner
@ -342,7 +347,7 @@ impl PyStore {
/// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. /// and ``application/xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
/// ///
/// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io.RawIOBase or io.BufferedIOBase or str /// :type input: io.RawIOBase or io.BufferedIOBase or io.TextIOBase or str
/// :param mime_type: the MIME type of the RDF serialization. /// :param mime_type: the MIME type of the RDF serialization.
/// :type mime_type: str /// :type mime_type: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -372,7 +377,12 @@ impl PyStore {
} else { } else {
None None
}; };
let input = PyFileLike::open(input, py).map_err(map_io_err)?; let input = if let Ok(path) = input.extract::<&str>(py) {
PyReadable::from_file(path, py)
} else {
PyReadable::from_data(input, py)
}
.map_err(map_io_err)?;
py.allow_threads(|| { py.allow_threads(|| {
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.inner self.inner
@ -441,12 +451,17 @@ impl PyStore {
from_graph: Option<&PyAny>, from_graph: Option<&PyAny>,
py: Python<'_>, py: Python<'_>,
) -> PyResult<()> { ) -> PyResult<()> {
let output = if let Ok(path) = output.extract::<&str>(py) {
PyWritable::from_file(path, py)
} else {
PyWritable::from_data(output)
}
.map_err(map_io_err)?;
let from_graph_name = if let Some(graph_name) = from_graph { let from_graph_name = if let Some(graph_name) = from_graph {
Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?)) Some(GraphName::from(&PyGraphNameRef::try_from(graph_name)?))
} else { } else {
None None
}; };
let output = PyFileLike::create(output, py).map_err(map_io_err)?;
py.allow_threads(|| { py.allow_threads(|| {
if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
self.inner self.inner

@ -1,44 +1,58 @@
import unittest import unittest
import io from io import StringIO, BytesIO, RawIOBase
from tempfile import NamedTemporaryFile
from pyoxigraph import * from pyoxigraph import *
class TestParse(unittest.TestCase): EXAMPLE_TRIPLE = Triple(
def test_parse(self):
input = io.BytesIO(b'<foo> <p> "1" .')
result = list(parse(input, "text/turtle", base_iri="http://example.com/"))
self.assertEqual(
result,
[
Triple(
NamedNode("http://example.com/foo"), NamedNode("http://example.com/foo"),
NamedNode("http://example.com/p"), NamedNode("http://example.com/p"),
Literal( Literal("1")
"1",
datatype=NamedNode("http://www.w3.org/2001/XMLSchema#string"),
),
)
],
) )
class TestSerialize(unittest.TestCase): class TestParse(unittest.TestCase):
def test_serialize(self): def test_parse_file(self):
output = io.BytesIO() with NamedTemporaryFile() as fp:
serialize( fp.write(b'<foo> <p> "1" .')
[ fp.flush()
Triple( self.assertEqual(
NamedNode("http://example.com"), list(parse(fp.name, "text/turtle", base_iri="http://example.com/")),
NamedNode("http://example.com/p"), [EXAMPLE_TRIPLE]
Literal("1"),
) )
],
output, def test_parse_not_existing_file(self):
"text/turtle", with self.assertRaises(IOError) as _:
parse("/tmp/not-existing-oxigraph-file.ttl", "text/turtle")
def test_parse_str_io(self):
self.assertEqual(
list(parse(StringIO('<foo> <p> "1" .'), "text/turtle", base_iri="http://example.com/")),
[EXAMPLE_TRIPLE]
) )
def test_parse_bytes_io(self):
self.assertEqual( self.assertEqual(
output.getvalue(), b'<http://example.com> <http://example.com/p> "1" .\n' list(parse(BytesIO(b'<foo> <p> "1" .'), "text/turtle", base_iri="http://example.com/")),
[EXAMPLE_TRIPLE]
) )
def test_parse_io_error(self):
# BadIO subclasses RawIOBase without overriding any method, so it provides
# no working read implementation of its own.
class BadIO(RawIOBase):
pass
# Consuming the parser with list() is expected to surface NotImplementedError
# (presumably raised by the unimplemented read path of RawIOBase; confirm).
with self.assertRaises(NotImplementedError) as _:
list(parse(BadIO(), mime_type="application/n-triples"))
class TestSerialize(unittest.TestCase):
    """Checks serialize() when writing to an in-memory buffer and to a file path."""

    def test_serialize_to_bytes_io(self):
        # Serialize into an in-memory binary buffer and compare its contents.
        expected = b'<http://example.com/foo> <http://example.com/p> "1" .\n'
        buffer = BytesIO()
        serialize([EXAMPLE_TRIPLE], buffer, "text/turtle")
        self.assertEqual(buffer.getvalue(), expected)

    def test_serialize_to_file(self):
        # Hand serialize() the temporary file's path, then read the result back
        # through the handle that is still open.
        expected = b'<http://example.com/foo> <http://example.com/p> "1" .\n'
        with NamedTemporaryFile() as fp:
            serialize([EXAMPLE_TRIPLE], fp.name, "text/turtle")
            written = fp.read()
            self.assertEqual(written, expected)

Loading…
Cancel
Save