Python: I/O adds a `path` parameter to read/write from/to a file

pull/665/head
Tpt 11 months ago committed by Thomas Tanon
parent 48db7f872b
commit 261f9c64a5
  1. 2
      oxrocksdb-sys/rocksdb
  2. 69
      python/src/io.rs
  3. 56
      python/src/store.rs
  4. 38
      python/tests/test_io.py
  5. 12
      python/tests/test_store.py

@ -1 +1 @@
Subproject commit 5f2d6f0cba9858130be48ae129dd9c9dcafe0f97
Subproject commit 1ce22dd6376b124d17eff7d96e0809d2f4b4ae70

@ -30,10 +30,12 @@ use std::sync::OnceLock;
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param path: The file path to read from. Replaces the ``input`` parameter.
/// :type path: str or os.PathLike[str] or None, optional
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
/// :type base_iri: str or None, optional
/// :param without_named_graphs: Sets that the parser must fail when parsing a named graph.
@ -46,26 +48,21 @@ use std::sync::OnceLock;
/// :raises SyntaxError: if the provided data is invalid.
/// :raises OSError: if a system error happens while reading the file.
///
/// >>> input = io.BytesIO(b'<foo> <p> "1" .')
/// >>> list(parse(input, "text/turtle", base_iri="http://example.com/"))
/// >>> list(parse(input=b'<foo> <p> "1" .', format="text/turtle", base_iri="http://example.com/"))
/// [<Quad subject=<NamedNode value=http://example.com/foo> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<DefaultGraph>>]
#[pyfunction]
#[pyo3(signature = (input, /, format = None, *, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))]
#[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))]
pub fn parse(
input: &PyAny,
input: Option<PyReadableInput>,
format: Option<&str>,
path: Option<PathBuf>,
base_iri: Option<&str>,
without_named_graphs: bool,
rename_blank_nodes: bool,
py: Python<'_>,
) -> PyResult<PyObject> {
let file_path = input.extract::<PathBuf>().ok();
let format = parse_format(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py)?
} else {
PyReadable::from_data(input)
};
let input = PyReadable::from_args(&path, input, py)?;
let format = parse_format(format, path.as_deref())?;
let mut parser = RdfParser::from_format(format);
if let Some(base_iri) = base_iri {
parser = parser
@ -80,7 +77,7 @@ pub fn parse(
}
Ok(PyQuadReader {
inner: parser.parse_read(input),
file_path,
file_path: path,
}
.into_py(py))
}
@ -120,7 +117,7 @@ pub fn parse(
/// >>> output.getvalue()
/// b'<http://example.com> <http://example.com/p> "1" .\n'
#[pyfunction]
#[pyo3(signature = (input, output = None, /, format = None))]
#[pyo3(signature = (input, output = None, format = None))]
pub fn serialize<'a>(
input: &PyAny,
output: Option<&PyAny>,
@ -167,13 +164,12 @@ impl PyQuadReader {
fn __next__(&mut self, py: Python<'_>) -> PyResult<Option<PyQuad>> {
py.allow_threads(|| {
self.inner
Ok(self
.inner
.next()
.map(|q| {
Ok(q.map_err(|e| map_parse_error(e, self.file_path.clone()))?
.into())
})
.transpose()
.map_err(|e| map_parse_error(e, self.file_path.clone()))?
.map(PyQuad::from))
})
}
}
@ -185,6 +181,22 @@ pub enum PyReadable {
}
impl PyReadable {
pub fn from_args(
path: &Option<PathBuf>,
input: Option<PyReadableInput>,
py: Python<'_>,
) -> PyResult<Self> {
match (path, input) {
(Some(_), Some(_)) => Err(PyValueError::new_err(
"input and file_path can't be both set at the same time",
)),
(Some(path), None) => Ok(PyReadable::from_file(path, py)?),
(None, Some(input)) => Ok(input.into()),
(None, None) => Err(PyValueError::new_err(
"Either input or file_path must be set",
)),
}
}
/// Opens `file` for reading and wraps the handle in the `File` variant.
///
/// The `File::open` call runs inside `py.allow_threads`; any I/O error it
/// reports is returned unchanged.
pub fn from_file(file: &Path, py: Python<'_>) -> io::Result<Self> {
    let handle = py.allow_threads(|| File::open(file))?;
    Ok(Self::File(handle))
}
@ -210,6 +222,23 @@ impl Read for PyReadable {
}
}
/// Value extracted from the Python `input` argument before it is turned into
/// a `PyReadable` through its `From` implementation.
///
/// NOTE(review): the derived `FromPyObject` presumably tries the variants in
/// declaration order, so `Io` (which wraps an arbitrary `PyObject`) should stay
/// last as the catch-all; confirm against the PyO3 derive documentation
/// before reordering.
#[derive(FromPyObject)]
pub enum PyReadableInput {
/// Text content held as an owned Rust `String`.
String(String),
/// Raw content held as an owned byte vector.
Bytes(Vec<u8>),
/// Any other Python object, kept as-is and later wrapped in `PyIo`.
Io(PyObject),
}
/// Turns the extracted Python argument into a readable source.
impl From<PyReadableInput> for PyReadable {
    fn from(input: PyReadableInput) -> Self {
        // Python objects are wrapped as-is; both in-memory variants end up as
        // a byte buffer behind a `Cursor`.
        let buffer = match input {
            PyReadableInput::Io(object) => return Self::Io(PyIo(object)),
            PyReadableInput::String(text) => text.into_bytes(),
            PyReadableInput::Bytes(raw) => raw,
        };
        Self::Bytes(Cursor::new(buffer))
    }
}
pub enum PyWritable {
Bytes(Vec<u8>),
Io(PyIo),

@ -1,6 +1,8 @@
#![allow(clippy::needless_option_as_deref)]
use crate::io::{allow_threads_unsafe, map_parse_error, parse_format, PyReadable, PyWritable};
use crate::io::{
allow_threads_unsafe, map_parse_error, parse_format, PyReadable, PyReadableInput, PyWritable,
};
use crate::model::*;
use crate::sparql::*;
use oxigraph::io::RdfFormat;
@ -360,10 +362,12 @@ impl PyStore {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param path: The file path to read from. Replaces the ``input`` parameter.
/// :type path: str or os.PathLike[str] or None, optional
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
/// :type base_iri: str or None, optional
/// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used.
@ -374,14 +378,15 @@ impl PyStore {
/// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file.
///
/// >>> store = Store()
/// >>> store.load(io.BytesIO(b'<foo> <p> "1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g"))
/// >>> store.load(input='<foo> <p> "1" .', format="text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g"))
/// >>> list(store)
/// [<Quad subject=<NamedNode value=http://example.com/foo> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
#[pyo3(signature = (input, /, format = None, *, base_iri = None, to_graph = None))]
#[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))]
fn load(
&self,
input: &PyAny,
input: Option<PyReadableInput>,
format: Option<&str>,
path: Option<PathBuf>,
base_iri: Option<&str>,
to_graph: Option<&PyAny>,
py: Python<'_>,
@ -391,13 +396,8 @@ impl PyStore {
} else {
None
};
let file_path = input.extract::<PathBuf>().ok();
let format = parse_format::<RdfFormat>(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py)?
} else {
PyReadable::from_data(input)
};
let input = PyReadable::from_args(&path, input, py)?;
let format: RdfFormat = parse_format(format, path.as_deref())?;
py.allow_threads(|| {
if let Some(to_graph_name) = to_graph_name {
self.inner
@ -405,7 +405,7 @@ impl PyStore {
} else {
self.inner.load_dataset(input, format, base_iri)
}
.map_err(|e| map_loader_error(e, file_path))
.map_err(|e| map_loader_error(e, path))
})
}
@ -429,10 +429,12 @@ impl PyStore {
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like ``ttl``. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param path: The file path to read from. Replaces the ``input`` parameter.
/// :type path: str or os.PathLike[str] or None, optional
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
/// :type base_iri: str or None, optional
/// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used.
@ -443,14 +445,15 @@ impl PyStore {
/// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file.
///
/// >>> store = Store()
/// >>> store.bulk_load(io.BytesIO(b'<foo> <p> "1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g"))
/// >>> store.bulk_load(input=b'<foo> <p> "1" .', format="text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g"))
/// >>> list(store)
/// [<Quad subject=<NamedNode value=http://example.com/foo> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
#[pyo3(signature = (input, /, format = None, *, base_iri = None, to_graph = None))]
#[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))]
fn bulk_load(
&self,
input: &PyAny,
input: Option<PyReadableInput>,
format: Option<&str>,
path: Option<PathBuf>,
base_iri: Option<&str>,
to_graph: Option<&PyAny>,
py: Python<'_>,
@ -460,13 +463,8 @@ impl PyStore {
} else {
None
};
let file_path = input.extract::<PathBuf>().ok();
let format = parse_format::<RdfFormat>(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py)?
} else {
PyReadable::from_data(input)
};
let input = PyReadable::from_args(&path, input, py)?;
let format: RdfFormat = parse_format(format, path.as_deref())?;
py.allow_threads(|| {
if let Some(to_graph_name) = to_graph_name {
self.inner
@ -477,7 +475,7 @@ impl PyStore {
.bulk_loader()
.load_dataset(input, format, base_iri)
}
.map_err(|e| map_loader_error(e, file_path))
.map_err(|e| map_loader_error(e, path))
})
}

@ -33,13 +33,37 @@ class TestParse(unittest.TestCase):
fp.write('<foo> <p> "éù" .'.encode())
fp.flush()
self.assertEqual(
list(parse(fp.name, base_iri="http://example.com/")),
list(parse(path=fp.name, base_iri="http://example.com/")),
[EXAMPLE_TRIPLE],
)
def test_parse_not_existing_file(self) -> None:
with self.assertRaises(IOError) as _:
parse("/tmp/not-existing-oxigraph-file.ttl", "text/turtle")
parse(path="/tmp/not-existing-oxigraph-file.ttl", format="text/turtle")
def test_parse_str(self) -> None:
    # The Turtle document is handed over directly as a `str`, positionally.
    quads = parse(
        '<foo> <p> "éù" .',
        "text/turtle",
        base_iri="http://example.com/",
    )
    self.assertEqual(list(quads), [EXAMPLE_TRIPLE])
def test_parse_bytes(self) -> None:
    # Same document as the `str` test, but encoded to `bytes` before parsing.
    payload = '<foo> <p> "éù" .'.encode()
    quads = parse(
        payload,
        "text/turtle",
        base_iri="http://example.com/",
    )
    self.assertEqual(list(quads), [EXAMPLE_TRIPLE])
def test_parse_str_io(self) -> None:
self.assertEqual(
@ -85,7 +109,7 @@ class TestParse(unittest.TestCase):
self.assertEqual(
list(
parse(
StringIO('<g> { <foo> <p> "1" }'),
'<g> { <foo> <p> "1" }',
"application/trig",
base_iri="http://example.com/",
)
@ -99,7 +123,7 @@ class TestParse(unittest.TestCase):
fp.write(b'<foo> "p" "1"')
fp.flush()
with self.assertRaises(SyntaxError) as ctx:
list(parse(fp.name, "text/turtle"))
list(parse(path=fp.name, format="text/turtle"))
self.assertEqual(ctx.exception.filename, fp.name)
self.assertEqual(ctx.exception.lineno, 2)
self.assertEqual(ctx.exception.offset, 7)
@ -111,7 +135,7 @@ class TestParse(unittest.TestCase):
with self.assertRaises(SyntaxError) as _:
list(
parse(
StringIO('<g> { <foo> <p> "1" }'),
'<g> { <foo> <p> "1" }',
"application/trig",
base_iri="http://example.com/",
without_named_graphs=True,
@ -122,14 +146,14 @@ class TestParse(unittest.TestCase):
self.assertNotEqual(
list(
parse(
StringIO('_:s <http://example.com/p> "o" .'),
'_:s <http://example.com/p> "o" .',
"application/n-triples",
rename_blank_nodes=True,
)
),
list(
parse(
StringIO('_:s <http://example.com/p> "o" .'),
'_:s <http://example.com/p> "o" .',
"application/n-triples",
rename_blank_nodes=True,
)

@ -1,5 +1,5 @@
import unittest
from io import BytesIO, UnsupportedOperation
from io import BytesIO, StringIO, UnsupportedOperation
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory, TemporaryFile
from typing import Any
@ -253,7 +253,7 @@ class TestStore(unittest.TestCase):
def test_load_ntriples_to_default_graph(self) -> None:
store = Store()
store.load(
BytesIO(b"<http://foo> <http://bar> <http://baz> ."),
b"<http://foo> <http://bar> <http://baz> .",
"application/n-triples",
)
self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())})
@ -261,7 +261,7 @@ class TestStore(unittest.TestCase):
def test_load_ntriples_to_named_graph(self) -> None:
store = Store()
store.load(
BytesIO(b"<http://foo> <http://bar> <http://baz> ."),
"<http://foo> <http://bar> <http://baz> .",
"application/n-triples",
to_graph=graph,
)
@ -279,7 +279,7 @@ class TestStore(unittest.TestCase):
def test_load_nquads(self) -> None:
store = Store()
store.load(
BytesIO(b"<http://foo> <http://bar> <http://baz> <http://graph>."),
StringIO("<http://foo> <http://bar> <http://baz> <http://graph>."),
"nq",
)
self.assertEqual(set(store), {Quad(foo, bar, baz, graph)})
@ -287,7 +287,7 @@ class TestStore(unittest.TestCase):
def test_load_trig_with_base_iri(self) -> None:
store = Store()
store.load(
BytesIO(b"<http://graph> { <http://foo> <http://bar> <> . }"),
"<http://graph> { <http://foo> <http://bar> <> . }",
"application/trig",
base_iri="http://baz",
)
@ -298,7 +298,7 @@ class TestStore(unittest.TestCase):
fp.write(b"<http://foo> <http://bar> <http://baz> <http://graph>.")
fp.flush()
store = Store()
store.load(fp.name)
store.load(path=fp.name)
self.assertEqual(set(store), {Quad(foo, bar, baz, graph)})
def test_load_with_io_error(self) -> None:

Loading…
Cancel
Save