Python: harmonizes parse_query_results and parse signatures

pull/668/head
Tpt 1 year ago committed by Thomas Tanon
parent e6d98445e6
commit cc41448b18
  1. 15
      python/src/io.rs
  2. 63
      python/src/sparql.rs
  3. 20
      python/tests/test_io.py

@ -190,26 +190,13 @@ impl PyReadable {
(Some(_), Some(_)) => Err(PyValueError::new_err( (Some(_), Some(_)) => Err(PyValueError::new_err(
"input and file_path can't be both set at the same time", "input and file_path can't be both set at the same time",
)), )),
(Some(path), None) => Ok(PyReadable::from_file(path, py)?), (Some(path), None) => Ok(Self::File(py.allow_threads(|| File::open(path))?)),
(None, Some(input)) => Ok(input.into()), (None, Some(input)) => Ok(input.into()),
(None, None) => Err(PyValueError::new_err( (None, None) => Err(PyValueError::new_err(
"Either input or file_path must be set", "Either input or file_path must be set",
)), )),
} }
} }
pub fn from_file(file: &Path, py: Python<'_>) -> io::Result<Self> {
Ok(Self::File(py.allow_threads(|| File::open(file))?))
}
pub fn from_data(data: &PyAny) -> Self {
if let Ok(bytes) = data.extract::<Vec<u8>>() {
Self::Bytes(Cursor::new(bytes))
} else if let Ok(string) = data.extract::<String>() {
Self::Bytes(Cursor::new(string.into_bytes()))
} else {
Self::Io(PyIo(data.into()))
}
}
} }
impl Read for PyReadable { impl Read for PyReadable {

@ -12,7 +12,7 @@ use oxigraph::sparql::{
Variable, Variable,
}; };
use pyo3::basic::CompareOp; use pyo3::basic::CompareOp;
use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PyTypeError, PyValueError}; use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PyValueError};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use std::io; use std::io;
@ -132,22 +132,13 @@ impl PyQuerySolution {
self.inner.len() self.inner.len()
} }
fn __getitem__(&self, input: &PyAny) -> PyResult<Option<PyTerm>> { fn __getitem__(&self, key: PySolutionKey<'_>) -> Option<PyTerm> {
if let Ok(key) = usize::extract(input) { match key {
Ok(self.inner.get(key).map(|term| PyTerm::from(term.clone()))) PySolutionKey::Usize(key) => self.inner.get(key),
} else if let Ok(key) = <&str>::extract(input) { PySolutionKey::Str(key) => self.inner.get(key),
Ok(self.inner.get(key).map(|term| PyTerm::from(term.clone()))) PySolutionKey::Variable(key) => self.inner.get(<&Variable>::from(&*key)),
} else if let Ok(key) = input.extract::<PyRef<PyVariable>>() {
Ok(self
.inner
.get(<&Variable>::from(&*key))
.map(|term| PyTerm::from(term.clone())))
} else {
Err(PyTypeError::new_err(format!(
"{} is not an integer of a string",
input.get_type().name()?,
)))
} }
.map(|term| PyTerm::from(term.clone()))
} }
#[allow(clippy::unnecessary_to_owned)] #[allow(clippy::unnecessary_to_owned)]
@ -158,6 +149,13 @@ impl PyQuerySolution {
} }
} }
#[derive(FromPyObject)]
pub enum PySolutionKey<'a> {
Usize(usize),
Str(&'a str),
Variable(PyRef<'a, PyVariable>),
}
#[pyclass(module = "pyoxigraph")] #[pyclass(module = "pyoxigraph")]
pub struct SolutionValueIter { pub struct SolutionValueIter {
inner: IntoIter<Option<Term>>, inner: IntoIter<Option<Term>>,
@ -460,43 +458,42 @@ impl PyQueryTriples {
/// It supports also some media type and extension aliases. /// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_. /// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
/// ///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str] /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension. /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional /// :type format: str or None, optional
/// :param path: The file path to read from. Replaces the ``input`` parameter.
/// :type path: str or os.PathLike[str] or None, optional
/// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`. /// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`.
/// :rtype: QuerySolutions or QueryBoolean /// :rtype: QuerySolutions or QueryBoolean
/// :raises ValueError: if the format is not supported. /// :raises ValueError: if the format is not supported.
/// :raises SyntaxError: if the provided data is invalid. /// :raises SyntaxError: if the provided data is invalid.
/// :raises OSError: if a system error happens while reading the file. /// :raises OSError: if a system error happens while reading the file.
/// ///
/// >>> input = io.BytesIO(b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t1\n') /// >>> list(parse_query_results('?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t1\n', "text/tsv"))
/// >>> list(parse_query_results(input, "text/tsv"))
/// [<QuerySolution s=<NamedNode value=http://example.com/s> p=<NamedNode value=http://example.com/s> o=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#integer>>>] /// [<QuerySolution s=<NamedNode value=http://example.com/s> p=<NamedNode value=http://example.com/s> o=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#integer>>>]
/// ///
/// >>> input = io.BytesIO(b'{"head":{},"boolean":true}') /// >>> parse_query_results('{"head":{},"boolean":true}', "application/sparql-results+json")
/// >>> parse_query_results(input, "application/sparql-results+json")
/// <QueryBoolean true> /// <QueryBoolean true>
#[pyfunction] #[pyfunction]
#[pyo3(signature = (input, /, format = None))] #[pyo3(signature = (input = None, format = None, *, path = None))]
pub fn parse_query_results( pub fn parse_query_results(
input: &PyAny, input: Option<PyReadableInput>,
format: Option<&str>, format: Option<&str>,
path: Option<PathBuf>,
py: Python<'_>, py: Python<'_>,
) -> PyResult<PyObject> { ) -> PyResult<PyObject> {
let file_path = input.extract::<PathBuf>().ok(); let input = PyReadable::from_args(&path, input, py)?;
let format = parse_format(format, file_path.as_deref())?; let format = parse_format(format, path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py)?
} else {
PyReadable::from_data(input)
};
let results = QueryResultsParser::from_format(format) let results = QueryResultsParser::from_format(format)
.parse_read(input) .parse_read(input)
.map_err(|e| map_query_results_parse_error(e, file_path.clone()))?; .map_err(|e| map_query_results_parse_error(e, path.clone()))?;
Ok(match results { Ok(match results {
FromReadQueryResultsReader::Solutions(iter) => PyQuerySolutions { FromReadQueryResultsReader::Solutions(iter) => PyQuerySolutions {
inner: PyQuerySolutionsVariant::Reader { iter, file_path }, inner: PyQuerySolutionsVariant::Reader {
iter,
file_path: path,
},
} }
.into_py(py), .into_py(py),
FromReadQueryResultsReader::Boolean(inner) => PyQueryBoolean { inner }.into_py(py), FromReadQueryResultsReader::Boolean(inner) => PyQueryBoolean { inner }.into_py(py),

@ -202,7 +202,7 @@ class TestParseQuerySolutions(unittest.TestCase):
with NamedTemporaryFile(suffix=".tsv") as fp: with NamedTemporaryFile(suffix=".tsv") as fp:
fp.write(b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t"1"\n') fp.write(b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t"1"\n')
fp.flush() fp.flush()
r = parse_query_results(fp.name) r = parse_query_results(path=fp.name)
self.assertIsInstance(r, QuerySolutions) self.assertIsInstance(r, QuerySolutions)
results = list(r) # type: ignore[arg-type] results = list(r) # type: ignore[arg-type]
self.assertEqual(results[0]["s"], NamedNode("http://example.com/s")) self.assertEqual(results[0]["s"], NamedNode("http://example.com/s"))
@ -210,10 +210,20 @@ class TestParseQuerySolutions(unittest.TestCase):
def test_parse_not_existing_file(self) -> None: def test_parse_not_existing_file(self) -> None:
with self.assertRaises(IOError) as _: with self.assertRaises(IOError) as _:
parse_query_results("/tmp/not-existing-oxigraph-file.ttl", "application/json") parse_query_results(path="/tmp/not-existing-oxigraph-file.ttl", format="application/json")
def test_parse_str(self) -> None:
result = parse_query_results("true", "tsv")
self.assertIsInstance(result, QueryBoolean)
self.assertTrue(result)
def test_parse_bytes(self) -> None:
result = parse_query_results(b"false", "tsv")
self.assertIsInstance(result, QueryBoolean)
self.assertFalse(result)
def test_parse_str_io(self) -> None: def test_parse_str_io(self) -> None:
result = parse_query_results(StringIO("true"), "tsv") result = parse_query_results("true", "tsv")
self.assertIsInstance(result, QueryBoolean) self.assertIsInstance(result, QueryBoolean)
self.assertTrue(result) self.assertTrue(result)
@ -231,7 +241,7 @@ class TestParseQuerySolutions(unittest.TestCase):
fp.write(b"{]") fp.write(b"{]")
fp.flush() fp.flush()
with self.assertRaises(SyntaxError) as ctx: with self.assertRaises(SyntaxError) as ctx:
list(parse_query_results(fp.name, "srj")) # type: ignore[arg-type] list(parse_query_results(path=fp.name, format="srj")) # type: ignore[arg-type]
self.assertEqual(ctx.exception.filename, fp.name) self.assertEqual(ctx.exception.filename, fp.name)
self.assertEqual(ctx.exception.lineno, 1) self.assertEqual(ctx.exception.lineno, 1)
self.assertEqual(ctx.exception.offset, 2) self.assertEqual(ctx.exception.offset, 2)
@ -245,7 +255,7 @@ class TestParseQuerySolutions(unittest.TestCase):
fp.write(b"1\t<foo >\n") fp.write(b"1\t<foo >\n")
fp.flush() fp.flush()
with self.assertRaises(SyntaxError) as ctx: with self.assertRaises(SyntaxError) as ctx:
list(parse_query_results(fp.name, "tsv")) # type: ignore[arg-type] list(parse_query_results(path=fp.name, format="tsv")) # type: ignore[arg-type]
self.assertEqual(ctx.exception.filename, fp.name) self.assertEqual(ctx.exception.filename, fp.name)
self.assertEqual(ctx.exception.lineno, 2) self.assertEqual(ctx.exception.lineno, 2)
self.assertEqual(ctx.exception.offset, 3) self.assertEqual(ctx.exception.offset, 3)

Loading…
Cancel
Save