From d5ca8fedd13bf8392b02b79dd3d88ad425779928 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 9 Aug 2020 16:11:00 +0200 Subject: [PATCH] pyoxigraph: Exposes SPARQL results internals --- CHANGELOG.md | 1 + python/docs/index.rst | 1 + python/docs/sparql.rst | 23 +++++ python/src/lib.rs | 6 ++ python/src/memory_store.rs | 5 +- python/src/model.rs | 72 ++++++++++++++ python/src/sled_store.rs | 5 +- python/src/sparql.rs | 195 +++++++++++++++++++++++++++++++++++++ python/src/store_utils.rs | 144 +-------------------------- python/tests/test_model.py | 12 +++ python/tests/test_store.py | 45 ++++++--- 11 files changed, 349 insertions(+), 160 deletions(-) create mode 100644 python/docs/sparql.rst create mode 100644 python/src/sparql.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 898fd54f..294e5ef6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - `QueryOptions` now allows settings the query dataset graph URIs (the SPARQL protocol `default-graph-uri` and `named-graph-uri` parameters). - `pyoxigraph` store `query` methods allows to provide the dataset graph URIs. It also provides an option to use all graph names as the default graph. - "default graph as union option" now works with FROM NAMED. +- `pyoxigraph` now exposes and documents `Variable`, `QuerySolution`, `QuerySolutions` and `QueryTriples` ## [0.1.0-rc.1] - 2020-08-01 diff --git a/python/docs/index.rst b/python/docs/index.rst index ba460a93..38035e47 100644 --- a/python/docs/index.rst +++ b/python/docs/index.rst @@ -66,3 +66,4 @@ Table of contents io store/memory store/sled + sparql diff --git a/python/docs/sparql.rst b/python/docs/sparql.rst new file mode 100644 index 00000000..4c6a0479 --- /dev/null +++ b/python/docs/sparql.rst @@ -0,0 +1,23 @@ +SPARQL utility objects +============================= + +Oxigraph also provides some utilities related to SPARQL queries: + + +Variable +"""""""" +.. autoclass:: pyoxigraph.Variable + :members: + + +``SELECT`` solutions +"""""""""""""""""""" +.. 
autoclass:: pyoxigraph.QuerySolutions + :members: +.. autoclass:: pyoxigraph.QuerySolution + :members: + +``CONSTRUCT`` results +""""""""""""""""""""" +.. autoclass:: pyoxigraph.QueryTriples + :members: diff --git a/python/src/lib.rs b/python/src/lib.rs index e3e413aa..ac861e7f 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -12,11 +12,13 @@ mod io; mod memory_store; mod model; mod sled_store; +mod sparql; mod store_utils; use crate::memory_store::*; use crate::model::*; use crate::sled_store::*; +use crate::sparql::*; use pyo3::prelude::*; /// Oxigraph Python bindings @@ -34,5 +36,9 @@ fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> { module.add_class::()?; module.add_class::()?; module.add_class::()?; + module.add_class::()?; + module.add_class::()?; + module.add_class::()?; + module.add_class::()?; io::add_to_module(module) } diff --git a/python/src/memory_store.rs b/python/src/memory_store.rs index f04ce93b..db60237b 100644 --- a/python/src/memory_store.rs +++ b/python/src/memory_store.rs @@ -1,5 +1,6 @@ use crate::io::PyFileLike; use crate::model::*; +use crate::sparql::*; use crate::store_utils::*; use oxigraph::io::{DatasetFormat, GraphFormat}; use oxigraph::model::*; @@ -113,8 +114,8 @@ impl PyMemoryStore { /// :type default_graph_uris: list(NamedNode),None /// :param named_graph_uris: optional, list of the named graph URIs that could be used in SPARQL `GRAPH` clause. By default all the store default graphs are available. /// :type named_graph_uris: list(NamedNode),None - /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of solution bindings for ``SELECT`` queries. - /// :rtype: iter(QuerySolution) or iter(Triple) or bool + /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries. 
+ /// :rtype: QuerySolutions or QueryTriples or bool /// :raises SyntaxError: if the provided query is invalid /// /// ``SELECT`` query: diff --git a/python/src/model.rs b/python/src/model.rs index 6c4c9c13..ad8500e9 100644 --- a/python/src/model.rs +++ b/python/src/model.rs @@ -1,4 +1,5 @@ use oxigraph::model::*; +use oxigraph::sparql::Variable; use pyo3::basic::CompareOp; use pyo3::exceptions::{IndexError, NotImplementedError, TypeError, ValueError}; use pyo3::prelude::*; @@ -696,6 +697,77 @@ impl PyIterProtocol for PyQuad { } } +/// A SPARQL query variable +/// +/// :param value: the variable name as a string +/// :type value: str +/// +/// The :py:func:`str` function provides a serialization compatible with SPARQL: +/// +/// >>> str(Variable('foo')) +/// '?foo' +#[pyclass(name = Variable)] +#[text_signature = "(value)"] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct PyVariable { + inner: Variable, +} + +impl From for PyVariable { + fn from(inner: Variable) -> Self { + Self { inner } + } +} + +impl From for Variable { + fn from(variable: PyVariable) -> Self { + variable.inner + } +} + +impl<'a> From<&'a PyVariable> for &'a Variable { + fn from(variable: &'a PyVariable) -> Self { + &variable.inner + } +} + +#[pymethods] +impl PyVariable { + #[new] + fn new(value: String) -> Self { + Variable::new(value).into() + } + + /// :return: the variable name + /// :rtype: str + /// + /// >>> Variable("foo").value + /// 'foo' + #[getter] + fn value(&self) -> &str { + self.inner.as_str() + } +} + +#[pyproto] +impl PyObjectProtocol for PyVariable { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __repr__(&self) -> String { + format!("", self.inner.as_str()) + } + + fn __hash__(&self) -> u64 { + hash(&self.inner) + } + + fn __richcmp__(&self, other: &PyCell, op: CompareOp) -> PyResult { + eq_compare(self, &other.borrow(), op) + } +} + pub fn extract_named_node(py: &PyAny) -> PyResult { if let Ok(node) = py.downcast::>() { 
Ok(node.borrow().clone().into()) diff --git a/python/src/sled_store.rs b/python/src/sled_store.rs index 1cd81513..e7cff892 100644 --- a/python/src/sled_store.rs +++ b/python/src/sled_store.rs @@ -1,5 +1,6 @@ use crate::io::PyFileLike; use crate::model::*; +use crate::sparql::*; use crate::store_utils::*; use oxigraph::io::{DatasetFormat, GraphFormat}; use oxigraph::model::*; @@ -128,8 +129,8 @@ impl PySledStore { /// :type default_graph_uris: list(NamedNode),None /// :param named_graph_uris: optional, list of the named graph URIs that could be used in SPARQL `GRAPH` clause. By default all the store default graphs are available. /// :type named_graph_uris: list(NamedNode),None - /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of solution bindings for ``SELECT`` queries. - /// :rtype: iter(QuerySolution) or iter(Triple) or bool + /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries. 
+ /// :rtype: QuerySolutions or QueryTriples or bool /// :raises SyntaxError: if the provided query is invalid /// :raises IOError: if an I/O error happens while reading the store /// diff --git a/python/src/sparql.rs b/python/src/sparql.rs new file mode 100644 index 00000000..791bba5d --- /dev/null +++ b/python/src/sparql.rs @@ -0,0 +1,195 @@ +use crate::model::*; +use crate::store_utils::*; +use oxigraph::sparql::*; +use pyo3::exceptions::{RuntimeError, SyntaxError, TypeError, ValueError}; +use pyo3::prelude::*; +use pyo3::{PyIterProtocol, PyMappingProtocol, PyNativeType, PyObjectProtocol}; + +pub fn build_query_options( + use_default_graph_as_union: bool, + default_graph_uris: Option>, + named_graph_uris: Option>, +) -> PyResult { + let mut options = QueryOptions::default(); + if use_default_graph_as_union { + options = options.with_default_graph_as_union(); + } + if let Some(default_graph_uris) = default_graph_uris { + if default_graph_uris.is_empty() { + return Err(ValueError::py_err( + "The list of the default graph URIs could not be empty", + )); + } + for default_graph_uri in default_graph_uris { + options = options.with_default_graph(default_graph_uri); + } + } + if let Some(named_graph_uris) = named_graph_uris { + if named_graph_uris.is_empty() { + return Err(ValueError::py_err( + "The list of the named graph URIs could not be empty", + )); + } + for named_graph_uri in named_graph_uris { + options = options.with_named_graph(named_graph_uri); + } + } + Ok(options) +} + +pub fn query_results_to_python(py: Python<'_>, results: QueryResults) -> PyResult { + Ok(match results { + QueryResults::Solutions(inner) => PyQuerySolutions { inner }.into_py(py), + QueryResults::Graph(inner) => PyQueryTriples { inner }.into_py(py), + QueryResults::Boolean(b) => b.into_py(py), + }) +} + +/// Tuple associating variables and terms that are the result of a SPARQL ``SELECT`` query. +/// +/// It is the equivalent of a row in SQL. 
+/// +/// It can be indexed by variable name (:py:class:`Variable` or :py:class:`str`) or position in the tuple (:py:class:`int`). +/// +/// >>> store = SledStore() +/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) +/// >>> solution = next(store.query('SELECT ?s ?p ?o WHERE { ?s ?p ?o }')) +/// >>> solution[Variable('s')] +/// +/// >>> solution['s'] +/// +/// >>> solution[0] +/// +#[pyclass(unsendable, name = QuerySolution)] +pub struct PyQuerySolution { + inner: QuerySolution, +} + +#[pyproto] +impl PyObjectProtocol for PyQuerySolution { + fn __repr__(&self) -> String { + let mut buffer = String::new(); + buffer.push_str("'); + buffer + } +} + +#[pyproto] +impl PyMappingProtocol for PyQuerySolution { + fn __len__(&self) -> usize { + self.inner.len() + } + + fn __getitem__(&self, input: &PyAny) -> PyResult> { + if let Ok(key) = usize::extract(input) { + Ok(self + .inner + .get(key) + .map(|term| term_to_python(input.py(), term.clone()))) + } else if let Ok(key) = <&str>::extract(input) { + Ok(self + .inner + .get(key) + .map(|term| term_to_python(input.py(), term.clone()))) + } else if let Ok(key) = input.downcast::>() { + let key = &*key.borrow(); + Ok(self + .inner + .get(<&Variable>::from(key)) + .map(|term| term_to_python(input.py(), term.clone()))) + } else { + Err(TypeError::py_err(format!( + "{} is not an integer of a string", + input.get_type().name(), + ))) + } + } +} + +/// An iterator of :py:class:`QuerySolution` returned by a SPARQL ``SELECT`` query +/// +/// >>> store = SledStore() +/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) +/// >>> list(store.query('SELECT ?s WHERE { ?s ?p ?o }')) +/// [>] +#[pyclass(unsendable, name = QuerySolutions)] +pub struct PyQuerySolutions { + inner: QuerySolutionIter, +} + +#[pymethods] +impl PyQuerySolutions { + /// :return: the ordered list of all variables that could appear in the query results + /// 
:rtype: list(Variable) + /// + /// >>> store = SledStore() + /// >>> store.query('SELECT ?s WHERE { ?s ?p ?o }').variables + /// [] + #[getter] + fn variables(&self) -> Vec { + self.inner + .variables() + .iter() + .map(|v| v.clone().into()) + .collect() + } +} + +#[pyproto] +impl PyIterProtocol for PyQuerySolutions { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + Ok(slf + .inner + .next() + .transpose() + .map_err(map_evaluation_error)? + .map(move |inner| PyQuerySolution { inner })) + } +} + +/// An iterator of :py:class:`Triple` returned by a SPARQL ``CONSTRUCT`` or ``DESCRIBE`` query +/// +/// >>> store = MemoryStore() +/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) +/// >>> list(store.query('CONSTRUCT WHERE { ?s ?p ?o }')) +/// [ predicate= object=>>] +#[pyclass(unsendable, name = QueryTriples)] +pub struct PyQueryTriples { + inner: QueryTripleIter, +} + +#[pyproto] +impl PyIterProtocol for PyQueryTriples { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + Ok(slf + .inner + .next() + .transpose() + .map_err(map_evaluation_error)? 
+ .map(|t| t.into())) + } +} + +pub fn map_evaluation_error(error: EvaluationError) -> PyErr { + match error { + EvaluationError::Parsing(error) => SyntaxError::py_err(error.to_string()), + EvaluationError::Io(error) => map_io_err(error), + EvaluationError::Query(error) => ValueError::py_err(error.to_string()), + _ => RuntimeError::py_err(error.to_string()), + } +} diff --git a/python/src/store_utils.rs b/python/src/store_utils.rs index 694bd836..c483f719 100644 --- a/python/src/store_utils.rs +++ b/python/src/store_utils.rs @@ -1,11 +1,7 @@ use crate::model::*; use oxigraph::model::*; -use oxigraph::sparql::{ - EvaluationError, QueryOptions, QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter, -}; -use pyo3::exceptions::{IOError, RuntimeError, SyntaxError, TypeError, ValueError}; +use pyo3::exceptions::{IOError, SyntaxError, ValueError}; use pyo3::prelude::*; -use pyo3::{PyIterProtocol, PyMappingProtocol, PyNativeType, PyObjectProtocol}; use std::io; pub fn extract_quads_pattern( @@ -47,135 +43,6 @@ pub fn extract_quads_pattern( )) } -pub fn build_query_options( - use_default_graph_as_union: bool, - default_graph_uris: Option>, - named_graph_uris: Option>, -) -> PyResult { - let mut options = QueryOptions::default(); - if use_default_graph_as_union { - options = options.with_default_graph_as_union(); - } - if let Some(default_graph_uris) = default_graph_uris { - if default_graph_uris.is_empty() { - return Err(ValueError::py_err( - "The list of the default graph URIs could not be empty", - )); - } - for default_graph_uri in default_graph_uris { - options = options.with_default_graph(default_graph_uri); - } - } - if let Some(named_graph_uris) = named_graph_uris { - if named_graph_uris.is_empty() { - return Err(ValueError::py_err( - "The list of the named graph URIs could not be empty", - )); - } - for named_graph_uri in named_graph_uris { - options = options.with_named_graph(named_graph_uri); - } - } - Ok(options) -} - -pub fn query_results_to_python(py: 
Python<'_>, results: QueryResults) -> PyResult { - Ok(match results { - QueryResults::Solutions(inner) => PyQuerySolutionIter { inner }.into_py(py), - QueryResults::Graph(inner) => PyQueryTripleIter { inner }.into_py(py), - QueryResults::Boolean(b) => b.into_py(py), - }) -} - -#[pyclass(unsendable, name = QuerySolution)] -pub struct PyQuerySolution { - inner: QuerySolution, -} - -#[pyproto] -impl PyObjectProtocol for PyQuerySolution { - fn __repr__(&self) -> String { - let mut buffer = String::new(); - buffer.push_str("'); - buffer - } -} - -#[pyproto] -impl PyMappingProtocol for PyQuerySolution { - fn __len__(&self) -> usize { - self.inner.len() - } - - fn __getitem__(&self, input: &PyAny) -> PyResult> { - if let Ok(key) = usize::extract(input) { - Ok(self - .inner - .get(key) - .map(|term| term_to_python(input.py(), term.clone()))) - } else if let Ok(key) = <&str>::extract(input) { - Ok(self - .inner - .get(key) - .map(|term| term_to_python(input.py(), term.clone()))) - } else { - Err(TypeError::py_err(format!( - "{} is not an integer of a string", - input.get_type().name(), - ))) - } - } -} - -#[pyclass(unsendable, name = QuerySolutionIter)] -pub struct PyQuerySolutionIter { - inner: QuerySolutionIter, -} - -#[pyproto] -impl PyIterProtocol for PyQuerySolutionIter { - fn __iter__(slf: PyRefMut) -> Py { - slf.into() - } - - fn __next__(mut slf: PyRefMut) -> PyResult> { - Ok(slf - .inner - .next() - .transpose() - .map_err(map_evaluation_error)? - .map(move |inner| PyQuerySolution { inner })) - } -} - -#[pyclass(unsendable, name = QueryTripleIter)] -pub struct PyQueryTripleIter { - inner: QueryTripleIter, -} - -#[pyproto] -impl PyIterProtocol for PyQueryTripleIter { - fn __iter__(slf: PyRefMut) -> Py { - slf.into() - } - - fn __next__(mut slf: PyRefMut) -> PyResult> { - Ok(slf - .inner - .next() - .transpose() - .map_err(map_evaluation_error)? 
- .map(|t| t.into())) - } -} - pub fn map_io_err(error: io::Error) -> PyErr { match error.kind() { io::ErrorKind::InvalidInput => ValueError::py_err(error.to_string()), @@ -185,12 +52,3 @@ pub fn map_io_err(error: io::Error) -> PyErr { _ => IOError::py_err(error.to_string()), } } - -pub fn map_evaluation_error(error: EvaluationError) -> PyErr { - match error { - EvaluationError::Parsing(error) => SyntaxError::py_err(error.to_string()), - EvaluationError::Io(error) => map_io_err(error), - EvaluationError::Query(error) => ValueError::py_err(error.to_string()), - _ => RuntimeError::py_err(error.to_string()), - } -} diff --git a/python/tests/test_model.py b/python/tests/test_model.py index 72587612..70f932f7 100644 --- a/python/tests/test_model.py +++ b/python/tests/test_model.py @@ -180,5 +180,17 @@ class TestQuad(unittest.TestCase): ) +class TestVariable(unittest.TestCase): + def test_constructor(self): + self.assertEqual(Variable("foo").value, "foo") + + def test_string(self): + self.assertEqual(str(Variable("foo")), "?foo") + + def test_equal(self): + self.assertEqual(Variable("foo"), Variable("foo")) + self.assertNotEqual(Variable("foo"), Variable("bar")) + + if __name__ == "__main__": unittest.main() diff --git a/python/tests/test_store.py b/python/tests/test_store.py index a8b7c8d5..8d2aeeaa 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -68,7 +68,8 @@ class TestAbstractStore(unittest.TestCase, ABC): {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, ) self.assertEqual( - set(store.quads_for_pattern(None, None, None, graph)), {Quad(foo, bar, baz, graph)}, + set(store.quads_for_pattern(None, None, None, graph)), + {Quad(foo, bar, baz, graph)}, ) self.assertEqual( set(store.quads_for_pattern(foo, None, None, DefaultGraph())), @@ -84,32 +85,51 @@ class TestAbstractStore(unittest.TestCase, ABC): def test_construct_query(self): store = self.store() store.add(Quad(foo, bar, baz)) + results = store.query("CONSTRUCT { ?s ?p 
?o } WHERE { ?s ?p ?o }") + self.assertIsInstance(results, QueryTriples) self.assertEqual( - set(store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }")), - {Triple(foo, bar, baz)}, + set(results), {Triple(foo, bar, baz)}, ) def test_select_query(self): store = self.store() store.add(Quad(foo, bar, baz)) - results = list(store.query("SELECT ?s WHERE { ?s ?p ?o }")) - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0], foo) - self.assertEqual(results[0]["s"], foo) + solutions = store.query("SELECT ?s WHERE { ?s ?p ?o }") + self.assertIsInstance(solutions, QuerySolutions) + self.assertEqual(solutions.variables, [Variable("s")]) + solution = next(solutions) + self.assertIsInstance(solution, QuerySolution) + self.assertEqual(solution[0], foo) + self.assertEqual(solution["s"], foo) + self.assertEqual(solution[Variable("s")], foo) def test_select_query_union_default_graph(self): store = self.store() store.add(Quad(foo, bar, baz, graph)) self.assertEqual(len(list(store.query("SELECT ?s WHERE { ?s ?p ?o }"))), 0) - self.assertEqual(len(list(store.query("SELECT ?s WHERE { ?s ?p ?o }", use_default_graph_as_union=True))), 1) - self.assertEqual(len(list(store.query("SELECT ?s WHERE { ?s ?p ?o }", use_default_graph_as_union=True, named_graph_uris=[graph]))), 1) + results = store.query( + "SELECT ?s WHERE { ?s ?p ?o }", use_default_graph_as_union=True + ) + self.assertEqual(len(list(results)), 1) + results = store.query( + "SELECT ?s WHERE { ?s ?p ?o }", + use_default_graph_as_union=True, + named_graph_uris=[graph], + ) + self.assertEqual(len(list(results)), 1) def test_select_query_with_default_graph(self): store = self.store() store.add(Quad(foo, bar, baz, graph)) self.assertEqual(len(list(store.query("SELECT ?s WHERE { ?s ?p ?o }"))), 0) - self.assertEqual(len(list(store.query("SELECT ?s WHERE { ?s ?p ?o }", default_graph_uris=[graph]))), 1) - self.assertEqual(len(list(store.query("SELECT ?s WHERE { GRAPH ?g { ?s ?p ?o } }", named_graph_uris=[graph]))), 1) + 
results = store.query( + "SELECT ?s WHERE { ?s ?p ?o }", default_graph_uris=[graph] + ) + self.assertEqual(len(list(results)), 1) + results = store.query( + "SELECT ?s WHERE { GRAPH ?g { ?s ?p ?o } }", named_graph_uris=[graph], + ) + self.assertEqual(len(list(results)), 1) def test_load_ntriples_to_default_graph(self): store = self.store() @@ -160,8 +180,7 @@ class TestAbstractStore(unittest.TestCase, ABC): output = BytesIO() store.dump(output, "application/n-triples", from_graph=graph) self.assertEqual( - output.getvalue(), - b" .\n", + output.getvalue(), b" .\n", ) def test_dump_nquads(self):