parent
2998f795fd
commit
f7023a381e
@ -0,0 +1,322 @@ |
|||||||
|
use crate::model::{hash, PyGraphNameRef, PyNamedNodeRef, PyQuad, PySubjectRef, PyTermRef}; |
||||||
|
use oxigraph::model::dataset::{CanonicalizationAlgorithm, Dataset}; |
||||||
|
use oxigraph::model::{Quad, QuadRef}; |
||||||
|
use pyo3::exceptions::PyKeyError; |
||||||
|
use pyo3::prelude::*; |
||||||
|
|
||||||
|
/// An in-memory `RDF dataset <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_.
|
||||||
|
///
|
||||||
|
/// It can accommodate a fairly large number of quads (in the few millions).
|
||||||
|
///
|
||||||
|
/// Use :py:class:`Store` if you need on-disk persistence or SPARQL.
|
||||||
|
///
|
||||||
|
/// Warning: It interns the strings and does not do any garbage collection yet:
|
||||||
|
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
|
||||||
|
///
|
||||||
|
/// :param quads: some quads to initialize the dataset with.
|
||||||
|
/// :type quads: collections.abc.Iterable[Quad]
|
||||||
|
///
|
||||||
|
/// The :py:class:`str` function provides an N-Quads serialization:
|
||||||
|
///
|
||||||
|
/// >>> str(Dataset([Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))]))
|
||||||
|
/// '<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n'
|
||||||
|
#[pyclass(name = "Dataset", module = "pyoxigraph")] |
||||||
|
#[derive(Eq, PartialEq, Debug, Clone)] |
||||||
|
pub struct PyDataset { |
||||||
|
inner: Dataset, |
||||||
|
} |
||||||
|
|
||||||
|
#[pymethods] |
||||||
|
impl PyDataset { |
||||||
|
#[new] |
||||||
|
#[pyo3(signature = (quads = None))] |
||||||
|
fn new(quads: Option<&PyAny>) -> PyResult<Self> { |
||||||
|
let mut inner = Dataset::new(); |
||||||
|
if let Some(quads) = quads { |
||||||
|
for quad in quads.iter()? { |
||||||
|
inner.insert(&*quad?.extract::<PyRef<'_, PyQuad>>()?); |
||||||
|
} |
||||||
|
} |
||||||
|
Ok(Self { inner }) |
||||||
|
} |
||||||
|
|
||||||
|
/// Looks for the quads with the given subject.
|
||||||
|
///
|
||||||
|
/// :param subject: the quad subject.
|
||||||
|
/// :type subject: NamedNode or BlankNode or Triple
|
||||||
|
/// :return: an iterator of the quads.
|
||||||
|
/// :rtype: collections.abc.Iterator[Quad]
|
||||||
|
///
|
||||||
|
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||||
|
/// >>> list(store.quads_for_subject(NamedNode('http://example.com')))
|
||||||
|
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||||
|
pub fn quads_for_subject(&self, subject: &PyAny) -> PyResult<QuadIter> { |
||||||
|
Ok(QuadIter { |
||||||
|
inner: self |
||||||
|
.inner |
||||||
|
.quads_for_subject(&PySubjectRef::try_from(subject)?) |
||||||
|
.map(QuadRef::into_owned) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.into_iter(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
/// Looks for the quads with the given predicate.
|
||||||
|
///
|
||||||
|
/// :param predicate: the quad predicate.
|
||||||
|
/// :type subject: NamedNode
|
||||||
|
/// :return: an iterator of the quads.
|
||||||
|
/// :rtype: collections.abc.Iterator[Quad]
|
||||||
|
///
|
||||||
|
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||||
|
/// >>> list(store.quads_for_predicate(NamedNode('http://example.com/p')))
|
||||||
|
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||||
|
pub fn quads_for_predicate(&self, predicate: &PyAny) -> PyResult<QuadIter> { |
||||||
|
Ok(QuadIter { |
||||||
|
inner: self |
||||||
|
.inner |
||||||
|
.quads_for_predicate(&PyNamedNodeRef::try_from(predicate)?) |
||||||
|
.map(QuadRef::into_owned) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.into_iter(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
/// Looks for the quads with the given object.
|
||||||
|
///
|
||||||
|
/// :param object: the quad object.
|
||||||
|
/// :type object: NamedNode or BlankNode or Literal or Triple
|
||||||
|
/// :return: an iterator of the quads.
|
||||||
|
/// :rtype: collections.abc.Iterator[Quad]
|
||||||
|
///
|
||||||
|
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||||
|
/// >>> list(store.quads_for_object(Literal('1')))
|
||||||
|
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||||
|
pub fn quads_for_object(&self, object: &PyAny) -> PyResult<QuadIter> { |
||||||
|
Ok(QuadIter { |
||||||
|
inner: self |
||||||
|
.inner |
||||||
|
.quads_for_object(&PyTermRef::try_from(object)?) |
||||||
|
.map(QuadRef::into_owned) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.into_iter(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
/// Looks for the quads with the given graph name.
|
||||||
|
///
|
||||||
|
/// :param graph_name: the quad graph name.
|
||||||
|
/// :type graph_name: NamedNode or BlankNode or Literal or Triple
|
||||||
|
/// :return: an iterator of the quads.
|
||||||
|
/// :rtype: collections.abc.Iterator[Quad]
|
||||||
|
///
|
||||||
|
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||||
|
/// >>> list(store.quads_for_graph_name(NamedNode('http://example.com/g')))
|
||||||
|
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||||
|
pub fn quads_for_graph_name(&self, graph_name: &PyAny) -> PyResult<QuadIter> { |
||||||
|
Ok(QuadIter { |
||||||
|
inner: self |
||||||
|
.inner |
||||||
|
.quads_for_graph_name(&PyGraphNameRef::try_from(graph_name)?) |
||||||
|
.map(QuadRef::into_owned) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.into_iter(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
/// Adds a quad to the dataset.
|
||||||
|
///
|
||||||
|
/// :param quad: the quad to add.
|
||||||
|
/// :type quad: Quad
|
||||||
|
/// :rtype: None
|
||||||
|
///
|
||||||
|
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||||
|
/// >>> dataset = Dataset()
|
||||||
|
/// >>> dataset.add(quad)
|
||||||
|
/// >>> quad in dataset
|
||||||
|
/// True
|
||||||
|
fn add(&mut self, quad: &PyQuad) { |
||||||
|
self.inner.insert(quad); |
||||||
|
} |
||||||
|
|
||||||
|
/// Removes a quad from the dataset and raises an exception if it is not in the set.
|
||||||
|
///
|
||||||
|
/// :param quad: the quad to remove.
|
||||||
|
/// :type quad: Quad
|
||||||
|
/// :rtype: None
|
||||||
|
/// :raises KeyError: if the element was not in the set.
|
||||||
|
///
|
||||||
|
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||||
|
/// >>> dataset = Dataset([quad])
|
||||||
|
/// >>> dataset.remove(quad)
|
||||||
|
/// >>> quad in dataset
|
||||||
|
/// False
|
||||||
|
fn remove(&mut self, quad: &PyQuad) -> PyResult<()> { |
||||||
|
if self.inner.remove(quad) { |
||||||
|
Ok(()) |
||||||
|
} else { |
||||||
|
Err(PyKeyError::new_err(format!( |
||||||
|
"{} is not in the Dataset", |
||||||
|
QuadRef::from(quad) |
||||||
|
))) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Removes a quad from the dataset if it is present.
|
||||||
|
///
|
||||||
|
/// :param quad: the quad to remove.
|
||||||
|
/// :type quad: Quad
|
||||||
|
/// :rtype: None
|
||||||
|
///
|
||||||
|
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||||
|
/// >>> dataset = Dataset([quad])
|
||||||
|
/// >>> dataset.discard(quad)
|
||||||
|
/// >>> quad in dataset
|
||||||
|
/// False
|
||||||
|
fn discard(&mut self, quad: &PyQuad) { |
||||||
|
self.inner.remove(quad); |
||||||
|
} |
||||||
|
|
||||||
|
/// Removes all quads from the dataset.
|
||||||
|
///
|
||||||
|
/// :rtype: None
|
||||||
|
///
|
||||||
|
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||||
|
/// >>> dataset = Dataset([quad])
|
||||||
|
/// >>> dataset.clear()
|
||||||
|
/// >>> len(dataset)
|
||||||
|
/// 0
|
||||||
|
fn clear(&mut self) { |
||||||
|
self.inner.clear() |
||||||
|
} |
||||||
|
|
||||||
|
/// Canonicalizes the dataset by renaming blank nodes.
|
||||||
|
///
|
||||||
|
/// Warning: Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
|
||||||
|
/// Hence, this canonization might not be suitable for diffs.
|
||||||
|
///
|
||||||
|
/// Warning: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
|
||||||
|
///
|
||||||
|
/// :param algorithm: the canonicalization algorithm to use.
|
||||||
|
/// :type algorithm: CanonicalizationAlgorithm
|
||||||
|
/// :rtype: None
|
||||||
|
///
|
||||||
|
/// >>> d1 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
|
||||||
|
/// >>> d2 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
|
||||||
|
/// >>> d1 == d2
|
||||||
|
/// False
|
||||||
|
/// >>> d1.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
|
||||||
|
/// >>> d2.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
|
||||||
|
/// >>> d1 == d2
|
||||||
|
/// True
|
||||||
|
fn canonicalize(&mut self, algorithm: &PyCanonicalizationAlgorithm) { |
||||||
|
self.inner.canonicalize(algorithm.inner) |
||||||
|
} |
||||||
|
|
||||||
|
fn __str__(&self) -> String { |
||||||
|
self.inner.to_string() |
||||||
|
} |
||||||
|
|
||||||
|
fn __bool__(&self) -> bool { |
||||||
|
self.inner.is_empty() |
||||||
|
} |
||||||
|
|
||||||
|
fn __eq__(&self, other: &Self) -> bool { |
||||||
|
self.inner == other.inner |
||||||
|
} |
||||||
|
|
||||||
|
fn __ne__(&self, other: &Self) -> bool { |
||||||
|
self.inner != other.inner |
||||||
|
} |
||||||
|
|
||||||
|
fn __len__(&self) -> usize { |
||||||
|
self.inner.len() |
||||||
|
} |
||||||
|
|
||||||
|
fn __contains__(&self, quad: &PyQuad) -> bool { |
||||||
|
self.inner.contains(quad) |
||||||
|
} |
||||||
|
|
||||||
|
fn __iter__(&self) -> QuadIter { |
||||||
|
// TODO: very inefficient
|
||||||
|
QuadIter { |
||||||
|
inner: self |
||||||
|
.inner |
||||||
|
.iter() |
||||||
|
.map(QuadRef::into_owned) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.into_iter(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
#[pyclass(unsendable, module = "pyoxigraph")] |
||||||
|
pub struct QuadIter { |
||||||
|
inner: std::vec::IntoIter<Quad>, |
||||||
|
} |
||||||
|
|
||||||
|
#[pymethods] |
||||||
|
impl QuadIter { |
||||||
|
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { |
||||||
|
slf |
||||||
|
} |
||||||
|
|
||||||
|
fn __next__(&mut self) -> Option<PyQuad> { |
||||||
|
Some(self.inner.next()?.into()) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// RDF canonicalization algorithms.
|
||||||
|
///
|
||||||
|
/// The following algorithms are supported:
|
||||||
|
/// * :py:attr:`CanonicalizationAlgorithm.UNSTABLE`: an unstable algorithm preferred by PyOxigraph.
|
||||||
|
#[pyclass(name = "CanonicalizationAlgorithm", module = "pyoxigraph")] |
||||||
|
#[derive(Clone)] |
||||||
|
pub struct PyCanonicalizationAlgorithm { |
||||||
|
inner: CanonicalizationAlgorithm, |
||||||
|
} |
||||||
|
|
||||||
|
#[pymethods] |
||||||
|
impl PyCanonicalizationAlgorithm { |
||||||
|
/// The algorithm preferred by PyOxigraph.
|
||||||
|
///
|
||||||
|
/// Warning: Might change between Oxigraph versions. No stability guaranties.
|
||||||
|
#[classattr] |
||||||
|
const UNSTABLE: Self = Self { |
||||||
|
inner: CanonicalizationAlgorithm::Unstable, |
||||||
|
}; |
||||||
|
|
||||||
|
fn __repr__(&self) -> String { |
||||||
|
format!( |
||||||
|
"<CanonicalizationAlgorithm {}>", |
||||||
|
match self.inner { |
||||||
|
CanonicalizationAlgorithm::Unstable => "unstable", |
||||||
|
_ => "unknown", |
||||||
|
} |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn __hash__(&self) -> u64 { |
||||||
|
hash(&self.inner) |
||||||
|
} |
||||||
|
|
||||||
|
fn __eq__(&self, other: &Self) -> bool { |
||||||
|
self.inner == other.inner |
||||||
|
} |
||||||
|
|
||||||
|
fn __ne__(&self, other: &Self) -> bool { |
||||||
|
self.inner != other.inner |
||||||
|
} |
||||||
|
|
||||||
|
/// :rtype: CanonicalizationAlgorithm
|
||||||
|
fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { |
||||||
|
slf |
||||||
|
} |
||||||
|
|
||||||
|
/// :type memo: typing.Any
|
||||||
|
/// :rtype: CanonicalizationAlgorithm
|
||||||
|
#[allow(unused_variables)] |
||||||
|
fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ PyAny) -> PyRef<'a, Self> { |
||||||
|
slf |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue