parent
2998f795fd
commit
f7023a381e
@ -0,0 +1,322 @@ |
||||
use crate::model::{hash, PyGraphNameRef, PyNamedNodeRef, PyQuad, PySubjectRef, PyTermRef}; |
||||
use oxigraph::model::dataset::{CanonicalizationAlgorithm, Dataset}; |
||||
use oxigraph::model::{Quad, QuadRef}; |
||||
use pyo3::exceptions::PyKeyError; |
||||
use pyo3::prelude::*; |
||||
|
||||
/// An in-memory `RDF dataset <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_.
|
||||
///
|
||||
/// It can accommodate a fairly large number of quads (in the few millions).
|
||||
///
|
||||
/// Use :py:class:`Store` if you need on-disk persistence or SPARQL.
|
||||
///
|
||||
/// Warning: It interns the strings and does not do any garbage collection yet:
|
||||
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
|
||||
///
|
||||
/// :param quads: some quads to initialize the dataset with.
|
||||
/// :type quads: collections.abc.Iterable[Quad]
|
||||
///
|
||||
/// The :py:class:`str` function provides an N-Quads serialization:
|
||||
///
|
||||
/// >>> str(Dataset([Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))]))
|
||||
/// '<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n'
|
||||
#[pyclass(name = "Dataset", module = "pyoxigraph")] |
||||
#[derive(Eq, PartialEq, Debug, Clone)] |
||||
pub struct PyDataset { |
||||
inner: Dataset, |
||||
} |
||||
|
||||
#[pymethods] |
||||
impl PyDataset { |
||||
#[new] |
||||
#[pyo3(signature = (quads = None))] |
||||
fn new(quads: Option<&PyAny>) -> PyResult<Self> { |
||||
let mut inner = Dataset::new(); |
||||
if let Some(quads) = quads { |
||||
for quad in quads.iter()? { |
||||
inner.insert(&*quad?.extract::<PyRef<'_, PyQuad>>()?); |
||||
} |
||||
} |
||||
Ok(Self { inner }) |
||||
} |
||||
|
||||
/// Looks for the quads with the given subject.
|
||||
///
|
||||
/// :param subject: the quad subject.
|
||||
/// :type subject: NamedNode or BlankNode or Triple
|
||||
/// :return: an iterator of the quads.
|
||||
/// :rtype: collections.abc.Iterator[Quad]
|
||||
///
|
||||
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||
/// >>> list(store.quads_for_subject(NamedNode('http://example.com')))
|
||||
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||
pub fn quads_for_subject(&self, subject: &PyAny) -> PyResult<QuadIter> { |
||||
Ok(QuadIter { |
||||
inner: self |
||||
.inner |
||||
.quads_for_subject(&PySubjectRef::try_from(subject)?) |
||||
.map(QuadRef::into_owned) |
||||
.collect::<Vec<_>>() |
||||
.into_iter(), |
||||
}) |
||||
} |
||||
|
||||
/// Looks for the quads with the given predicate.
|
||||
///
|
||||
/// :param predicate: the quad predicate.
|
||||
/// :type subject: NamedNode
|
||||
/// :return: an iterator of the quads.
|
||||
/// :rtype: collections.abc.Iterator[Quad]
|
||||
///
|
||||
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||
/// >>> list(store.quads_for_predicate(NamedNode('http://example.com/p')))
|
||||
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||
pub fn quads_for_predicate(&self, predicate: &PyAny) -> PyResult<QuadIter> { |
||||
Ok(QuadIter { |
||||
inner: self |
||||
.inner |
||||
.quads_for_predicate(&PyNamedNodeRef::try_from(predicate)?) |
||||
.map(QuadRef::into_owned) |
||||
.collect::<Vec<_>>() |
||||
.into_iter(), |
||||
}) |
||||
} |
||||
|
||||
/// Looks for the quads with the given object.
|
||||
///
|
||||
/// :param object: the quad object.
|
||||
/// :type object: NamedNode or BlankNode or Literal or Triple
|
||||
/// :return: an iterator of the quads.
|
||||
/// :rtype: collections.abc.Iterator[Quad]
|
||||
///
|
||||
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||
/// >>> list(store.quads_for_object(Literal('1')))
|
||||
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||
pub fn quads_for_object(&self, object: &PyAny) -> PyResult<QuadIter> { |
||||
Ok(QuadIter { |
||||
inner: self |
||||
.inner |
||||
.quads_for_object(&PyTermRef::try_from(object)?) |
||||
.map(QuadRef::into_owned) |
||||
.collect::<Vec<_>>() |
||||
.into_iter(), |
||||
}) |
||||
} |
||||
|
||||
/// Looks for the quads with the given graph name.
|
||||
///
|
||||
/// :param graph_name: the quad graph name.
|
||||
/// :type graph_name: NamedNode or BlankNode or Literal or Triple
|
||||
/// :return: an iterator of the quads.
|
||||
/// :rtype: collections.abc.Iterator[Quad]
|
||||
///
|
||||
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
|
||||
/// >>> list(store.quads_for_graph_name(NamedNode('http://example.com/g')))
|
||||
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
|
||||
pub fn quads_for_graph_name(&self, graph_name: &PyAny) -> PyResult<QuadIter> { |
||||
Ok(QuadIter { |
||||
inner: self |
||||
.inner |
||||
.quads_for_graph_name(&PyGraphNameRef::try_from(graph_name)?) |
||||
.map(QuadRef::into_owned) |
||||
.collect::<Vec<_>>() |
||||
.into_iter(), |
||||
}) |
||||
} |
||||
|
||||
/// Adds a quad to the dataset.
|
||||
///
|
||||
/// :param quad: the quad to add.
|
||||
/// :type quad: Quad
|
||||
/// :rtype: None
|
||||
///
|
||||
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||
/// >>> dataset = Dataset()
|
||||
/// >>> dataset.add(quad)
|
||||
/// >>> quad in dataset
|
||||
/// True
|
||||
fn add(&mut self, quad: &PyQuad) { |
||||
self.inner.insert(quad); |
||||
} |
||||
|
||||
/// Removes a quad from the dataset and raises an exception if it is not in the set.
|
||||
///
|
||||
/// :param quad: the quad to remove.
|
||||
/// :type quad: Quad
|
||||
/// :rtype: None
|
||||
/// :raises KeyError: if the element was not in the set.
|
||||
///
|
||||
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||
/// >>> dataset = Dataset([quad])
|
||||
/// >>> dataset.remove(quad)
|
||||
/// >>> quad in dataset
|
||||
/// False
|
||||
fn remove(&mut self, quad: &PyQuad) -> PyResult<()> { |
||||
if self.inner.remove(quad) { |
||||
Ok(()) |
||||
} else { |
||||
Err(PyKeyError::new_err(format!( |
||||
"{} is not in the Dataset", |
||||
QuadRef::from(quad) |
||||
))) |
||||
} |
||||
} |
||||
|
||||
/// Removes a quad from the dataset if it is present.
|
||||
///
|
||||
/// :param quad: the quad to remove.
|
||||
/// :type quad: Quad
|
||||
/// :rtype: None
|
||||
///
|
||||
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||
/// >>> dataset = Dataset([quad])
|
||||
/// >>> dataset.discard(quad)
|
||||
/// >>> quad in dataset
|
||||
/// False
|
||||
fn discard(&mut self, quad: &PyQuad) { |
||||
self.inner.remove(quad); |
||||
} |
||||
|
||||
/// Removes all quads from the dataset.
|
||||
///
|
||||
/// :rtype: None
|
||||
///
|
||||
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
|
||||
/// >>> dataset = Dataset([quad])
|
||||
/// >>> dataset.clear()
|
||||
/// >>> len(dataset)
|
||||
/// 0
|
||||
fn clear(&mut self) { |
||||
self.inner.clear() |
||||
} |
||||
|
||||
/// Canonicalizes the dataset by renaming blank nodes.
|
||||
///
|
||||
/// Warning: Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
|
||||
/// Hence, this canonization might not be suitable for diffs.
|
||||
///
|
||||
/// Warning: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
|
||||
///
|
||||
/// :param algorithm: the canonicalization algorithm to use.
|
||||
/// :type algorithm: CanonicalizationAlgorithm
|
||||
/// :rtype: None
|
||||
///
|
||||
/// >>> d1 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
|
||||
/// >>> d2 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
|
||||
/// >>> d1 == d2
|
||||
/// False
|
||||
/// >>> d1.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
|
||||
/// >>> d2.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
|
||||
/// >>> d1 == d2
|
||||
/// True
|
||||
fn canonicalize(&mut self, algorithm: &PyCanonicalizationAlgorithm) { |
||||
self.inner.canonicalize(algorithm.inner) |
||||
} |
||||
|
||||
fn __str__(&self) -> String { |
||||
self.inner.to_string() |
||||
} |
||||
|
||||
fn __bool__(&self) -> bool { |
||||
self.inner.is_empty() |
||||
} |
||||
|
||||
fn __eq__(&self, other: &Self) -> bool { |
||||
self.inner == other.inner |
||||
} |
||||
|
||||
fn __ne__(&self, other: &Self) -> bool { |
||||
self.inner != other.inner |
||||
} |
||||
|
||||
fn __len__(&self) -> usize { |
||||
self.inner.len() |
||||
} |
||||
|
||||
fn __contains__(&self, quad: &PyQuad) -> bool { |
||||
self.inner.contains(quad) |
||||
} |
||||
|
||||
fn __iter__(&self) -> QuadIter { |
||||
// TODO: very inefficient
|
||||
QuadIter { |
||||
inner: self |
||||
.inner |
||||
.iter() |
||||
.map(QuadRef::into_owned) |
||||
.collect::<Vec<_>>() |
||||
.into_iter(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
#[pyclass(unsendable, module = "pyoxigraph")] |
||||
pub struct QuadIter { |
||||
inner: std::vec::IntoIter<Quad>, |
||||
} |
||||
|
||||
#[pymethods] |
||||
impl QuadIter { |
||||
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { |
||||
slf |
||||
} |
||||
|
||||
fn __next__(&mut self) -> Option<PyQuad> { |
||||
Some(self.inner.next()?.into()) |
||||
} |
||||
} |
||||
|
||||
/// RDF canonicalization algorithms.
|
||||
///
|
||||
/// The following algorithms are supported:
|
||||
/// * :py:attr:`CanonicalizationAlgorithm.UNSTABLE`: an unstable algorithm preferred by PyOxigraph.
|
||||
#[pyclass(name = "CanonicalizationAlgorithm", module = "pyoxigraph")] |
||||
#[derive(Clone)] |
||||
pub struct PyCanonicalizationAlgorithm { |
||||
inner: CanonicalizationAlgorithm, |
||||
} |
||||
|
||||
#[pymethods] |
||||
impl PyCanonicalizationAlgorithm { |
||||
/// The algorithm preferred by PyOxigraph.
|
||||
///
|
||||
/// Warning: Might change between Oxigraph versions. No stability guaranties.
|
||||
#[classattr] |
||||
const UNSTABLE: Self = Self { |
||||
inner: CanonicalizationAlgorithm::Unstable, |
||||
}; |
||||
|
||||
fn __repr__(&self) -> String { |
||||
format!( |
||||
"<CanonicalizationAlgorithm {}>", |
||||
match self.inner { |
||||
CanonicalizationAlgorithm::Unstable => "unstable", |
||||
_ => "unknown", |
||||
} |
||||
) |
||||
} |
||||
|
||||
fn __hash__(&self) -> u64 { |
||||
hash(&self.inner) |
||||
} |
||||
|
||||
fn __eq__(&self, other: &Self) -> bool { |
||||
self.inner == other.inner |
||||
} |
||||
|
||||
fn __ne__(&self, other: &Self) -> bool { |
||||
self.inner != other.inner |
||||
} |
||||
|
||||
/// :rtype: CanonicalizationAlgorithm
|
||||
fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { |
||||
slf |
||||
} |
||||
|
||||
/// :type memo: typing.Any
|
||||
/// :rtype: CanonicalizationAlgorithm
|
||||
#[allow(unused_variables)] |
||||
fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ PyAny) -> PyRef<'a, Self> { |
||||
slf |
||||
} |
||||
} |
Loading…
Reference in new issue