Introduces a TermEncoder struct

Will allow configuring term encoding in the future
encoder
Tpt 3 years ago
parent 722836c1ec
commit f7cc600054
  1. 34
      lib/oxrdf/src/triple.rs
  2. 21
      lib/src/sparql/dataset.rs
  3. 11
      lib/src/sparql/update.rs
  4. 5
      lib/src/storage/binary_encoder.rs
  5. 28
      lib/src/storage/mod.rs
  6. 211
      lib/src/storage/numeric_encoder.rs
  7. 40
      lib/src/store.rs

@ -921,6 +921,23 @@ impl From<BlankNodeRef<'_>> for GraphName {
}
}
/// Converts an owned named-or-blank node into a graph name by delegating to the
/// conversion defined for the node kind it wraps.
impl From<NamedOrBlankNode> for GraphName {
    #[inline]
    fn from(node: NamedOrBlankNode) -> Self {
        match node {
            NamedOrBlankNode::NamedNode(named) => Self::from(named),
            NamedOrBlankNode::BlankNode(blank) => Self::from(blank),
        }
    }
}
/// Converts a borrowed named-or-blank node into an owned graph name.
impl From<NamedOrBlankNodeRef<'_>> for GraphName {
    #[inline]
    fn from(node: NamedOrBlankNodeRef<'_>) -> Self {
        // Take ownership of the borrowed node first, then reuse the owned conversion.
        let owned = node.into_owned();
        Self::from(owned)
    }
}
/// A possible borrowed graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -995,6 +1012,23 @@ impl<'a> From<&'a BlankNode> for GraphNameRef<'a> {
}
}
/// Converts a borrowed named-or-blank node into a borrowed graph name with the same
/// lifetime, delegating to the conversion defined for the node kind it wraps.
impl<'a> From<NamedOrBlankNodeRef<'a>> for GraphNameRef<'a> {
    #[inline]
    fn from(node: NamedOrBlankNodeRef<'a>) -> Self {
        match node {
            NamedOrBlankNodeRef::NamedNode(named) => Self::from(named),
            NamedOrBlankNodeRef::BlankNode(blank) => Self::from(blank),
        }
    }
}
/// Borrows an owned named-or-blank node as a graph name reference.
impl<'a> From<&'a NamedOrBlankNode> for GraphNameRef<'a> {
    #[inline]
    fn from(node: &'a NamedOrBlankNode) -> Self {
        // View the owned node through its borrowed form, then convert that.
        let borrowed = node.as_ref();
        Self::from(borrowed)
    }
}
impl<'a> From<&'a GraphName> for GraphNameRef<'a> {
#[inline]
fn from(node: &'a GraphName) -> Self {

@ -18,13 +18,20 @@ pub struct DatasetView {
impl DatasetView {
pub fn new(reader: StorageReader, dataset: &QueryDataset) -> Self {
let encoder = reader.term_encoder();
let dataset = EncodedDatasetSpec {
default: dataset
.default_graph_graphs()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
named: dataset
.available_named_graphs()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
default: dataset.default_graph_graphs().map(|graphs| {
graphs
.iter()
.map(|g| encoder.encode_graph_name(g))
.collect::<Vec<_>>()
}),
named: dataset.available_named_graphs().map(|graphs| {
graphs
.iter()
.map(|g| encoder.encode_term(g))
.collect::<Vec<_>>()
}),
};
Self {
reader,
@ -152,7 +159,7 @@ impl DatasetView {
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
let term = term.into();
let encoded = term.into();
let encoded = self.reader.term_encoder().encode_term(term);
insert_term(term, &encoded, &mut |key, value| {
self.insert_str(key, value);
Ok(())

@ -191,11 +191,12 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
fn eval_clear(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> {
match graph {
GraphTarget::NamedNode(graph_name) => {
if self
.transaction
.reader()
.contains_named_graph(&graph_name.as_ref().into())?
{
if self.transaction.reader().contains_named_graph(
&self
.transaction
.term_encoder()
.encode_graph_name(graph_name),
)? {
Ok(self.transaction.clear_graph(graph_name.into())?)
} else if silent {
Ok(())

@ -744,9 +744,10 @@ mod tests {
];
let decoder = TermDecoder::new(&store);
for term in terms {
let encoded = term.as_ref().into();
let encoder = TermEncoder::new();
let encoded = encoder.encode_term(&term);
assert_eq!(encoded, encoder.encode_term(&term));
store.insert_term(term.as_ref(), &encoded);
assert_eq!(encoded, term.as_ref().into());
assert_eq!(term, decoder.decode_term(&encoded).unwrap());
let mut buffer = Vec::new();

@ -10,7 +10,7 @@ use crate::storage::binary_encoder::{
};
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError};
use crate::storage::numeric_encoder::{
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder,
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder, TermEncoder,
};
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter};
use std::cmp::{max, min};
@ -294,6 +294,11 @@ pub struct StorageReader {
}
impl StorageReader {
/// Returns a [`TermEncoder`] for encoding terms to look up through this reader.
///
/// A fresh encoder is built on every call.
// `self` is not read here, hence the lint exemption below.
#[allow(clippy::unused_self)]
pub fn term_encoder(&self) -> TermEncoder {
TermEncoder::new()
}
/// Returns a [`TermDecoder`] built from this reader.
pub fn term_decoder(&self) -> TermDecoder<Self> {
TermDecoder::new(self)
}
@ -852,8 +857,13 @@ impl<'a> StorageWriter<'a> {
}
}
/// Returns a [`TermEncoder`] for encoding the terms, graph names and quads handled by this writer.
///
/// A fresh encoder is built on every call.
// `self` is not read here, hence the lint exemption below.
#[allow(clippy::unused_self)]
pub fn term_encoder(&self) -> TermEncoder {
TermEncoder::new()
}
pub fn insert(&mut self, quad: QuadRef<'_>) -> Result<bool, StorageError> {
let encoded = quad.into();
let encoded = self.term_encoder().encode_quad(quad);
self.buffer.clear();
let result = if quad.graph_name.is_default_graph() {
write_spo_quad(&mut self.buffer, &encoded);
@ -941,7 +951,7 @@ impl<'a> StorageWriter<'a> {
&mut self,
graph_name: NamedOrBlankNodeRef<'_>,
) -> Result<bool, StorageError> {
let encoded_graph_name = graph_name.into();
let encoded_graph_name = self.term_encoder().encode_graph_name(graph_name);
self.buffer.clear();
write_term(&mut self.buffer, &encoded_graph_name);
@ -1005,7 +1015,7 @@ impl<'a> StorageWriter<'a> {
}
pub fn remove(&mut self, quad: QuadRef<'_>) -> Result<bool, StorageError> {
self.remove_encoded(&quad.into())
self.remove_encoded(&self.term_encoder().encode_quad(quad))
}
fn remove_encoded(&mut self, quad: &EncodedQuad) -> Result<bool, StorageError> {
@ -1081,14 +1091,15 @@ impl<'a> StorageWriter<'a> {
self.remove_encoded(&quad?)?;
}
} else {
let graph_name = self.term_encoder().encode_graph_name(graph_name);
self.buffer.clear();
write_term(&mut self.buffer, &graph_name.into());
write_term(&mut self.buffer, &graph_name);
if self
.transaction
.contains_key_for_update(&self.storage.graphs_cf, &self.buffer)?
{
// The condition is useful to lock the graph itself and ensure no quad is inserted at the same time
for quad in self.reader().quads_for_graph(&graph_name.into()) {
for quad in self.reader().quads_for_graph(&graph_name) {
self.remove_encoded(&quad?)?;
}
}
@ -1114,7 +1125,7 @@ impl<'a> StorageWriter<'a> {
&mut self,
graph_name: NamedOrBlankNodeRef<'_>,
) -> Result<bool, StorageError> {
self.remove_encoded_named_graph(&graph_name.into())
self.remove_encoded_named_graph(&self.term_encoder().encode_graph_name(graph_name))
}
fn remove_encoded_named_graph(
@ -1327,8 +1338,9 @@ impl FileBulkLoader {
}
fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> {
let encoder = TermEncoder::new();
for quad in quads {
let encoded = EncodedQuad::from(quad.as_ref());
let encoded = encoder.encode_quad(&quad);
if quad.graph_name.is_default_graph() {
if self.triples.insert(encoded.clone()) {
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;

@ -429,33 +429,88 @@ impl From<EncodedTriple> for EncodedTerm {
}
}
impl From<NamedNodeRef<'_>> for EncodedTerm {
fn from(named_node: NamedNodeRef<'_>) -> Self {
Self::NamedNode {
iri_id: StrHash::new(named_node.as_str()),
/// A triple whose subject, predicate and object are each stored as an [`EncodedTerm`].
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTriple {
/// Encoded subject of the triple.
pub subject: EncodedTerm,
/// Encoded predicate of the triple.
pub predicate: EncodedTerm,
/// Encoded object of the triple.
pub object: EncodedTerm,
}
impl EncodedTriple {
/// Builds an encoded triple from its already-encoded subject, predicate and object.
pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
Self {
subject,
predicate,
object,
}
}
}
impl From<BlankNodeRef<'_>> for EncodedTerm {
fn from(blank_node: BlankNodeRef<'_>) -> Self {
/// A quad whose subject, predicate, object and graph name are each stored as an [`EncodedTerm`].
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedQuad {
/// Encoded subject of the quad.
pub subject: EncodedTerm,
/// Encoded predicate of the quad.
pub predicate: EncodedTerm,
/// Encoded object of the quad.
pub object: EncodedTerm,
/// Encoded name of the graph the quad belongs to.
pub graph_name: EncodedTerm,
}
impl EncodedQuad {
/// Builds an encoded quad from its already-encoded subject, predicate, object and graph name.
pub fn new(
subject: EncodedTerm,
predicate: EncodedTerm,
object: EncodedTerm,
graph_name: EncodedTerm,
) -> Self {
Self {
subject,
predicate,
object,
graph_name,
}
}
}
/// Encodes RDF terms, graph names, triples and quads into their `EncodedTerm`-based form.
///
/// The encoder currently holds no state; it exists so that term encoding can be
/// made configurable later without touching every call site.
///
/// `Default` is derived because the type exposes a zero-argument `new()`, and
/// `Debug` because the type is public.
#[derive(Debug, Default)]
pub struct TermEncoder {}
impl TermEncoder {
/// Creates a new encoder. The struct has no fields, so there is nothing to configure yet.
pub fn new() -> Self {
Self {}
}
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
match term.into() {
TermRef::NamedNode(named_node) => self.encode_named_node(named_node),
TermRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
TermRef::Literal(literal) => self.encode_literal(literal),
TermRef::Triple(triple) => self.encode_triple(triple.as_ref()).into(),
}
}
/// Encodes a named node, identifying it by the [`StrHash`] built from its IRI string.
// `self` is not read here, hence the lint exemption below.
#[allow(clippy::unused_self)]
fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> EncodedTerm {
    let iri_id = StrHash::new(named_node.as_str());
    EncodedTerm::NamedNode { iri_id }
}
#[allow(clippy::unused_self)]
fn encode_blank_node(&self, blank_node: BlankNodeRef<'_>) -> EncodedTerm {
if let Some(id) = blank_node.unique_id() {
Self::NumericalBlankNode { id }
EncodedTerm::NumericalBlankNode { id }
} else {
let id = blank_node.as_str();
if let Ok(id) = id.try_into() {
Self::SmallBlankNode(id)
EncodedTerm::SmallBlankNode(id)
} else {
Self::BigBlankNode {
EncodedTerm::BigBlankNode {
id_id: StrHash::new(id),
}
}
}
}
}
impl From<LiteralRef<'_>> for EncodedTerm {
fn from(literal: LiteralRef<'_>) -> Self {
#[allow(clippy::unused_self)]
fn encode_literal(&self, literal: LiteralRef<'_>) -> EncodedTerm {
let value = literal.value();
let datatype = literal.datatype().as_str();
let native_encoding = match datatype {
@ -463,20 +518,20 @@ impl From<LiteralRef<'_>> for EncodedTerm {
literal.language().map(|language| {
if let Ok(value) = SmallString::try_from(value) {
if let Ok(language) = SmallString::try_from(language) {
Self::SmallSmallLangStringLiteral { value, language }
EncodedTerm::SmallSmallLangStringLiteral { value, language }
} else {
Self::SmallBigLangStringLiteral {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: StrHash::new(language),
}
}
} else if let Ok(language) = SmallString::try_from(language) {
Self::BigSmallLangStringLiteral {
EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::new(value),
language,
}
} else {
Self::BigBigLangStringLiteral {
EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::new(value),
language_id: StrHash::new(language),
}
@ -487,9 +542,9 @@ impl From<LiteralRef<'_>> for EncodedTerm {
"http://www.w3.org/2001/XMLSchema#string" => {
let value = value;
Some(if let Ok(value) = SmallString::try_from(value) {
Self::SmallStringLiteral(value)
EncodedTerm::SmallStringLiteral(value)
} else {
Self::BigStringLiteral {
EncodedTerm::BigStringLiteral {
value_id: StrHash::new(value),
}
})
@ -532,12 +587,12 @@ impl From<LiteralRef<'_>> for EncodedTerm {
Some(term) => term,
None => {
if let Ok(value) = SmallString::try_from(value) {
Self::SmallTypedLiteral {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: StrHash::new(datatype),
}
} else {
Self::BigTypedLiteral {
EncodedTerm::BigTypedLiteral {
value_id: StrHash::new(value),
datatype_id: StrHash::new(datatype),
}
@ -545,112 +600,30 @@ impl From<LiteralRef<'_>> for EncodedTerm {
}
}
}
}
impl From<NamedOrBlankNodeRef<'_>> for EncodedTerm {
fn from(term: NamedOrBlankNodeRef<'_>) -> Self {
match term {
NamedOrBlankNodeRef::NamedNode(named_node) => named_node.into(),
NamedOrBlankNodeRef::BlankNode(blank_node) => blank_node.into(),
}
}
}
impl From<SubjectRef<'_>> for EncodedTerm {
fn from(term: SubjectRef<'_>) -> Self {
match term {
SubjectRef::NamedNode(named_node) => named_node.into(),
SubjectRef::BlankNode(blank_node) => blank_node.into(),
SubjectRef::Triple(triple) => triple.as_ref().into(),
}
}
}
impl From<TermRef<'_>> for EncodedTerm {
fn from(term: TermRef<'_>) -> Self {
match term {
TermRef::NamedNode(named_node) => named_node.into(),
TermRef::BlankNode(blank_node) => blank_node.into(),
TermRef::Literal(literal) => literal.into(),
TermRef::Triple(triple) => triple.as_ref().into(),
}
}
}
impl From<GraphNameRef<'_>> for EncodedTerm {
fn from(name: GraphNameRef<'_>) -> Self {
match name {
GraphNameRef::NamedNode(named_node) => named_node.into(),
GraphNameRef::BlankNode(blank_node) => blank_node.into(),
GraphNameRef::DefaultGraph => Self::DefaultGraph,
/// Encodes any value convertible to a [`GraphNameRef`].
///
/// Named and blank nodes go through the same encoders as ordinary terms; the
/// default graph maps to [`EncodedTerm::DefaultGraph`].
pub fn encode_graph_name<'a>(&self, name: impl Into<GraphNameRef<'a>>) -> EncodedTerm {
    let graph = name.into();
    match graph {
        GraphNameRef::DefaultGraph => EncodedTerm::DefaultGraph,
        GraphNameRef::NamedNode(iri) => self.encode_named_node(iri),
        GraphNameRef::BlankNode(bnode) => self.encode_blank_node(bnode),
    }
}
}
impl From<TripleRef<'_>> for EncodedTerm {
fn from(triple: TripleRef<'_>) -> Self {
Self::Triple(Rc::new(triple.into()))
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTriple {
pub subject: EncodedTerm,
pub predicate: EncodedTerm,
pub object: EncodedTerm,
}
impl EncodedTriple {
pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
Self {
subject,
predicate,
object,
/// Encodes a triple by encoding its subject, predicate and object in that order.
fn encode_triple(&self, triple: TripleRef<'_>) -> EncodedTriple {
    let subject = self.encode_term(triple.subject);
    let predicate = self.encode_term(triple.predicate);
    let object = self.encode_term(triple.object);
    EncodedTriple {
        subject,
        predicate,
        object,
    }
}
}
impl From<TripleRef<'_>> for EncodedTriple {
fn from(triple: TripleRef<'_>) -> Self {
Self {
subject: triple.subject.into(),
predicate: triple.predicate.into(),
object: triple.object.into(),
}
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedQuad {
pub subject: EncodedTerm,
pub predicate: EncodedTerm,
pub object: EncodedTerm,
pub graph_name: EncodedTerm,
}
impl EncodedQuad {
pub fn new(
subject: EncodedTerm,
predicate: EncodedTerm,
object: EncodedTerm,
graph_name: EncodedTerm,
) -> Self {
Self {
subject,
predicate,
object,
graph_name,
}
}
}
impl From<QuadRef<'_>> for EncodedQuad {
fn from(quad: QuadRef<'_>) -> Self {
Self {
subject: quad.subject.into(),
predicate: quad.predicate.into(),
object: quad.object.into(),
graph_name: quad.graph_name.into(),
/// Encodes any value convertible to a [`QuadRef`].
///
/// Subject, predicate and object are encoded as terms; the graph name goes
/// through `encode_graph_name`.
pub fn encode_quad<'a>(&self, quad: impl Into<QuadRef<'a>>) -> EncodedQuad {
    let quad_ref = quad.into();
    let subject = self.encode_term(quad_ref.subject);
    let predicate = self.encode_term(quad_ref.predicate);
    let object = self.encode_term(quad_ref.object);
    let graph_name = self.encode_graph_name(quad_ref.graph_name);
    EncodedQuad {
        subject,
        predicate,
        object,
        graph_name,
    }
}
}

@ -32,7 +32,6 @@ use crate::sparql::{
evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update,
UpdateOptions,
};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm};
#[cfg(not(target_arch = "wasm32"))]
use crate::storage::StorageBulkLoader;
use crate::storage::{
@ -184,12 +183,13 @@ impl Store {
graph_name: Option<GraphNameRef<'_>>,
) -> QuadIter {
let reader = self.storage.snapshot();
let encoder = reader.term_encoder();
QuadIter {
iter: reader.quads_for_pattern(
subject.map(EncodedTerm::from).as_ref(),
predicate.map(EncodedTerm::from).as_ref(),
object.map(EncodedTerm::from).as_ref(),
graph_name.map(EncodedTerm::from).as_ref(),
subject.map(|t| encoder.encode_term(t)).as_ref(),
predicate.map(|t| encoder.encode_term(t)).as_ref(),
object.map(|t| encoder.encode_term(t)).as_ref(),
graph_name.map(|t| encoder.encode_graph_name(t)).as_ref(),
),
reader,
}
@ -236,8 +236,9 @@ impl Store {
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, StorageError> {
let quad = EncodedQuad::from(quad.into());
self.storage.snapshot().contains(&quad)
let reader = self.storage.snapshot();
let quad = reader.term_encoder().encode_quad(quad);
reader.contains(&quad)
}
/// Returns the number of quads in the store.
@ -611,8 +612,9 @@ impl Store {
&self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, StorageError> {
let graph_name = EncodedTerm::from(graph_name.into());
self.storage.snapshot().contains_named_graph(&graph_name)
let reader = self.storage.snapshot();
let graph_name = reader.term_encoder().encode_graph_name(graph_name.into());
reader.contains_named_graph(&graph_name)
}
/// Inserts a graph into this store.
@ -899,12 +901,13 @@ impl<'a> Transaction<'a> {
graph_name: Option<GraphNameRef<'_>>,
) -> QuadIter {
let reader = self.writer.reader();
let encoder = reader.term_encoder();
QuadIter {
iter: reader.quads_for_pattern(
subject.map(EncodedTerm::from).as_ref(),
predicate.map(EncodedTerm::from).as_ref(),
object.map(EncodedTerm::from).as_ref(),
graph_name.map(EncodedTerm::from).as_ref(),
subject.map(|t| encoder.encode_term(t)).as_ref(),
predicate.map(|t| encoder.encode_term(t)).as_ref(),
object.map(|t| encoder.encode_term(t)).as_ref(),
graph_name.map(|t| encoder.encode_graph_name(t)).as_ref(),
),
reader,
}
@ -917,8 +920,9 @@ impl<'a> Transaction<'a> {
/// Checks if this store contains a given quad.
pub fn contains<'b>(&self, quad: impl Into<QuadRef<'b>>) -> Result<bool, StorageError> {
let quad = EncodedQuad::from(quad.into());
self.writer.reader().contains(&quad)
let reader = self.writer.reader();
let quad = reader.term_encoder().encode_quad(quad);
reader.contains(&quad)
}
/// Returns the number of quads in the store.
@ -1125,9 +1129,9 @@ impl<'a> Transaction<'a> {
&self,
graph_name: impl Into<NamedOrBlankNodeRef<'b>>,
) -> Result<bool, StorageError> {
self.writer
.reader()
.contains_named_graph(&EncodedTerm::from(graph_name.into()))
let reader = self.writer.reader();
let graph_name = reader.term_encoder().encode_graph_name(graph_name.into());
reader.contains_named_graph(&graph_name)
}
/// Inserts a graph into this store.

Loading…
Cancel
Save