Uses From for term encoding without insertion

Makes code simpler
pull/171/head
Tpt 4 years ago
parent 0b278a88a3
commit 0071f82662
  1. 23
      lib/src/sparql/dataset.rs
  2. 10
      lib/src/sparql/update.rs
  3. 2
      lib/src/storage/binary_encoder.rs
  4. 335
      lib/src/storage/numeric_encoder.rs
  5. 25
      lib/src/store.rs

@ -1,9 +1,6 @@
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{
get_encoded_graph_name, get_encoded_subject, EncodedQuad, EncodedTerm, StrContainer, StrHash,
StrLookup,
};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup};
use crate::storage::Storage;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
@ -19,18 +16,12 @@ pub(crate) struct DatasetView {
impl DatasetView {
pub fn new(storage: Storage, dataset: &QueryDataset) -> Result<Self, EvaluationError> {
let dataset = EncodedDatasetSpec {
default: dataset.default_graph_graphs().map(|graphs| {
graphs
.iter()
.map(|g| get_encoded_graph_name(g.as_ref()))
.collect::<Vec<_>>()
}),
named: dataset.available_named_graphs().map(|graphs| {
graphs
.iter()
.map(|g| get_encoded_subject(g.as_ref()))
.collect::<Vec<_>>()
}),
default: dataset
.default_graph_graphs()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
named: dataset
.available_named_graphs()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
};
Ok(Self {
storage,

@ -10,8 +10,7 @@ use crate::sparql::plan_builder::PlanBuilder;
use crate::sparql::{EvaluationError, UpdateOptions};
use crate::storage::io::load_graph;
use crate::storage::numeric_encoder::{
get_encoded_literal, get_encoded_named_node, EncodedQuad, EncodedTerm, EncodedTriple,
StrLookup, WriteEncoder,
EncodedQuad, EncodedTerm, EncodedTriple, StrLookup, WriteEncoder,
};
use crate::storage::Storage;
use http::header::{ACCEPT, CONTENT_TYPE, USER_AGENT};
@ -645,11 +644,11 @@ impl<'a> SimpleUpdateEvaluator<'a> {
}
fn encode_named_node_for_deletion(&self, term: &NamedNode) -> EncodedTerm {
get_encoded_named_node(NamedNodeRef::new_unchecked(&term.iri))
NamedNodeRef::new_unchecked(&term.iri).into()
}
fn encode_literal_for_deletion(&self, term: &Literal) -> EncodedTerm {
get_encoded_literal(match term {
match term {
Literal::Simple { value } => LiteralRef::new_simple_literal(value),
Literal::LanguageTaggedString { value, language } => {
LiteralRef::new_language_tagged_literal_unchecked(value, language)
@ -657,7 +656,8 @@ impl<'a> SimpleUpdateEvaluator<'a> {
Literal::Typed { value, datatype } => {
LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(&datatype.iri))
}
})
}
.into()
}
fn encode_triple_for_deletion(&self, triple: &GroundTriple) -> EncodedTerm {

@ -738,7 +738,7 @@ mod tests {
];
for term in terms {
let encoded = store.encode_term(term.as_ref()).unwrap();
assert_eq!(encoded, get_encoded_term(term.as_ref()));
assert_eq!(encoded, term.as_ref().into());
assert_eq!(term, store.decode_term(&encoded).unwrap());
let mut buffer = Vec::new();

@ -481,6 +481,163 @@ impl From<EncodedTriple> for EncodedTerm {
}
}
/// Encodes a named node as [`EncodedTerm::NamedNode`], identified by the [`StrHash`] of its IRI.
///
/// Only the hash is computed here; the conversion itself does not store the IRI string.
impl From<NamedNodeRef<'_>> for EncodedTerm {
    fn from(named_node: NamedNodeRef<'_>) -> Self {
        let iri_id = StrHash::new(named_node.as_str());
        Self::NamedNode { iri_id }
    }
}
/// Encodes a blank node, picking the most compact representation available.
///
/// * When `BlankNodeRef::id` returns `Some`, the id is stored as `NumericalBlankNode`.
/// * Otherwise the textual label is kept inline as `SmallBlankNode` when `try_into` succeeds,
///   and is replaced by its [`StrHash`] (`BigBlankNode`) when it does not.
impl From<BlankNodeRef<'_>> for EncodedTerm {
    fn from(blank_node: BlankNodeRef<'_>) -> Self {
        match blank_node.id() {
            Some(id) => Self::NumericalBlankNode { id },
            None => {
                let label = blank_node.as_str();
                match label.try_into() {
                    Ok(inline) => Self::SmallBlankNode(inline),
                    Err(_) => Self::BigBlankNode {
                        id_id: StrHash::new(label),
                    },
                }
            }
        }
    }
}
/// Encodes a literal as an [`EncodedTerm`] from its lexical value, datatype and language tag.
///
/// The datatype IRI selects the encoding:
/// * `rdf:langString` with a language tag: one of the four `*LangStringLiteral` variants. The
///   value and the tag are each kept inline when they convert to a [`SmallString`] and are
///   replaced by their [`StrHash`] otherwise.
/// * `xsd:string`: `SmallStringLiteral` or `BigStringLiteral`, following the same inline/hash
///   rule.
/// * The other listed XSD datatypes: whatever the matching `parse_*_str` helper returns.
///
/// When no arm yields a term (unlisted datatype, `rdf:langString` without a language tag, or a
/// helper returning `None` — presumably for an invalid lexical form, to be confirmed against the
/// helpers), the literal falls back to `SmallTypedLiteral`/`BigTypedLiteral`, which keep the
/// datatype as a [`StrHash`].
impl From<LiteralRef<'_>> for EncodedTerm {
    fn from(literal: LiteralRef<'_>) -> Self {
        let value = literal.value();
        let datatype = literal.datatype().as_str();
        let native_encoding = match datatype {
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => {
                // No language tag means no dedicated encoding: the generic fallback handles it.
                literal.language().map(|language| {
                    if let Ok(value) = SmallString::try_from(value) {
                        if let Ok(language) = SmallString::try_from(language) {
                            EncodedTerm::SmallSmallLangStringLiteral { value, language }
                        } else {
                            EncodedTerm::SmallBigLangStringLiteral {
                                value,
                                language_id: StrHash::new(language),
                            }
                        }
                    } else if let Ok(language) = SmallString::try_from(language) {
                        EncodedTerm::BigSmallLangStringLiteral {
                            value_id: StrHash::new(value),
                            language,
                        }
                    } else {
                        EncodedTerm::BigBigLangStringLiteral {
                            value_id: StrHash::new(value),
                            language_id: StrHash::new(language),
                        }
                    }
                })
            }
            "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
            "http://www.w3.org/2001/XMLSchema#string" => {
                Some(if let Ok(value) = SmallString::try_from(value) {
                    EncodedTerm::SmallStringLiteral(value)
                } else {
                    EncodedTerm::BigStringLiteral {
                        value_id: StrHash::new(value),
                    }
                })
            }
            "http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
            "http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
            "http://www.w3.org/2001/XMLSchema#integer"
            | "http://www.w3.org/2001/XMLSchema#byte"
            | "http://www.w3.org/2001/XMLSchema#short"
            | "http://www.w3.org/2001/XMLSchema#int"
            | "http://www.w3.org/2001/XMLSchema#long"
            | "http://www.w3.org/2001/XMLSchema#unsignedByte"
            | "http://www.w3.org/2001/XMLSchema#unsignedShort"
            | "http://www.w3.org/2001/XMLSchema#unsignedInt"
            | "http://www.w3.org/2001/XMLSchema#unsignedLong"
            | "http://www.w3.org/2001/XMLSchema#positiveInteger"
            | "http://www.w3.org/2001/XMLSchema#negativeInteger"
            | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
            | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => parse_integer_str(value),
            "http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(value),
            "http://www.w3.org/2001/XMLSchema#dateTime"
            | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => parse_date_time_str(value),
            "http://www.w3.org/2001/XMLSchema#time" => parse_time_str(value),
            "http://www.w3.org/2001/XMLSchema#date" => parse_date_str(value),
            "http://www.w3.org/2001/XMLSchema#gYearMonth" => parse_g_year_month_str(value),
            "http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(value),
            "http://www.w3.org/2001/XMLSchema#gMonthDay" => parse_g_month_day_str(value),
            "http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(value),
            "http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(value),
            "http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(value),
            "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
                parse_year_month_duration_str(value)
            }
            "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
                parse_day_time_duration_str(value)
            }
            _ => None,
        };
        // Generic fallback: keep the lexical form (inline or hashed) next to the datatype hash.
        native_encoding.unwrap_or_else(|| {
            if let Ok(value) = SmallString::try_from(value) {
                EncodedTerm::SmallTypedLiteral {
                    value,
                    datatype_id: StrHash::new(datatype),
                }
            } else {
                EncodedTerm::BigTypedLiteral {
                    value_id: StrHash::new(value),
                    datatype_id: StrHash::new(datatype),
                }
            }
        })
    }
}
/// Encodes a subject by delegating to the conversion of the concrete term it wraps
/// (named node, blank node or quoted triple).
impl From<SubjectRef<'_>> for EncodedTerm {
    fn from(term: SubjectRef<'_>) -> Self {
        match term {
            SubjectRef::NamedNode(iri) => Self::from(iri),
            SubjectRef::BlankNode(node) => Self::from(node),
            SubjectRef::Triple(inner) => Self::from(inner.as_ref()),
        }
    }
}
/// Encodes any term by delegating to the conversion of the concrete kind it wraps
/// (named node, blank node, literal or quoted triple).
impl From<TermRef<'_>> for EncodedTerm {
    fn from(term: TermRef<'_>) -> Self {
        match term {
            TermRef::NamedNode(iri) => Self::from(iri),
            TermRef::BlankNode(node) => Self::from(node),
            TermRef::Literal(lit) => Self::from(lit),
            TermRef::Triple(inner) => Self::from(inner.as_ref()),
        }
    }
}
/// Encodes a graph name: named and blank nodes use their own conversions, while the default
/// graph maps to the dedicated [`EncodedTerm::DefaultGraph`] variant.
impl From<GraphNameRef<'_>> for EncodedTerm {
    fn from(name: GraphNameRef<'_>) -> Self {
        match name {
            GraphNameRef::DefaultGraph => Self::DefaultGraph,
            GraphNameRef::NamedNode(iri) => Self::from(iri),
            GraphNameRef::BlankNode(node) => Self::from(node),
        }
    }
}
impl From<TripleRef<'_>> for EncodedTerm {
fn from(triple: TripleRef<'_>) -> Self {
EncodedTerm::Triple(Rc::new(triple.into()))
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTriple {
pub subject: EncodedTerm,
@ -498,6 +655,16 @@ impl EncodedTriple {
}
}
impl From<TripleRef<'_>> for EncodedTriple {
fn from(triple: TripleRef<'_>) -> Self {
EncodedTriple {
subject: triple.subject.into(),
predicate: triple.predicate.into(),
object: triple.object.into(),
}
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedQuad {
pub subject: EncodedTerm,
@ -522,6 +689,17 @@ impl EncodedQuad {
}
}
impl From<QuadRef<'_>> for EncodedQuad {
fn from(quad: QuadRef<'_>) -> Self {
EncodedQuad {
subject: quad.subject.into(),
predicate: quad.predicate.into(),
object: quad.object.into(),
graph_name: quad.graph_name.into(),
}
}
}
pub(crate) trait StrLookup {
type Error: Error + Into<EvaluationError> + 'static;
@ -533,163 +711,6 @@ pub(crate) trait StrLookup {
/// A [`StrLookup`] that can additionally record strings under their [`StrHash`].
pub(crate) trait StrContainer: StrLookup {
/// Records `value` under `key`.
///
/// NOTE(review): the meaning of the returned `bool` is not visible from this declaration —
/// presumably it tells whether the string was newly inserted; confirm against the implementors.
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, Self::Error>;
}
/// Builds the [`EncodedTerm::NamedNode`] for `named_node`, identified by the [`StrHash`] of its
/// IRI. Only the hash is computed; the IRI string is not stored by this function.
pub(crate) fn get_encoded_named_node(named_node: NamedNodeRef<'_>) -> EncodedTerm {
    let iri_id = StrHash::new(named_node.as_str());
    EncodedTerm::NamedNode { iri_id }
}
/// Encodes a blank node, picking the most compact representation available.
///
/// * When `BlankNodeRef::id` returns `Some`, the id is stored as `NumericalBlankNode`.
/// * Otherwise the textual label is kept inline as `SmallBlankNode` when `try_into` succeeds,
///   and is replaced by its [`StrHash`] (`BigBlankNode`) when it does not.
pub(crate) fn get_encoded_blank_node(blank_node: BlankNodeRef<'_>) -> EncodedTerm {
    match blank_node.id() {
        Some(id) => EncodedTerm::NumericalBlankNode { id },
        None => {
            let label = blank_node.as_str();
            match label.try_into() {
                Ok(inline) => EncodedTerm::SmallBlankNode(inline),
                Err(_) => EncodedTerm::BigBlankNode {
                    id_id: StrHash::new(label),
                },
            }
        }
    }
}
pub(crate) fn get_encoded_literal(literal: LiteralRef<'_>) -> EncodedTerm {
let value = literal.value();
let datatype = literal.datatype().as_str();
let native_encoding = match datatype {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => {
if let Some(language) = literal.language() {
Some(if let Ok(value) = SmallString::try_from(value) {
if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
} else {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: StrHash::new(language),
}
}
} else if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::new(value),
language,
}
} else {
EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::new(value),
language_id: StrHash::new(language),
}
})
} else {
None
}
}
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
"http://www.w3.org/2001/XMLSchema#string" => {
let value = value;
Some(if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: StrHash::new(value),
}
})
}
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
"http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short"
| "http://www.w3.org/2001/XMLSchema#int"
| "http://www.w3.org/2001/XMLSchema#long"
| "http://www.w3.org/2001/XMLSchema#unsignedByte"
| "http://www.w3.org/2001/XMLSchema#unsignedShort"
| "http://www.w3.org/2001/XMLSchema#unsignedInt"
| "http://www.w3.org/2001/XMLSchema#unsignedLong"
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => parse_integer_str(value),
"http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(value),
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => parse_date_time_str(value),
"http://www.w3.org/2001/XMLSchema#time" => parse_time_str(value),
"http://www.w3.org/2001/XMLSchema#date" => parse_date_str(value),
"http://www.w3.org/2001/XMLSchema#gYearMonth" => parse_g_year_month_str(value),
"http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(value),
"http://www.w3.org/2001/XMLSchema#gMonthDay" => parse_g_month_day_str(value),
"http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(value),
"http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(value),
"http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(value),
"http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
parse_year_month_duration_str(value)
}
"http://www.w3.org/2001/XMLSchema#dayTimeDuration" => parse_day_time_duration_str(value),
_ => None,
};
match native_encoding {
Some(term) => term,
None => {
if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: StrHash::new(datatype),
}
} else {
EncodedTerm::BigTypedLiteral {
value_id: StrHash::new(value),
datatype_id: StrHash::new(datatype),
}
}
}
}
}
/// Encodes a subject: named and blank nodes go through their dedicated encoders, and a quoted
/// triple is encoded with `get_encoded_triple` and wrapped in an [`Rc`] inside
/// [`EncodedTerm::Triple`].
pub(crate) fn get_encoded_subject(term: SubjectRef<'_>) -> EncodedTerm {
    match term {
        SubjectRef::NamedNode(iri) => get_encoded_named_node(iri),
        SubjectRef::BlankNode(node) => get_encoded_blank_node(node),
        SubjectRef::Triple(inner) => {
            let encoded = get_encoded_triple(inner.as_ref());
            EncodedTerm::Triple(Rc::new(encoded))
        }
    }
}
/// Encodes any term: named nodes, blank nodes and literals go through their dedicated encoders,
/// and a quoted triple is encoded with `get_encoded_triple` and wrapped in an [`Rc`] inside
/// [`EncodedTerm::Triple`].
pub(crate) fn get_encoded_term(term: TermRef<'_>) -> EncodedTerm {
    match term {
        TermRef::NamedNode(iri) => get_encoded_named_node(iri),
        TermRef::BlankNode(node) => get_encoded_blank_node(node),
        TermRef::Literal(lit) => get_encoded_literal(lit),
        TermRef::Triple(inner) => {
            let encoded = get_encoded_triple(inner.as_ref());
            EncodedTerm::Triple(Rc::new(encoded))
        }
    }
}
/// Encodes a graph name: named and blank nodes go through their dedicated encoders, while the
/// default graph maps to the dedicated [`EncodedTerm::DefaultGraph`] variant.
pub(crate) fn get_encoded_graph_name(name: GraphNameRef<'_>) -> EncodedTerm {
    match name {
        GraphNameRef::DefaultGraph => EncodedTerm::DefaultGraph,
        GraphNameRef::NamedNode(iri) => get_encoded_named_node(iri),
        GraphNameRef::BlankNode(node) => get_encoded_blank_node(node),
    }
}
/// Encodes the subject, predicate and object of a triple into an [`EncodedTriple`].
///
/// Note that the parameter is named `quad` although it is a [`TripleRef`].
pub(crate) fn get_encoded_triple(quad: TripleRef<'_>) -> EncodedTriple {
    let subject = get_encoded_subject(quad.subject);
    let predicate = get_encoded_named_node(quad.predicate);
    let object = get_encoded_term(quad.object);
    EncodedTriple {
        subject,
        predicate,
        object,
    }
}
/// Encodes the subject, predicate, object and graph name of a quad into an [`EncodedQuad`].
pub(crate) fn get_encoded_quad(quad: QuadRef<'_>) -> EncodedQuad {
    let subject = get_encoded_subject(quad.subject);
    let predicate = get_encoded_named_node(quad.predicate);
    let object = get_encoded_term(quad.object);
    let graph_name = get_encoded_graph_name(quad.graph_name);
    EncodedQuad {
        subject,
        predicate,
        object,
        graph_name,
    }
}
/// Encodes a term and insert strings if needed
pub(crate) trait WriteEncoder: StrContainer {

@ -31,10 +31,7 @@ use crate::sparql::{
UpdateOptions,
};
use crate::storage::io::{dump_dataset, dump_graph, load_dataset, load_graph};
use crate::storage::numeric_encoder::{
get_encoded_graph_name, get_encoded_named_node, get_encoded_quad, get_encoded_subject,
get_encoded_term, Decoder, WriteEncoder,
};
use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm, WriteEncoder};
pub use crate::storage::ConflictableTransactionError;
pub use crate::storage::TransactionError;
pub use crate::storage::UnabortableTransactionError;
@ -166,10 +163,10 @@ impl Store {
) -> QuadIter {
QuadIter {
iter: self.storage.quads_for_pattern(
subject.map(get_encoded_subject).as_ref(),
predicate.map(get_encoded_named_node).as_ref(),
object.map(get_encoded_term).as_ref(),
graph_name.map(get_encoded_graph_name).as_ref(),
subject.map(EncodedTerm::from).as_ref(),
predicate.map(EncodedTerm::from).as_ref(),
object.map(EncodedTerm::from).as_ref(),
graph_name.map(EncodedTerm::from).as_ref(),
),
storage: self.storage.clone(),
}
@ -182,7 +179,7 @@ impl Store {
/// Checks if this store contains a given quad
pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
let quad = get_encoded_quad(quad.into());
let quad = EncodedQuad::from(quad.into());
self.storage.contains(&quad)
}
@ -375,7 +372,7 @@ impl Store {
/// It might leave the store in a bad state if a crash happens during the removal.
/// Use a (memory greedy) [transaction](Store::transaction()) if you do not want that.
pub fn remove<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, io::Error> {
let quad = get_encoded_quad(quad.into());
let quad = EncodedQuad::from(quad.into());
self.storage.remove(&quad)
}
@ -469,7 +466,7 @@ impl Store {
&self,
graph_name: impl Into<SubjectRef<'a>>,
) -> Result<bool, io::Error> {
let graph_name = get_encoded_subject(graph_name.into());
let graph_name = EncodedTerm::from(graph_name.into());
self.storage.contains_named_graph(&graph_name)
}
@ -518,7 +515,7 @@ impl Store {
&self,
graph_name: impl Into<GraphNameRef<'a>>,
) -> Result<(), io::Error> {
let graph_name = get_encoded_graph_name(graph_name.into());
let graph_name = EncodedTerm::from(graph_name.into());
self.storage.clear_graph(&graph_name)
}
@ -546,7 +543,7 @@ impl Store {
&self,
graph_name: impl Into<SubjectRef<'a>>,
) -> Result<bool, io::Error> {
let graph_name = get_encoded_subject(graph_name.into());
let graph_name = EncodedTerm::from(graph_name.into());
self.storage.remove_named_graph(&graph_name)
}
@ -700,7 +697,7 @@ impl Transaction<'_> {
&self,
quad: impl Into<QuadRef<'a>>,
) -> Result<bool, UnabortableTransactionError> {
let quad = get_encoded_quad(quad.into());
let quad = EncodedQuad::from(quad.into());
self.storage.remove(&quad)
}

Loading…
Cancel
Save