Store more terms inline

pull/46/head
Tpt 4 years ago
parent bf430de125
commit beebcdfbd6
  1. 408
      lib/src/sparql/eval.rs
  2. 326
      lib/src/store/binary_encoder.rs
  3. 2
      lib/src/store/memory.rs
  4. 1
      lib/src/store/mod.rs
  5. 570
      lib/src/store/numeric_encoder.rs
  6. 205
      lib/src/store/small_string.rs

@ -1,12 +1,13 @@
use crate::model::xsd::*;
use crate::model::BlankNode;
use crate::model::Triple;
use crate::model::{BlankNode, LiteralRef, NamedNodeRef};
use crate::sparql::algebra::{DatasetSpec, GraphPattern, QueryVariants};
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use crate::sparql::plan::*;
use crate::sparql::{Query, ServiceHandler};
use crate::store::numeric_encoder::*;
use crate::store::small_string::SmallString;
use crate::store::ReadableEncodedStore;
use digest::Digest;
use md5::Md5;
@ -14,7 +15,6 @@ use oxilangtag::LanguageTag;
use oxiri::Iri;
use rand::random;
use regex::{Regex, RegexBuilder};
use rio_api::model as rio;
use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512};
use std::cmp::Ordering;
@ -1008,14 +1008,19 @@ where
PlanExpression::UnaryNot(e) => self
.to_bool(self.eval_expression(e, tuple)?)
.map(|v| (!v).into()),
PlanExpression::Str(e) => Some(EncodedTerm::StringLiteral {
value_id: self.to_string_id(self.eval_expression(e, tuple)?)?,
}),
PlanExpression::Str(e) => {
Some(self.build_string_literal_from_id(
self.to_string_id(self.eval_expression(e, tuple)?)?,
))
}
PlanExpression::Lang(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::LangStringLiteral { language_id, .. } => {
Some(EncodedTerm::StringLiteral {
value_id: language_id,
})
EncodedTerm::SmallSmallLangStringLiteral { language, .. }
| EncodedTerm::BigSmallLangStringLiteral { language, .. } => {
Some(self.build_string_literal_from_id(language.into()))
}
EncodedTerm::SmallBigLangStringLiteral { language_id, .. }
| EncodedTerm::BigBigLangStringLiteral { language_id, .. } => {
Some(self.build_string_literal_from_id(language_id.into()))
}
e if e.is_literal() => self.build_string_literal(""),
_ => None,
@ -1047,17 +1052,26 @@ where
PlanExpression::Datatype(e) => self.datatype(self.eval_expression(e, tuple)?),
PlanExpression::Bound(v) => Some(tuple.contains(*v).into()),
PlanExpression::IRI(e) => {
let iri_id = match self.eval_expression(e, tuple)? {
EncodedTerm::NamedNode { iri_id } => Some(iri_id),
EncodedTerm::StringLiteral { value_id } => Some(value_id),
_ => None,
}?;
let iri = self.dataset.get_str(iri_id).ok()??;
if let Some(base_iri) = &self.base_iri {
self.build_named_node(&base_iri.resolve(&iri).ok()?.into_inner())
let e = self.eval_expression(e, tuple)?;
if e.is_named_node() {
Some(e)
} else {
Iri::parse(iri).ok()?;
Some(EncodedTerm::NamedNode { iri_id })
let iri = match e {
EncodedTerm::SmallStringLiteral(value) => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id } => {
self.dataset.get_str(value_id).ok()?
}
_ => None,
}?;
self.build_named_node(
&if let Some(base_iri) = &self.base_iri {
base_iri.resolve(&iri)
} else {
Iri::parse(iri)
}
.ok()?
.into_inner(),
)
}
}
PlanExpression::BNode(id) => match id {
@ -1072,7 +1086,7 @@ where
.ok()?,
)
}
None => Some(EncodedTerm::InlineBlankNode {
None => Some(EncodedTerm::NumericalBlankNode {
id: random::<u128>(),
}),
},
@ -1352,11 +1366,10 @@ where
}
}
PlanExpression::StrLang(lexical_form, lang_tag) => {
Some(EncodedTerm::LangStringLiteral {
value_id: self
.to_simple_string_id(self.eval_expression(lexical_form, tuple)?)?,
language_id: self.build_language_id(self.eval_expression(lang_tag, tuple)?)?,
})
Some(self.build_lang_string_literal_from_id(
self.to_simple_string_id(self.eval_expression(lexical_form, tuple)?)?,
self.build_language_id(self.eval_expression(lang_tag, tuple)?)?,
))
}
PlanExpression::StrDT(lexical_form, datatype) => {
let value = self.to_simple_string(self.eval_expression(lexical_form, tuple)?)?;
@ -1369,10 +1382,10 @@ where
}?;
let mut encoder = self.dataset.as_ref();
encoder
.encode_rio_literal(rio::Literal::Typed {
value: &value,
datatype: rio::NamedNode { iri: &datatype },
})
.encode_literal(LiteralRef::new_typed_literal(
&value,
NamedNodeRef::new_unchecked(&datatype),
))
.ok()
}
PlanExpression::SameTerm(a, b) => {
@ -1411,7 +1424,8 @@ where
}
PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_boolean_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_boolean_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1424,7 +1438,8 @@ where
EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1_f64 } else { 0_f64 }.into())
}
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_double_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_double_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1437,7 +1452,9 @@ where
EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1_f32 } else { 0_f32 }.into())
}
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_float_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_float_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1448,7 +1465,8 @@ where
EncodedTerm::IntegerLiteral(value) => Some(value.into()),
EncodedTerm::DecimalLiteral(value) => Some(i64::try_from(value).ok()?.into()),
EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_integer_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_integer_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1461,7 +1479,8 @@ where
EncodedTerm::BooleanLiteral(value) => {
Some(Decimal::from(if value { 1 } else { 0 }).into())
}
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_decimal_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_decimal_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1469,7 +1488,8 @@ where
PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(Date::try_from(value).ok()?.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_date_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_date_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1477,7 +1497,8 @@ where
PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::TimeLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(Time::try_from(value).ok()?.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_time_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_time_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1485,7 +1506,8 @@ where
PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateTimeLiteral(value) => Some(value.into()),
EncodedTerm::DateLiteral(value) => Some(DateTime::try_from(value).ok()?.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_date_time_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_date_time_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1494,7 +1516,8 @@ where
EncodedTerm::DurationLiteral(value) => Some(value.into()),
EncodedTerm::YearMonthDurationLiteral(value) => Some(Duration::from(value).into()),
EncodedTerm::DayTimeDurationLiteral(value) => Some(Duration::from(value).into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_duration_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_duration_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1504,7 +1527,8 @@ where
Some(YearMonthDuration::try_from(value).ok()?.into())
}
EncodedTerm::YearMonthDurationLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_year_month_duration_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_year_month_duration_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
@ -1514,21 +1538,25 @@ where
Some(DayTimeDuration::try_from(value).ok()?.into())
}
EncodedTerm::DayTimeDurationLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_day_time_duration_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_day_time_duration_str(&*self.dataset.get_str(value_id).ok()??)
}
_ => None,
},
PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral {
value_id: self.to_string_id(self.eval_expression(e, tuple)?)?,
}),
PlanExpression::StringCast(e) => {
Some(self.build_string_literal_from_id(
self.to_string_id(self.eval_expression(e, tuple)?)?,
))
}
}
}
fn to_bool(&self, term: EncodedTerm<S::StrId>) -> Option<bool> {
match term {
EncodedTerm::BooleanLiteral(value) => Some(value),
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => Some(!value.is_empty()),
EncodedTerm::BigStringLiteral { value_id } => {
Some(!self.dataset.get_str(value_id).ok()??.is_empty())
}
EncodedTerm::FloatLiteral(value) => Some(value != 0_f32),
@ -1539,14 +1567,21 @@ where
}
}
fn to_string_id(&self, term: EncodedTerm<S::StrId>) -> Option<S::StrId> {
fn to_string_id(&self, term: EncodedTerm<S::StrId>) -> Option<SmallStringOrId<S::StrId>> {
match term {
EncodedTerm::DefaultGraph => None,
EncodedTerm::NamedNode { iri_id } => Some(iri_id),
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => None,
EncodedTerm::StringLiteral { value_id }
| EncodedTerm::LangStringLiteral { value_id, .. }
| EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id),
EncodedTerm::NamedNode { iri_id } => Some(iri_id.into()),
EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => None,
EncodedTerm::SmallStringLiteral(value)
| EncodedTerm::SmallSmallLangStringLiteral { value, .. }
| EncodedTerm::SmallBigLangStringLiteral { value, .. }
| EncodedTerm::SmallTypedLiteral { value, .. } => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id }
| EncodedTerm::BigSmallLangStringLiteral { value_id, .. }
| EncodedTerm::BigBigLangStringLiteral { value_id, .. }
| EncodedTerm::BigTypedLiteral { value_id, .. } => Some(value_id.into()),
EncodedTerm::BooleanLiteral(value) => {
self.build_string_id(if value { "true" } else { "false" })
}
@ -1566,25 +1601,32 @@ where
}
/// Returns the lexical value of a plain string literal as an owned `String`.
///
/// Only the plain string literal variants are accepted; every other term yields
/// `None`. When the value has to be looked up in the dataset, a lookup error is
/// turned into `None` as well (`.ok()?`).
fn to_simple_string(&self, term: EncodedTerm<S::StrId>) -> Option<String> {
if let EncodedTerm::StringLiteral { value_id } = term {
self.dataset.get_str(value_id).ok()?
} else {
None
match term {
EncodedTerm::SmallStringLiteral(value) => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id } => self.dataset.get_str(value_id).ok()?,
_ => None,
}
}
/// Returns the value of a plain string literal without resolving it to a `String`.
///
/// The result is the literal's value handle (an inline small string or a string
/// identifier); any term that is not a plain string literal yields `None`.
fn to_simple_string_id(&self, term: EncodedTerm<S::StrId>) -> Option<S::StrId> {
if let EncodedTerm::StringLiteral { value_id } = term {
Some(value_id)
} else {
None
fn to_simple_string_id(
&self,
term: EncodedTerm<S::StrId>,
) -> Option<SmallStringOrId<S::StrId>> {
match term {
EncodedTerm::SmallStringLiteral(value) => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id } => Some(value_id.into()),
_ => None,
}
}
fn to_string(&self, term: EncodedTerm<S::StrId>) -> Option<String> {
match term {
EncodedTerm::StringLiteral { value_id }
| EncodedTerm::LangStringLiteral { value_id, .. } => {
EncodedTerm::SmallStringLiteral(value)
| EncodedTerm::SmallSmallLangStringLiteral { value, .. }
| EncodedTerm::SmallBigLangStringLiteral { value, .. } => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id }
| EncodedTerm::BigSmallLangStringLiteral { value_id, .. }
| EncodedTerm::BigBigLangStringLiteral { value_id, .. } => {
self.dataset.get_str(value_id).ok()?
}
_ => None,
@ -1594,46 +1636,85 @@ where
/// Splits a string-like literal into its lexical value and optional language.
///
/// Plain string literals give `(value, None)`; language-tagged string literals give
/// `(value, Some(language))`, where the language is kept as a handle (inline small
/// string or string identifier) rather than being resolved. Values referenced by
/// identifier are fetched from the dataset; a failed or missing lookup yields `None`.
/// Any other term yields `None`.
fn to_string_and_language(
&self,
term: EncodedTerm<S::StrId>,
) -> Option<(String, Option<S::StrId>)> {
) -> Option<(String, Option<SmallStringOrId<S::StrId>>)> {
match term {
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => Some((value.into(), None)),
EncodedTerm::BigStringLiteral { value_id } => {
Some((self.dataset.get_str(value_id).ok()??, None))
}
EncodedTerm::LangStringLiteral {
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Some((value.into(), Some(language.into())))
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Some((value.into(), Some(language_id.into())))
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Some((self.dataset.get_str(value_id).ok()??, Some(language.into())))
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => Some((self.dataset.get_str(value_id).ok()??, Some(language_id))),
} => Some((
self.dataset.get_str(value_id).ok()??,
Some(language_id.into()),
)),
_ => None,
}
}
/// Builds a `NamedNode` term for `iri`, returning `None` if the string cannot be encoded.
fn build_named_node(&self, iri: &str) -> Option<EncodedTerm<S::StrId>> {
Some(EncodedTerm::NamedNode {
iri_id: self.build_string_id(iri)?,
// `NamedNode` carries a string identifier, so the IRI is encoded through the
// dataset directly instead of `build_string_id`, which may return an inline string.
iri_id: self.dataset.as_ref().encode_str(iri).ok()?,
})
}
/// Builds a plain string literal term holding `value`.
///
/// Returns `None` when `build_string_id` fails to produce a handle for `value`.
fn build_string_literal(&self, value: &str) -> Option<EncodedTerm<S::StrId>> {
Some(EncodedTerm::StringLiteral {
value_id: self.build_string_id(value)?,
})
Some(self.build_string_literal_from_id(self.build_string_id(value)?))
}
/// Wraps an already-built string handle into the matching plain string literal term:
/// an inline string becomes `SmallStringLiteral`, an identifier becomes `BigStringLiteral`.
fn build_string_literal_from_id(&self, id: SmallStringOrId<S::StrId>) -> EncodedTerm<S::StrId> {
    match id {
        SmallStringOrId::Big(value_id) => EncodedTerm::BigStringLiteral { value_id },
        SmallStringOrId::Small(inline) => EncodedTerm::SmallStringLiteral(inline),
    }
}
/// Builds a language-tagged string literal term from a lexical `value` and an
/// already-built language handle.
///
/// Returns `None` when `build_string_id` fails to produce a handle for `value`.
fn build_lang_string_literal(
&self,
value: &str,
language_id: S::StrId,
language_id: SmallStringOrId<S::StrId>,
) -> Option<EncodedTerm<S::StrId>> {
Some(EncodedTerm::LangStringLiteral {
value_id: self.build_string_id(value)?,
language_id,
})
Some(self.build_lang_string_literal_from_id(self.build_string_id(value)?, language_id))
}
/// Assembles a language-tagged string literal term from two already-built handles.
///
/// The variant is chosen from the representation of each part: the first word of the
/// variant name (`Small`/`Big`) follows the value, the second follows the language.
fn build_lang_string_literal_from_id(
    &self,
    value_id: SmallStringOrId<S::StrId>,
    language_id: SmallStringOrId<S::StrId>,
) -> EncodedTerm<S::StrId> {
    match value_id {
        // Value stored inline.
        SmallStringOrId::Small(value) => match language_id {
            SmallStringOrId::Small(language) => {
                EncodedTerm::SmallSmallLangStringLiteral { value, language }
            }
            SmallStringOrId::Big(language_id) => {
                EncodedTerm::SmallBigLangStringLiteral { value, language_id }
            }
        },
        // Value referenced by identifier.
        SmallStringOrId::Big(value_id) => match language_id {
            SmallStringOrId::Small(language) => {
                EncodedTerm::BigSmallLangStringLiteral { value_id, language }
            }
            SmallStringOrId::Big(language_id) => EncodedTerm::BigBigLangStringLiteral {
                value_id,
                language_id,
            },
        },
    }
}
fn build_plain_literal(
&self,
value: &str,
language: Option<S::StrId>,
language: Option<SmallStringOrId<S::StrId>>,
) -> Option<EncodedTerm<S::StrId>> {
if let Some(language_id) = language {
self.build_lang_string_literal(value, language_id)
@ -1642,11 +1723,15 @@ where
}
}
/// Produces a handle for `value`.
///
/// If `value` converts to a `SmallString` it is kept inline; otherwise it is encoded
/// through the dataset and referenced by identifier. Returns `None` when that
/// encoding fails.
fn build_string_id(&self, value: &str) -> Option<S::StrId> {
self.dataset.as_ref().encode_str(value).ok()
fn build_string_id(&self, value: &str) -> Option<SmallStringOrId<S::StrId>> {
Some(if let Ok(value) = SmallString::try_from(value) {
value.into()
} else {
self.dataset.as_ref().encode_str(value).ok()?.into()
})
}
fn build_language_id(&self, value: EncodedTerm<S::StrId>) -> Option<S::StrId> {
fn build_language_id(&self, value: EncodedTerm<S::StrId>) -> Option<SmallStringOrId<S::StrId>> {
let mut language = self.to_simple_string(value)?;
language.make_ascii_lowercase();
self.build_string_id(LanguageTag::parse(language).ok()?.as_str())
@ -1656,7 +1741,7 @@ where
&self,
arg1: EncodedTerm<S::StrId>,
arg2: EncodedTerm<S::StrId>,
) -> Option<(String, String, Option<S::StrId>)> {
) -> Option<(String, String, Option<SmallStringOrId<S::StrId>>)> {
let (value1, language1) = self.to_string_and_language(arg1)?;
let (value2, language2) = self.to_string_and_language(arg2)?;
if language2.is_none() || language1 == language2 {
@ -1763,17 +1848,52 @@ where
match a {
EncodedTerm::DefaultGraph
| EncodedTerm::NamedNode { .. }
| EncodedTerm::InlineBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. }
| EncodedTerm::LangStringLiteral { .. } => Some(a == b),
EncodedTerm::StringLiteral { value_id: a } => match b {
EncodedTerm::StringLiteral { value_id: b } => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. }
| EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. } => Some(a == b),
EncodedTerm::SmallStringLiteral(a) => match b {
EncodedTerm::SmallStringLiteral(b) => Some(a == b),
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::BigStringLiteral { value_id: a } => match b {
EncodedTerm::BigStringLiteral { value_id: b } => Some(a == b),
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::SmallTypedLiteral { .. } => match b {
EncodedTerm::SmallTypedLiteral { .. } if a == b => Some(true),
EncodedTerm::NamedNode { .. }
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. }
| EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. }
| EncodedTerm::BigTypedLiteral { .. } => Some(false),
_ => None,
},
EncodedTerm::BigTypedLiteral { .. } => match b {
EncodedTerm::BigTypedLiteral { .. } if a == b => Some(true),
EncodedTerm::NamedNode { .. }
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. }
| EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. }
| EncodedTerm::SmallTypedLiteral { .. } => Some(false),
_ => None,
},
EncodedTerm::BooleanLiteral(a) => match b {
EncodedTerm::BooleanLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::FloatLiteral(a) => match b {
@ -1781,7 +1901,7 @@ where
EncodedTerm::DoubleLiteral(b) => Some(f64::from(a) == b),
EncodedTerm::IntegerLiteral(b) => Some(a == b as f32),
EncodedTerm::DecimalLiteral(b) => Some(a == b.to_f32()),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::DoubleLiteral(a) => match b {
@ -1789,7 +1909,7 @@ where
EncodedTerm::DoubleLiteral(b) => Some(a == b),
EncodedTerm::IntegerLiteral(b) => Some(a == (b as f64)),
EncodedTerm::DecimalLiteral(b) => Some(a == b.to_f64()),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::IntegerLiteral(a) => match b {
@ -1797,7 +1917,7 @@ where
EncodedTerm::DoubleLiteral(b) => Some((a as f64) == b),
EncodedTerm::IntegerLiteral(b) => Some(a == b),
EncodedTerm::DecimalLiteral(b) => Some(Decimal::from(a) == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::DecimalLiteral(a) => match b {
@ -1805,51 +1925,43 @@ where
EncodedTerm::DoubleLiteral(b) => Some(a.to_f64() == b),
EncodedTerm::IntegerLiteral(b) => Some(a == Decimal::from(b)),
EncodedTerm::DecimalLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::TypedLiteral { .. } => match b {
EncodedTerm::TypedLiteral { .. } if a == b => Some(true),
EncodedTerm::NamedNode { .. }
| EncodedTerm::InlineBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. }
| EncodedTerm::LangStringLiteral { .. } => Some(false),
_ => None,
},
EncodedTerm::DateLiteral(a) => match b {
EncodedTerm::DateLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::TimeLiteral(a) => match b {
EncodedTerm::TimeLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::DateTimeLiteral(a) => match b {
EncodedTerm::DateTimeLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::DurationLiteral(a) => match b {
EncodedTerm::DurationLiteral(b) => Some(a == b),
EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b),
EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::YearMonthDurationLiteral(a) => match b {
EncodedTerm::DurationLiteral(b) => Some(a == b),
EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b),
EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::DayTimeDurationLiteral(a) => match b {
EncodedTerm::DurationLiteral(b) => Some(a == b),
EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b),
EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None,
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
}
@ -1874,26 +1986,28 @@ where
) -> Ordering {
match (a, b) {
(Some(a), Some(b)) => match a {
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => {
match b {
EncodedTerm::InlineBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. } => Ordering::Equal,
_ => Ordering::Less,
}
}
EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => match b {
EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => Ordering::Equal,
_ => Ordering::Less,
},
EncodedTerm::NamedNode { iri_id: a } => match b {
EncodedTerm::NamedNode { iri_id: b } => {
self.compare_str_ids(a, b).unwrap_or(Ordering::Equal)
}
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => {
Ordering::Greater
}
EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => Ordering::Greater,
_ => Ordering::Less,
},
a => match b {
EncodedTerm::NamedNode { .. }
| EncodedTerm::InlineBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. } => Ordering::Greater,
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => Ordering::Greater,
b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal),
},
},
@ -1910,13 +2024,16 @@ where
b: EncodedTerm<S::StrId>,
) -> Option<Ordering> {
match a {
EncodedTerm::StringLiteral { value_id: a } => {
if let EncodedTerm::StringLiteral { value_id: b } = b {
self.compare_str_ids(a, b)
} else {
None
}
}
EncodedTerm::SmallStringLiteral(a) => match b {
EncodedTerm::SmallStringLiteral(b) => a.partial_cmp(&b),
EncodedTerm::BigStringLiteral { value_id: b } => self.compare_str_str_id(&a, b),
_ => None,
},
EncodedTerm::BigStringLiteral { value_id: a } => match b {
EncodedTerm::SmallStringLiteral(b) => self.compare_str_id_str(a, &b),
EncodedTerm::BigStringLiteral { value_id: b } => self.compare_str_ids(a, b),
_ => None,
},
EncodedTerm::FloatLiteral(a) => match b {
EncodedTerm::FloatLiteral(ref b) => a.partial_cmp(b),
EncodedTerm::DoubleLiteral(ref b) => f64::from(a).partial_cmp(b),
@ -1997,6 +2114,14 @@ where
)
}
/// Orders the string referenced by identifier `a` against the string slice `b`.
///
/// Returns `None` when the string behind `a` cannot be fetched from the dataset.
fn compare_str_id_str(&self, a: S::StrId, b: &str) -> Option<Ordering> {
    let left = self.dataset.get_str(a).ok()??;
    Some(left.as_str().cmp(b))
}
/// Orders the string slice `a` against the string referenced by identifier `b`.
///
/// Returns `None` when the string behind `b` cannot be fetched from the dataset.
fn compare_str_str_id(&self, a: &str, b: S::StrId) -> Option<Ordering> {
    let right = self.dataset.get_str(b).ok()??;
    Some(a.cmp(right.as_str()))
}
fn hash<H: Digest>(
&self,
arg: &PlanExpression<S::StrId>,
@ -2011,16 +2136,21 @@ where
//TODO: optimize?
match value {
EncodedTerm::NamedNode { .. }
| EncodedTerm::NamedBlankNode { .. }
| EncodedTerm::InlineBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. }
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::DefaultGraph => None,
EncodedTerm::StringLiteral { .. } => {
EncodedTerm::SmallStringLiteral(_) | EncodedTerm::BigStringLiteral { .. } => {
self.build_named_node("http://www.w3.org/2001/XMLSchema#string")
}
EncodedTerm::LangStringLiteral { .. } => {
EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. } => {
self.build_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
}
EncodedTerm::TypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode {
EncodedTerm::SmallTypedLiteral { datatype_id, .. }
| EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode {
iri_id: datatype_id,
}),
EncodedTerm::BooleanLiteral(..) => {
@ -2504,7 +2634,7 @@ fn get_triple_template_value<I: StrId>(
}
/// Creates a blank node term whose numeric `id` is drawn from `random()`.
fn new_bnode<I: StrId>() -> EncodedTerm<I> {
EncodedTerm::InlineBlankNode { id: random() }
EncodedTerm::NumericalBlankNode { id: random() }
}
fn decode_triple<D: Decoder>(
@ -2922,7 +3052,7 @@ impl<I: StrId> Accumulator<I> for SampleAccumulator<I> {
/// State of the group-concatenation accumulator.
///
/// NOTE(review): the accumulation logic is outside this view; the field notes below
/// are inferred from names and types and should be confirmed against the `impl`.
struct GroupConcatAccumulator<S: ReadableEncodedStore + 'static> {
// Evaluator used to interpret the accumulated terms.
eval: SimpleEvaluator<S>,
// Presumably the text concatenated so far — TODO confirm.
concat: Option<String>,
language: Option<Option<S::StrId>>,
// Language handle, in the same form `to_string_and_language` returns; the meaning of
// the outer `Option` is not visible here — TODO confirm.
language: Option<Option<SmallStringOrId<S::StrId>>>,
// Presumably the separator inserted between values — TODO confirm.
separator: Rc<String>,
}
@ -3001,6 +3131,24 @@ fn write_hexa_bytes(bytes: &[u8], buffer: &mut String) {
}
}
/// A string handle: either the string itself held inline, or an identifier for it.
#[derive(Eq, PartialEq, Clone, Copy)]
enum SmallStringOrId<I: StrId> {
/// The string stored inline as a `SmallString`.
Small(SmallString),
/// The identifier of a string that is not held inline.
Big(I),
}
impl<I: StrId> From<SmallString> for SmallStringOrId<I> {
fn from(value: SmallString) -> Self {
Self::Small(value)
}
}
impl<I: StrId> From<I> for SmallStringOrId<I> {
fn from(value: I) -> Self {
Self::Big(value)
}
}
#[test]
fn uuid() {
let mut buffer = String::default();

@ -1,6 +1,7 @@
use crate::error::invalid_data_error;
use crate::model::xsd::*;
use crate::store::numeric_encoder::StrId;
use crate::store::small_string::SmallString;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::hash::Hasher;
use std::io;
@ -11,33 +12,38 @@ type EncodedTerm = crate::store::numeric_encoder::EncodedTerm<StrHash>;
type EncodedQuad = crate::store::numeric_encoder::EncodedQuad<StrHash>;
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
// Encoded term type blocks
// 1-7: usual named nodes (except prefixes c.f. later)
// 8-15: blank nodes
// 16-47: literals
// 48-63: future use
// 64-127: default named node prefixes
// 128-255: custom named node prefixes
const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_INLINE_BLANK_NODE_ID: u8 = 2;
const TYPE_NAMED_BLANK_NODE_ID: u8 = 3;
const TYPE_LANG_STRING_LITERAL_ID: u8 = 4;
const TYPE_TYPED_LITERAL_ID: u8 = 5;
const TYPE_STRING_LITERAL: u8 = 6;
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8;
const TYPE_FLOAT_LITERAL: u8 = 9;
const TYPE_DOUBLE_LITERAL: u8 = 10;
const TYPE_INTEGER_LITERAL: u8 = 11;
const TYPE_DECIMAL_LITERAL: u8 = 12;
const TYPE_DATE_TIME_LITERAL: u8 = 13;
const TYPE_DATE_LITERAL: u8 = 14;
const TYPE_TIME_LITERAL: u8 = 15;
const TYPE_DURATION_LITERAL: u8 = 16;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 17;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 18;
/// A string identifier that can be converted to and from big-endian bytes.
pub trait SerializableStrId: StrId {
/// Length of the identifier's byte form (presumably in bytes — TODO confirm).
fn len() -> usize;
/// Rebuilds an identifier from its big-endian byte form.
fn from_be_bytes(bytes: &[u8]) -> Self;
/// Appends the identifier's big-endian byte form to `buffer`.
fn push_be_bytes(&self, buffer: &mut Vec<u8>);
}
// Blank nodes (block 8-15).
const TYPE_NUMERICAL_BLANK_NODE_ID: u8 = 8;
const TYPE_SMALL_BLANK_NODE_ID: u8 = 9;
const TYPE_BIG_BLANK_NODE_ID: u8 = 10;
// Literals (block 16-47): plain strings.
const TYPE_SMALL_STRING_LITERAL: u8 = 16;
const TYPE_BIG_STRING_LITERAL: u8 = 17;
// Language-tagged strings; the name gives the value form first, then the language form.
const TYPE_SMALL_SMALL_LANG_STRING_LITERAL: u8 = 20;
const TYPE_SMALL_BIG_LANG_STRING_LITERAL: u8 = 21;
const TYPE_BIG_SMALL_LANG_STRING_LITERAL: u8 = 22;
const TYPE_BIG_BIG_LANG_STRING_LITERAL: u8 = 23;
// Literals with an explicit datatype.
const TYPE_SMALL_TYPED_LITERAL: u8 = 24;
const TYPE_BIG_TYPED_LITERAL: u8 = 25;
// Natively encoded literal values.
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 28;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 29;
const TYPE_FLOAT_LITERAL: u8 = 30;
const TYPE_DOUBLE_LITERAL: u8 = 31;
const TYPE_INTEGER_LITERAL: u8 = 32;
const TYPE_DECIMAL_LITERAL: u8 = 33;
const TYPE_DATE_TIME_LITERAL: u8 = 34;
const TYPE_DATE_LITERAL: u8 = 35;
const TYPE_TIME_LITERAL: u8 = 36;
const TYPE_DURATION_LITERAL: u8 = 37;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 38;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 39;
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
#[repr(transparent)]
@ -69,24 +75,6 @@ impl StrHash {
impl StrId for StrHash {}
/// Byte form of `StrHash`: the 128-bit hash as 16 big-endian bytes.
impl SerializableStrId for StrHash {
fn len() -> usize {
16
}
/// Rebuilds the hash from exactly 16 big-endian bytes.
fn from_be_bytes(bytes: &[u8]) -> Self {
let mut hash = [0; 16];
// `copy_from_slice` panics if `bytes` is not exactly 16 bytes long.
hash.copy_from_slice(bytes);
Self {
hash: u128::from_be_bytes(hash),
}
}
/// Appends the bytes returned by `to_be_bytes` to `buffer`.
fn push_be_bytes(&self, buffer: &mut Vec<u8>) {
buffer.extend_from_slice(&self.to_be_bytes())
}
}
#[derive(Clone, Copy)]
pub enum QuadEncoding {
SPOG,
@ -240,7 +228,6 @@ impl<R: Read> TermReader for R {
let mut type_buffer = [0];
self.read_exact(&mut type_buffer)?;
match type_buffer[0] {
TYPE_DEFAULT_GRAPH_ID => Ok(EncodedTerm::DefaultGraph),
TYPE_NAMED_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
@ -248,44 +235,100 @@ impl<R: Read> TermReader for R {
iri_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_INLINE_BLANK_NODE_ID => {
TYPE_NUMERICAL_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::InlineBlankNode {
Ok(EncodedTerm::NumericalBlankNode {
id: u128::from_be_bytes(buffer),
})
}
TYPE_NAMED_BLANK_NODE_ID => {
TYPE_SMALL_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::SmallBlankNode(
SmallString::from_be_bytes(buffer).map_err(invalid_data_error)?,
))
}
TYPE_BIG_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedBlankNode {
Ok(EncodedTerm::BigBlankNode {
id_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_LANG_STRING_LITERAL_ID => {
TYPE_SMALL_SMALL_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::SmallSmallLangStringLiteral {
value: SmallString::from_be_bytes(value_buffer).map_err(invalid_data_error)?,
language: SmallString::from_be_bytes(language_buffer)
.map_err(invalid_data_error)?,
})
}
TYPE_SMALL_BIG_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::LangStringLiteral {
Ok(EncodedTerm::SmallBigLangStringLiteral {
value: SmallString::from_be_bytes(value_buffer).map_err(invalid_data_error)?,
language_id: StrHash::from_be_bytes(language_buffer),
})
}
TYPE_BIG_SMALL_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::from_be_bytes(value_buffer),
language: SmallString::from_be_bytes(language_buffer)
.map_err(invalid_data_error)?,
})
}
TYPE_TYPED_LITERAL_ID => {
TYPE_BIG_BIG_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::from_be_bytes(value_buffer),
language_id: StrHash::from_be_bytes(language_buffer),
})
}
TYPE_SMALL_TYPED_LITERAL => {
let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::TypedLiteral {
Ok(EncodedTerm::SmallTypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer),
value: SmallString::from_be_bytes(value_buffer).map_err(invalid_data_error)?,
})
}
TYPE_BIG_TYPED_LITERAL => {
let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigTypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer),
value_id: StrHash::from_be_bytes(value_buffer),
})
}
TYPE_STRING_LITERAL => {
TYPE_SMALL_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::SmallStringLiteral(
SmallString::from_be_bytes(buffer).map_err(invalid_data_error)?,
))
}
TYPE_BIG_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::StringLiteral {
Ok(EncodedTerm::BigStringLiteral {
value_id: StrHash::from_be_bytes(buffer),
})
}
@ -451,38 +494,66 @@ pub fn encode_term_quad(
pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
match term {
EncodedTerm::DefaultGraph => sink.push(TYPE_DEFAULT_GRAPH_ID),
EncodedTerm::DefaultGraph => (),
EncodedTerm::NamedNode { iri_id } => {
sink.push(TYPE_NAMED_NODE_ID);
iri_id.push_be_bytes(sink)
sink.extend_from_slice(&iri_id.to_be_bytes());
}
EncodedTerm::InlineBlankNode { id } => {
sink.push(TYPE_INLINE_BLANK_NODE_ID);
EncodedTerm::NumericalBlankNode { id } => {
sink.push(TYPE_NUMERICAL_BLANK_NODE_ID);
sink.extend_from_slice(&id.to_be_bytes())
}
EncodedTerm::NamedBlankNode { id_id } => {
sink.push(TYPE_NAMED_BLANK_NODE_ID);
id_id.push_be_bytes(sink)
EncodedTerm::SmallBlankNode(id) => {
sink.push(TYPE_SMALL_BLANK_NODE_ID);
sink.extend_from_slice(&id.to_be_bytes())
}
EncodedTerm::StringLiteral { value_id } => {
sink.push(TYPE_STRING_LITERAL);
value_id.push_be_bytes(sink)
EncodedTerm::BigBlankNode { id_id } => {
sink.push(TYPE_BIG_BLANK_NODE_ID);
sink.extend_from_slice(&id_id.to_be_bytes());
}
EncodedTerm::LangStringLiteral {
EncodedTerm::SmallStringLiteral(value) => {
sink.push(TYPE_SMALL_STRING_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::BigStringLiteral { value_id } => {
sink.push(TYPE_BIG_STRING_LITERAL);
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
sink.push(TYPE_SMALL_SMALL_LANG_STRING_LITERAL);
sink.extend_from_slice(&language.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
sink.push(TYPE_SMALL_BIG_LANG_STRING_LITERAL);
sink.extend_from_slice(&language_id.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
sink.push(TYPE_BIG_SMALL_LANG_STRING_LITERAL);
sink.extend_from_slice(&language.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => {
sink.push(TYPE_LANG_STRING_LITERAL_ID);
value_id.push_be_bytes(sink);
language_id.push_be_bytes(sink);
sink.push(TYPE_BIG_BIG_LANG_STRING_LITERAL);
sink.extend_from_slice(&language_id.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::TypedLiteral {
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
sink.push(TYPE_SMALL_TYPED_LITERAL);
sink.extend_from_slice(&datatype_id.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
} => {
sink.push(TYPE_TYPED_LITERAL_ID);
value_id.push_be_bytes(sink);
datatype_id.push_be_bytes(sink);
sink.push(TYPE_BIG_TYPED_LITERAL);
sink.extend_from_slice(&datatype_id.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::BooleanLiteral(true) => sink.push(TYPE_BOOLEAN_LITERAL_TRUE),
EncodedTerm::BooleanLiteral(false) => sink.push(TYPE_BOOLEAN_LITERAL_FALSE),
@ -528,3 +599,112 @@ pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
}
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::store::numeric_encoder::*;
    use std::collections::HashMap;
    use std::convert::Infallible;

    /// Minimal in-memory string dictionary keyed by `StrHash`, used only by the tests below.
    #[derive(Default)]
    struct MemoryStrStore {
        id2str: HashMap<StrHash, String>,
    }

    impl WithStoreError for MemoryStrStore {
        type Error = Infallible;
        type StrId = StrHash;
    }

    impl StrLookup for MemoryStrStore {
        /// Returns a copy of the string registered under `id`, if any.
        fn get_str(&self, id: StrHash) -> Result<Option<String>, Infallible> {
            Ok(self.id2str.get(&id).cloned())
        }

        /// Returns the hash of `value` only if that string was inserted before.
        fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Infallible> {
            let id = StrHash::new(value);
            Ok(if self.id2str.contains_key(&id) {
                Some(id)
            } else {
                None
            })
        }
    }

    impl StrContainer for MemoryStrStore {
        /// Registers `value` under its hash (keeping any existing entry) and returns the hash.
        fn insert_str(&mut self, value: &str) -> Result<StrHash, Infallible> {
            let key = StrHash::new(value);
            self.id2str.entry(key).or_insert_with(|| value.to_owned());
            Ok(key)
        }
    }

    /// Round-trips a set of terms through `encode_term`, `get_encoded_term`, `decode_term`
    /// and the binary `write_term` / `read_term` pair.
    ///
    /// Short and long values are both listed so that strings which fit in a `SmallString`
    /// and strings which do not are each exercised.
    #[test]
    fn test_encoding() {
        use crate::model::vocab::xsd;
        use crate::model::*;

        let mut store = MemoryStrStore::default();
        let terms: Vec<Term> = vec![
            NamedNode::new_unchecked("http://foo.com").into(),
            NamedNode::new_unchecked("http://bar.com").into(),
            NamedNode::new_unchecked("http://foo.com").into(),
            BlankNode::default().into(),
            BlankNode::new_unchecked("bnode").into(),
            BlankNode::new_unchecked("foo-bnode-thisisaverylargeblanknode").into(),
            Literal::new_simple_literal("literal").into(),
            // Must be a literal (not a blank node): this is the only simple literal in the
            // list that is too long to be stored inline.
            Literal::new_simple_literal("foo-literal-thisisaverylargestringliteral").into(),
            Literal::from(true).into(),
            Literal::from(1.2).into(),
            Literal::from(1).into(),
            Literal::from("foo-string").into(),
            Literal::new_language_tagged_literal_unchecked("foo-fr", "fr").into(),
            Literal::new_language_tagged_literal_unchecked(
                "foo-fr-literal-thisisaverylargelanguagetaggedstringliteral",
                "fr",
            )
            .into(),
            Literal::new_language_tagged_literal_unchecked(
                "foo-big",
                "fr-FR-Latn-x-foo-bar-baz-bat-aaaa-bbbb-cccc",
            )
            .into(),
            Literal::new_language_tagged_literal_unchecked(
                "foo-big-literal-thisisaverylargelanguagetaggedstringliteral",
                "fr-FR-Latn-x-foo-bar-baz-bat-aaaa-bbbb-cccc",
            )
            .into(),
            Literal::new_typed_literal("-1.32", xsd::DECIMAL).into(),
            Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME).into(),
            Literal::new_typed_literal("2020-01-01", xsd::DATE).into(),
            Literal::new_typed_literal("01:01:01Z", xsd::TIME).into(),
            Literal::new_typed_literal("PT1S", xsd::DURATION).into(),
            Literal::new_typed_literal("-foo", NamedNode::new_unchecked("http://foo.com")).into(),
            Literal::new_typed_literal(
                "-foo-thisisaverybigtypedliteralwiththefoodatatype",
                NamedNode::new_unchecked("http://foo.com"),
            )
            .into(),
        ];
        for term in terms {
            // Writing encoder and read-only encoder must agree on the encoded form.
            let encoded = store.encode_term(term.as_ref()).unwrap();
            assert_eq!(
                Some(encoded),
                store.get_encoded_term(term.as_ref()).unwrap()
            );
            // Decoding must give back the original term.
            assert_eq!(term, store.decode_term(encoded).unwrap());

            // The binary serialization must round-trip as well.
            let mut buffer = Vec::new();
            write_term(&mut buffer, encoded);
            assert_eq!(encoded, Cursor::new(&buffer).read_term().unwrap());
        }
    }
}

@ -1394,7 +1394,7 @@ fn label(g: &MemoryStore, hashes: &HashMap<EncodedTerm, u64>) -> Vec<String> {
fn map_term(term: EncodedTerm, bnodes_hash: &HashMap<EncodedTerm, u64>) -> EncodedTerm {
if term.is_blank_node() {
EncodedTerm::InlineBlankNode {
EncodedTerm::NumericalBlankNode {
id: (*bnodes_hash.get(&term).unwrap()).into(),
}
} else {

@ -11,6 +11,7 @@ pub(crate) mod numeric_encoder;
pub mod rocksdb;
#[cfg(feature = "sled")]
pub mod sled;
pub(crate) mod small_string;
pub use crate::store::memory::MemoryStore;
#[cfg(feature = "rocksdb")]

@ -4,11 +4,11 @@ use crate::error::invalid_data_error;
use crate::model::xsd::*;
use crate::model::*;
use crate::sparql::EvaluationError;
use lasso::{Rodeo, Spur};
use crate::store::small_string::SmallString;
use rand::random;
use rio_api::model as rio;
use std::collections::HashMap;
use std::convert::Infallible;
use std::convert::{TryFrom, TryInto};
use std::error::Error;
use std::fmt::Debug;
use std::hash::Hash;
@ -20,12 +20,44 @@ pub trait StrId: Eq + Debug + Copy + Hash {}
#[derive(Debug, Clone, Copy)]
pub enum EncodedTerm<I: StrId> {
DefaultGraph,
NamedNode { iri_id: I },
InlineBlankNode { id: u128 },
NamedBlankNode { id_id: I },
StringLiteral { value_id: I },
LangStringLiteral { value_id: I, language_id: I },
TypedLiteral { value_id: I, datatype_id: I },
NamedNode {
iri_id: I,
},
NumericalBlankNode {
id: u128,
},
SmallBlankNode(SmallString),
BigBlankNode {
id_id: I,
},
SmallStringLiteral(SmallString),
BigStringLiteral {
value_id: I,
},
SmallSmallLangStringLiteral {
value: SmallString,
language: SmallString,
},
SmallBigLangStringLiteral {
value: SmallString,
language_id: I,
},
BigSmallLangStringLiteral {
value_id: I,
language: SmallString,
},
BigBigLangStringLiteral {
value_id: I,
language_id: I,
},
SmallTypedLiteral {
value: SmallString,
datatype_id: I,
},
BigTypedLiteral {
value_id: I,
datatype_id: I,
},
BooleanLiteral(bool),
FloatLiteral(f32),
DoubleLiteral(f64),
@ -46,36 +78,78 @@ impl<I: StrId> PartialEq for EncodedTerm<I> {
(Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => {
iri_id_a == iri_id_b
}
(Self::InlineBlankNode { id: id_a }, Self::InlineBlankNode { id: id_b }) => {
(Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => {
id_a == id_b
}
(Self::NamedBlankNode { id_id: id_a }, Self::NamedBlankNode { id_id: id_b }) => {
(Self::SmallBlankNode(id_a), Self::SmallBlankNode(id_b)) => id_a == id_b,
(Self::BigBlankNode { id_id: id_a }, Self::BigBlankNode { id_id: id_b }) => {
id_a == id_b
}
(Self::SmallStringLiteral(a), Self::SmallStringLiteral(b)) => a == b,
(
Self::StringLiteral {
Self::BigStringLiteral {
value_id: value_id_a,
},
Self::StringLiteral {
Self::BigStringLiteral {
value_id: value_id_b,
},
) => value_id_a == value_id_b,
(
Self::LangStringLiteral {
Self::SmallSmallLangStringLiteral {
value: value_a,
language: language_a,
},
Self::SmallSmallLangStringLiteral {
value: value_b,
language: language_b,
},
) => value_a == value_b && language_a == language_b,
(
Self::SmallBigLangStringLiteral {
value: value_a,
language_id: language_id_a,
},
Self::SmallBigLangStringLiteral {
value: value_b,
language_id: language_id_b,
},
) => value_a == value_b && language_id_a == language_id_b,
(
Self::BigSmallLangStringLiteral {
value_id: value_id_a,
language: language_a,
},
Self::BigSmallLangStringLiteral {
value_id: value_id_b,
language: language_b,
},
) => value_id_a == value_id_b && language_a == language_b,
(
Self::BigBigLangStringLiteral {
value_id: value_id_a,
language_id: language_id_a,
},
Self::LangStringLiteral {
Self::BigBigLangStringLiteral {
value_id: value_id_b,
language_id: language_id_b,
},
) => value_id_a == value_id_b && language_id_a == language_id_b,
(
Self::TypedLiteral {
Self::SmallTypedLiteral {
value: value_a,
datatype_id: datatype_id_a,
},
Self::SmallTypedLiteral {
value: value_b,
datatype_id: datatype_id_b,
},
) => value_a == value_b && datatype_id_a == datatype_id_b,
(
Self::BigTypedLiteral {
value_id: value_id_a,
datatype_id: datatype_id_a,
},
Self::TypedLiteral {
Self::BigTypedLiteral {
value_id: value_id_b,
datatype_id: datatype_id_b,
},
@ -114,18 +188,36 @@ impl<I: StrId> Hash for EncodedTerm<I> {
fn hash<H: Hasher>(&self, state: &mut H) {
match self {
Self::NamedNode { iri_id } => iri_id.hash(state),
Self::InlineBlankNode { id } => id.hash(state),
Self::NamedBlankNode { id_id } => id_id.hash(state),
Self::NumericalBlankNode { id } => id.hash(state),
Self::SmallBlankNode(id) => id.hash(state),
Self::BigBlankNode { id_id } => id_id.hash(state),
Self::DefaultGraph => (),
Self::StringLiteral { value_id } => value_id.hash(state),
Self::LangStringLiteral {
Self::SmallStringLiteral(value) => value.hash(state),
Self::BigStringLiteral { value_id } => value_id.hash(state),
Self::SmallSmallLangStringLiteral { value, language } => {
value.hash(state);
language.hash(state);
}
Self::SmallBigLangStringLiteral { value, language_id } => {
value.hash(state);
language_id.hash(state);
}
Self::BigSmallLangStringLiteral { value_id, language } => {
value_id.hash(state);
language.hash(state);
}
Self::BigBigLangStringLiteral {
value_id,
language_id,
} => {
value_id.hash(state);
language_id.hash(state);
}
Self::TypedLiteral {
Self::SmallTypedLiteral { value, datatype_id } => {
value.hash(state);
datatype_id.hash(state);
}
Self::BigTypedLiteral {
value_id,
datatype_id,
} => {
@ -157,16 +249,23 @@ impl<I: StrId> EncodedTerm<I> {
pub fn is_blank_node(&self) -> bool {
match self {
Self::InlineBlankNode { .. } | Self::NamedBlankNode { .. } => true,
Self::NumericalBlankNode { .. }
| Self::SmallBlankNode { .. }
| Self::BigBlankNode { .. } => true,
_ => false,
}
}
pub fn is_literal(&self) -> bool {
match self {
Self::StringLiteral { .. }
| Self::LangStringLiteral { .. }
| Self::TypedLiteral { .. }
Self::SmallStringLiteral { .. }
| Self::BigStringLiteral { .. }
| Self::SmallSmallLangStringLiteral { .. }
| Self::SmallBigLangStringLiteral { .. }
| Self::BigSmallLangStringLiteral { .. }
| Self::BigBigLangStringLiteral { .. }
| Self::SmallTypedLiteral { .. }
| Self::BigTypedLiteral { .. }
| Self::BooleanLiteral(_)
| Self::FloatLiteral(_)
| Self::DoubleLiteral(_)
@ -192,24 +291,45 @@ impl<I: StrId> EncodedTerm<I> {
Self::NamedNode { iri_id } => EncodedTerm::NamedNode {
iri_id: mapping(iri_id),
},
Self::InlineBlankNode { id } => EncodedTerm::InlineBlankNode { id },
Self::NamedBlankNode { id_id } => EncodedTerm::NamedBlankNode {
Self::NumericalBlankNode { id } => EncodedTerm::NumericalBlankNode { id },
Self::SmallBlankNode(id) => EncodedTerm::SmallBlankNode(id),
Self::BigBlankNode { id_id } => EncodedTerm::BigBlankNode {
id_id: mapping(id_id),
},
Self::StringLiteral { value_id } => EncodedTerm::StringLiteral {
Self::SmallStringLiteral(value) => EncodedTerm::SmallStringLiteral(value),
Self::BigStringLiteral { value_id } => EncodedTerm::BigStringLiteral {
value_id: mapping(value_id),
},
Self::LangStringLiteral {
Self::SmallSmallLangStringLiteral { value, language } => {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
}
Self::SmallBigLangStringLiteral { value, language_id } => {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: mapping(language_id),
}
}
Self::BigSmallLangStringLiteral { value_id, language } => {
EncodedTerm::BigSmallLangStringLiteral {
value_id: mapping(value_id),
language,
}
}
Self::BigBigLangStringLiteral {
value_id,
language_id,
} => EncodedTerm::LangStringLiteral {
} => EncodedTerm::BigBigLangStringLiteral {
value_id: mapping(value_id),
language_id: mapping(language_id),
},
Self::TypedLiteral {
Self::SmallTypedLiteral { value, datatype_id } => EncodedTerm::SmallTypedLiteral {
value,
datatype_id: mapping(datatype_id),
},
Self::BigTypedLiteral {
value_id,
datatype_id,
} => EncodedTerm::TypedLiteral {
} => EncodedTerm::BigTypedLiteral {
value_id: mapping(value_id),
datatype_id: mapping(datatype_id),
},
@ -233,24 +353,45 @@ impl<I: StrId> EncodedTerm<I> {
Self::NamedNode { iri_id } => EncodedTerm::NamedNode {
iri_id: mapping(iri_id)?,
},
Self::InlineBlankNode { id } => EncodedTerm::InlineBlankNode { id },
Self::NamedBlankNode { id_id } => EncodedTerm::NamedBlankNode {
Self::NumericalBlankNode { id } => EncodedTerm::NumericalBlankNode { id },
Self::SmallBlankNode(id) => EncodedTerm::SmallBlankNode(id),
Self::BigBlankNode { id_id } => EncodedTerm::BigBlankNode {
id_id: mapping(id_id)?,
},
Self::StringLiteral { value_id } => EncodedTerm::StringLiteral {
Self::SmallStringLiteral(value) => EncodedTerm::SmallStringLiteral(value),
Self::BigStringLiteral { value_id } => EncodedTerm::BigStringLiteral {
value_id: mapping(value_id)?,
},
Self::LangStringLiteral {
Self::SmallSmallLangStringLiteral { value, language } => {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
}
Self::SmallBigLangStringLiteral { value, language_id } => {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: mapping(language_id)?,
}
}
Self::BigSmallLangStringLiteral { value_id, language } => {
EncodedTerm::BigSmallLangStringLiteral {
value_id: mapping(value_id)?,
language,
}
}
Self::BigBigLangStringLiteral {
value_id,
language_id,
} => EncodedTerm::LangStringLiteral {
} => EncodedTerm::BigBigLangStringLiteral {
value_id: mapping(value_id)?,
language_id: mapping(language_id)?,
},
Self::TypedLiteral {
Self::SmallTypedLiteral { value, datatype_id } => EncodedTerm::SmallTypedLiteral {
value,
datatype_id: mapping(datatype_id)?,
},
Self::BigTypedLiteral {
value_id,
datatype_id,
} => EncodedTerm::TypedLiteral {
} => EncodedTerm::BigTypedLiteral {
value_id: mapping(value_id)?,
datatype_id: mapping(datatype_id)?,
},
@ -397,35 +538,6 @@ pub(crate) trait StrContainer: WithStoreError {
fn insert_str(&mut self, value: &str) -> Result<Self::StrId, Self::Error>;
}
#[derive(Default)]
pub struct MemoryStrStore {
inner: Rodeo,
}
impl StrId for Spur {}
impl WithStoreError for MemoryStrStore {
type Error = Infallible;
type StrId = Spur;
}
impl StrLookup for MemoryStrStore {
fn get_str(&self, id: Spur) -> Result<Option<String>, Infallible> {
//TODO: avoid copy by adding a lifetime limit to get_str
Ok(self.inner.try_resolve(&id).map(|e| e.to_owned()))
}
fn get_str_id(&self, value: &str) -> Result<Option<Spur>, Infallible> {
Ok(self.inner.get(value))
}
}
impl StrContainer for MemoryStrStore {
fn insert_str(&mut self, value: &str) -> Result<Spur, Infallible> {
Ok(self.inner.get_or_intern(value))
}
}
/// Tries to encode a term based on the existing strings (does not insert anything)
pub(crate) trait ReadEncoder: WithStoreError {
fn get_encoded_named_node(
@ -446,14 +558,19 @@ pub(crate) trait ReadEncoder: WithStoreError {
blank_node: BlankNodeRef<'_>,
) -> Result<Option<EncodedTerm<Self::StrId>>, Self::Error> {
Ok(Some(if let Some(id) = blank_node.id() {
EncodedTerm::InlineBlankNode { id }
EncodedTerm::NumericalBlankNode { id }
} else {
EncodedTerm::NamedBlankNode {
id_id: if let Some(id_id) = self.get_encoded_str(blank_node.as_str())? {
id_id
} else {
return Ok(None);
},
let id = blank_node.as_str();
if let Ok(id) = id.try_into() {
EncodedTerm::SmallBlankNode(id)
} else {
EncodedTerm::BigBlankNode {
id_id: if let Some(id_id) = self.get_encoded_str(id)? {
id_id
} else {
return Ok(None);
},
}
}
}))
}
@ -462,40 +579,73 @@ pub(crate) trait ReadEncoder: WithStoreError {
&self,
literal: LiteralRef<'_>,
) -> Result<Option<EncodedTerm<Self::StrId>>, Self::Error> {
let value = literal.value();
let datatype = literal.datatype().as_str();
Ok(Some(
match match literal.datatype().as_str() {
match match datatype {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => {
if let Some(language) = literal.language() {
Some(EncodedTerm::LangStringLiteral {
value_id: if let Some(value_id) =
self.get_encoded_str(literal.value())?
{
value_id
} else {
return Ok(None);
},
language_id: if let Some(language_id) =
self.get_encoded_str(language)?
{
language_id
if let Ok(value) = SmallString::try_from(value) {
if let Ok(language) = SmallString::try_from(language) {
Some(EncodedTerm::SmallSmallLangStringLiteral { value, language })
} else {
return Ok(None);
},
})
Some(EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: if let Some(language_id) =
self.get_encoded_str(language)?
{
language_id
} else {
return Ok(None);
},
})
}
} else if let Ok(language) = SmallString::try_from(language) {
Some(EncodedTerm::BigSmallLangStringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id
} else {
return Ok(None);
},
language,
})
} else {
Some(EncodedTerm::BigBigLangStringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id
} else {
return Ok(None);
},
language_id: if let Some(language_id) =
self.get_encoded_str(language)?
{
language_id
} else {
return Ok(None);
},
})
}
} else {
None
}
}
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#string" => Some(EncodedTerm::StringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(literal.value())? {
value_id
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
"http://www.w3.org/2001/XMLSchema#string" => {
let value = value;
Some(if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
return Ok(None);
},
}),
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(literal.value()),
EncodedTerm::BigStringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id
} else {
return Ok(None);
},
}
})
}
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
"http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short"
@ -508,40 +658,51 @@ pub(crate) trait ReadEncoder: WithStoreError {
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
parse_integer_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#date" => parse_date_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#time" => parse_time_str(literal.value()),
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => parse_integer_str(value),
"http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(value),
"http://www.w3.org/2001/XMLSchema#date" => parse_date_str(value),
"http://www.w3.org/2001/XMLSchema#time" => parse_time_str(value),
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
parse_date_time_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(literal.value()),
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => parse_date_time_str(value),
"http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(value),
"http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
parse_year_month_duration_str(literal.value())
parse_year_month_duration_str(value)
}
"http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
parse_day_time_duration_str(literal.value())
parse_day_time_duration_str(value)
}
_ => None,
} {
Some(term) => term,
None => EncodedTerm::TypedLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(literal.value())? {
value_id
} else {
return Ok(None);
},
datatype_id: if let Some(datatype_id) =
self.get_encoded_str(literal.datatype().as_str())?
{
datatype_id
None => {
if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: if let Some(datatype_id) =
self.get_encoded_str(datatype)?
{
datatype_id
} else {
return Ok(None);
},
}
} else {
return Ok(None);
},
},
EncodedTerm::BigTypedLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id
} else {
return Ok(None);
},
datatype_id: if let Some(datatype_id) =
self.get_encoded_str(datatype)?
{
datatype_id
} else {
return Ok(None);
},
}
}
}
},
))
}
@ -628,13 +789,18 @@ pub(crate) trait WriteEncoder: WithStoreError {
&mut self,
blank_node: BlankNodeRef<'_>,
) -> Result<EncodedTerm<Self::StrId>, Self::Error> {
if let Some(id) = blank_node.id() {
Ok(EncodedTerm::InlineBlankNode { id })
Ok(if let Some(id) = blank_node.id() {
EncodedTerm::NumericalBlankNode { id }
} else {
Ok(EncodedTerm::NamedBlankNode {
id_id: self.encode_str(blank_node.as_str())?,
})
}
let id = blank_node.as_str();
if let Ok(id) = id.try_into() {
EncodedTerm::SmallBlankNode(id)
} else {
EncodedTerm::BigBlankNode {
id_id: self.encode_str(id)?,
}
}
})
}
fn encode_literal(
@ -710,11 +876,11 @@ pub(crate) trait WriteEncoder: WithStoreError {
bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm<Self::StrId>, Self::Error> {
Ok(if let Some(id) = bnodes_map.get(blank_node.id) {
EncodedTerm::InlineBlankNode { id: *id }
EncodedTerm::NumericalBlankNode { id: *id }
} else {
let id = random::<u128>();
bnodes_map.insert(blank_node.id.to_owned(), id);
EncodedTerm::InlineBlankNode { id }
EncodedTerm::NumericalBlankNode { id }
})
}
fn encode_rio_literal(
@ -722,21 +888,49 @@ pub(crate) trait WriteEncoder: WithStoreError {
literal: rio::Literal<'_>,
) -> Result<EncodedTerm<Self::StrId>, Self::Error> {
Ok(match literal {
rio::Literal::Simple { value } => EncodedTerm::StringLiteral {
value_id: self.encode_str(value)?,
},
rio::Literal::Simple { value } => {
if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(value)?,
}
}
}
rio::Literal::LanguageTaggedString { value, language } => {
EncodedTerm::LangStringLiteral {
value_id: self.encode_str(value)?,
language_id: self.encode_str(language)?,
if let Ok(value) = SmallString::try_from(value) {
if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
} else {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: self.encode_str(language)?,
}
}
} else if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::BigSmallLangStringLiteral {
value_id: self.encode_str(value)?,
language,
}
} else {
EncodedTerm::BigBigLangStringLiteral {
value_id: self.encode_str(value)?,
language_id: self.encode_str(language)?,
}
}
}
rio::Literal::Typed { value, datatype } => {
match match datatype.iri {
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
"http://www.w3.org/2001/XMLSchema#string" => Some(EncodedTerm::StringLiteral {
value_id: self.encode_str(value)?,
}),
"http://www.w3.org/2001/XMLSchema#string" => {
Some(if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(value)?,
}
})
}
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
"http://www.w3.org/2001/XMLSchema#integer"
@ -771,10 +965,19 @@ pub(crate) trait WriteEncoder: WithStoreError {
_ => None,
} {
Some(v) => v,
None => EncodedTerm::TypedLiteral {
value_id: self.encode_str(value)?,
datatype_id: self.encode_str(datatype.iri)?,
},
None => {
if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: self.encode_str(datatype.iri)?,
}
} else {
EncodedTerm::BigTypedLiteral {
value_id: self.encode_str(value)?,
datatype_id: self.encode_str(datatype.iri)?,
}
}
}
}
}
})
@ -968,14 +1171,33 @@ impl<S: StrLookup> Decoder for S {
EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into())
}
EncodedTerm::InlineBlankNode { id } => Ok(BlankNode::new_from_unique_id(id).into()),
EncodedTerm::NamedBlankNode { id_id } => {
EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(id).into()),
EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
EncodedTerm::BigBlankNode { id_id } => {
Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into())
}
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => Ok(Literal::new_simple_literal(value).into()),
EncodedTerm::BigStringLiteral { value_id } => {
Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into())
}
EncodedTerm::LangStringLiteral {
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(value, language).into())
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Ok(Literal::new_language_tagged_literal_unchecked(
value,
get_required_str(self, language_id)?,
)
.into())
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self, value_id)?,
language,
)
.into())
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => Ok(Literal::new_language_tagged_literal_unchecked(
@ -983,7 +1205,14 @@ impl<S: StrLookup> Decoder for S {
get_required_str(self, language_id)?,
)
.into()),
EncodedTerm::TypedLiteral {
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
Ok(Literal::new_typed_literal(
value,
NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
)
.into())
}
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
} => Ok(Literal::new_typed_literal(
@ -1053,42 +1282,3 @@ impl<E: Into<io::Error>> From<DecoderError<E>> for io::Error {
}
}
}
#[test]
fn test_encoding() {
use crate::model::vocab::xsd;
let mut store = MemoryStrStore::default();
let terms: Vec<Term> = vec![
NamedNode::new_unchecked("http://foo.com").into(),
NamedNode::new_unchecked("http://bar.com").into(),
NamedNode::new_unchecked("http://foo.com").into(),
BlankNode::default().into(),
BlankNode::new_unchecked("foo-bnode").into(),
Literal::new_simple_literal("foo-literal").into(),
Literal::from(true).into(),
Literal::from(1.2).into(),
Literal::from(1).into(),
Literal::from("foo-string").into(),
Literal::new_language_tagged_literal("foo-fr", "fr")
.unwrap()
.into(),
Literal::new_language_tagged_literal("foo-FR", "FR")
.unwrap()
.into(),
Literal::new_typed_literal("-1.32", xsd::DECIMAL).into(),
Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME).into(),
Literal::new_typed_literal("2020-01-01", xsd::DATE).into(),
Literal::new_typed_literal("01:01:01Z", xsd::TIME).into(),
Literal::new_typed_literal("PT1S", xsd::DURATION).into(),
Literal::new_typed_literal("-foo", NamedNode::new_unchecked("http://foo.com")).into(),
];
for term in terms {
let encoded = store.encode_term(term.as_ref()).unwrap();
assert_eq!(
Some(encoded),
store.get_encoded_term(term.as_ref()).unwrap()
);
assert_eq!(term, store.decode_term(encoded).unwrap());
}
}

@ -0,0 +1,205 @@
use nom::lib::std::convert::TryFrom;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::convert::TryInto;
use std::error::Error;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::str;
use std::str::{FromStr, Utf8Error};
/// A small string stored inline in a fixed 16-byte buffer, with no heap allocation.
///
/// The UTF-8 payload starts at byte 0 and the last byte (`inner[15]`) holds the payload
/// length in bytes, so values built with `FromStr` hold at most 15 bytes of text.
/// The type is `Copy`, and its `Default` value (all zeros) is the empty string.
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
pub struct SmallString {
// Payload bytes followed by zero padding; index 15 is the length byte.
inner: [u8; 16],
}
impl SmallString {
    /// Creates an empty small string (all bytes zero, length 0).
    #[inline]
    pub const fn new() -> Self {
        Self { inner: [0; 16] }
    }

    /// Builds a small string from raw bytes.
    ///
    /// # Errors
    ///
    /// Returns `BadSmallStringError::BadUtf8` if `bytes` is not valid UTF-8 and
    /// `BadSmallStringError::TooLong` if it does not fit in 15 bytes.
    #[inline]
    pub fn from_utf8(bytes: &[u8]) -> Result<SmallString, BadSmallStringError> {
        Self::from_str(str::from_utf8(bytes).map_err(BadSmallStringError::BadUtf8)?)
    }

    /// Rebuilds a small string from the 16-byte form produced by [`SmallString::to_be_bytes`].
    ///
    /// The last byte is read as the payload length. It is validated before being used as
    /// a slice bound so that corrupted input yields an error instead of a panic.
    ///
    /// # Errors
    ///
    /// Returns `BadSmallStringError::TooLong` if the length byte is greater than 15 and
    /// `BadSmallStringError::BadUtf8` if the payload is not valid UTF-8.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Result<SmallString, BadSmallStringError> {
        let len = usize::from(bytes[15]);
        // Only 15 payload bytes exist; the 16th byte is the length itself.
        if len > 15 {
            return Err(BadSmallStringError::TooLong(len));
        }
        // We check that it is valid UTF-8
        str::from_utf8(&bytes[..len]).map_err(BadSmallStringError::BadUtf8)?;
        Ok(Self { inner: bytes })
    }

    /// Returns the length of the string in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.inner[15].into()
    }

    /// Returns `true` if the string has a length of zero.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the content as a string slice.
    #[inline]
    #[allow(unsafe_code)]
    pub fn as_str(&self) -> &str {
        // SAFETY: every constructor either starts from a `&str` or validates the payload
        // with `str::from_utf8`, so the first `len()` bytes are always valid UTF-8.
        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
    }

    /// Returns the payload bytes, without the padding and the length byte.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner[..self.len()]
    }

    /// Returns the raw 16-byte representation (payload, zero padding, length byte).
    #[inline]
    pub fn to_be_bytes(self) -> [u8; 16] {
        self.inner
    }
}
impl Deref for SmallString {
type Target = str;
#[inline]
fn deref(&self) -> &str {
self.as_str()
}
}
impl AsRef<str> for SmallString {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl Borrow<str> for SmallString {
#[inline]
fn borrow(&self) -> &str {
self.as_str()
}
}
/// Debug output is the same as the one of the underlying `str` (quoted and escaped).
impl fmt::Debug for SmallString {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        fmt::Debug::fmt(text, f)
    }
}
/// Displays the stored text, honoring the formatter options like a `str` does.
impl fmt::Display for SmallString {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        fmt::Display::fmt(text, f)
    }
}
impl PartialEq for SmallString {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_str().eq(other.deref())
}
}
impl Eq for SmallString {}
/// Partial ordering of small strings.
///
/// The type has a total order, so this simply wraps `Ord::cmp`; keeping a single
/// source of truth guarantees that `partial_cmp` and `cmp` can never disagree.
impl PartialOrd for SmallString {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
/// Small strings are totally ordered like their text.
impl Ord for SmallString {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let (left, right) = (self.as_str(), other.as_str());
        Ord::cmp(left, right)
    }
}
/// Hashes exactly like the stored text does, so a small string and the
/// equivalent `str` feed the same data to the hasher.
impl Hash for SmallString {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let text = self.as_str();
        Hash::hash(text, state)
    }
}
/// Copies the stored text into a freshly allocated `String`.
impl From<SmallString> for String {
    #[inline]
    fn from(value: SmallString) -> Self {
        String::from(value.as_str())
    }
}
impl<'a> From<&'a SmallString> for &'a str {
#[inline]
fn from(value: &'a SmallString) -> Self {
value.as_str()
}
}
/// Parses a `&str` into a small string.
///
/// Fails with `BadSmallStringError::TooLong` when the text is longer than 15
/// bytes; otherwise the text is copied to the front of a zeroed buffer and its
/// byte length is written in the last byte.
impl FromStr for SmallString {
    type Err = BadSmallStringError;

    #[inline]
    fn from_str(value: &str) -> Result<Self, BadSmallStringError> {
        let size = value.len();
        if size > 15 {
            return Err(BadSmallStringError::TooLong(size));
        }
        let len_byte: u8 = match size.try_into() {
            Ok(byte) => byte,
            Err(_) => return Err(BadSmallStringError::TooLong(size)),
        };
        let mut inner = [0u8; 16];
        inner[..size].copy_from_slice(value.as_bytes());
        inner[15] = len_byte;
        Ok(Self { inner })
    }
}
impl<'a> TryFrom<&'a str> for SmallString {
type Error = BadSmallStringError;
#[inline]
fn try_from(value: &'a str) -> Result<Self, BadSmallStringError> {
Self::from_str(value)
}
}
/// Error returned when a [`SmallString`] cannot be built from the provided input.
#[derive(Debug, Clone, Copy)]
pub enum BadSmallStringError {
    /// The input does not fit in a small string; holds the rejected length in bytes.
    TooLong(usize),
    /// The input bytes are not valid UTF-8; wraps the decoding error.
    BadUtf8(Utf8Error),
}
/// Human-readable description of the error.
///
/// The size limit and the reported size are both UTF-8 byte counts, so the
/// message speaks about bytes: a text with fewer than 15 characters can still
/// be rejected if it contains multi-byte characters.
impl fmt::Display for BadSmallStringError {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::TooLong(v) => write!(
                f,
                "small strings can contain at most 15 bytes, found {}",
                v
            ),
            // The UTF-8 decoding error already carries a complete message.
            Self::BadUtf8(e) => e.fmt(f),
        }
    }
}
/// Exposes the wrapped UTF-8 decoding error as the error source; a length
/// error has no underlying cause.
impl Error for BadSmallStringError {
    #[inline]
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        if let Self::BadUtf8(cause) = self {
            Some(cause)
        } else {
            None
        }
    }
}
Loading…
Cancel
Save