Store more terms inline

pull/46/head
Tpt 4 years ago
parent bf430de125
commit beebcdfbd6
  1. 400
      lib/src/sparql/eval.rs
  2. 326
      lib/src/store/binary_encoder.rs
  3. 2
      lib/src/store/memory.rs
  4. 1
      lib/src/store/mod.rs
  5. 500
      lib/src/store/numeric_encoder.rs
  6. 205
      lib/src/store/small_string.rs

@ -1,12 +1,13 @@
use crate::model::xsd::*; use crate::model::xsd::*;
use crate::model::BlankNode;
use crate::model::Triple; use crate::model::Triple;
use crate::model::{BlankNode, LiteralRef, NamedNodeRef};
use crate::sparql::algebra::{DatasetSpec, GraphPattern, QueryVariants}; use crate::sparql::algebra::{DatasetSpec, GraphPattern, QueryVariants};
use crate::sparql::error::EvaluationError; use crate::sparql::error::EvaluationError;
use crate::sparql::model::*; use crate::sparql::model::*;
use crate::sparql::plan::*; use crate::sparql::plan::*;
use crate::sparql::{Query, ServiceHandler}; use crate::sparql::{Query, ServiceHandler};
use crate::store::numeric_encoder::*; use crate::store::numeric_encoder::*;
use crate::store::small_string::SmallString;
use crate::store::ReadableEncodedStore; use crate::store::ReadableEncodedStore;
use digest::Digest; use digest::Digest;
use md5::Md5; use md5::Md5;
@ -14,7 +15,6 @@ use oxilangtag::LanguageTag;
use oxiri::Iri; use oxiri::Iri;
use rand::random; use rand::random;
use regex::{Regex, RegexBuilder}; use regex::{Regex, RegexBuilder};
use rio_api::model as rio;
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512}; use sha2::{Sha256, Sha384, Sha512};
use std::cmp::Ordering; use std::cmp::Ordering;
@ -1008,14 +1008,19 @@ where
PlanExpression::UnaryNot(e) => self PlanExpression::UnaryNot(e) => self
.to_bool(self.eval_expression(e, tuple)?) .to_bool(self.eval_expression(e, tuple)?)
.map(|v| (!v).into()), .map(|v| (!v).into()),
PlanExpression::Str(e) => Some(EncodedTerm::StringLiteral { PlanExpression::Str(e) => {
value_id: self.to_string_id(self.eval_expression(e, tuple)?)?, Some(self.build_string_literal_from_id(
}), self.to_string_id(self.eval_expression(e, tuple)?)?,
))
}
PlanExpression::Lang(e) => match self.eval_expression(e, tuple)? { PlanExpression::Lang(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::LangStringLiteral { language_id, .. } => { EncodedTerm::SmallSmallLangStringLiteral { language, .. }
Some(EncodedTerm::StringLiteral { | EncodedTerm::BigSmallLangStringLiteral { language, .. } => {
value_id: language_id, Some(self.build_string_literal_from_id(language.into()))
}) }
EncodedTerm::SmallBigLangStringLiteral { language_id, .. }
| EncodedTerm::BigBigLangStringLiteral { language_id, .. } => {
Some(self.build_string_literal_from_id(language_id.into()))
} }
e if e.is_literal() => self.build_string_literal(""), e if e.is_literal() => self.build_string_literal(""),
_ => None, _ => None,
@ -1047,17 +1052,26 @@ where
PlanExpression::Datatype(e) => self.datatype(self.eval_expression(e, tuple)?), PlanExpression::Datatype(e) => self.datatype(self.eval_expression(e, tuple)?),
PlanExpression::Bound(v) => Some(tuple.contains(*v).into()), PlanExpression::Bound(v) => Some(tuple.contains(*v).into()),
PlanExpression::IRI(e) => { PlanExpression::IRI(e) => {
let iri_id = match self.eval_expression(e, tuple)? { let e = self.eval_expression(e, tuple)?;
EncodedTerm::NamedNode { iri_id } => Some(iri_id), if e.is_named_node() {
EncodedTerm::StringLiteral { value_id } => Some(value_id), Some(e)
} else {
let iri = match e {
EncodedTerm::SmallStringLiteral(value) => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id } => {
self.dataset.get_str(value_id).ok()?
}
_ => None, _ => None,
}?; }?;
let iri = self.dataset.get_str(iri_id).ok()??; self.build_named_node(
if let Some(base_iri) = &self.base_iri { &if let Some(base_iri) = &self.base_iri {
self.build_named_node(&base_iri.resolve(&iri).ok()?.into_inner()) base_iri.resolve(&iri)
} else { } else {
Iri::parse(iri).ok()?; Iri::parse(iri)
Some(EncodedTerm::NamedNode { iri_id }) }
.ok()?
.into_inner(),
)
} }
} }
PlanExpression::BNode(id) => match id { PlanExpression::BNode(id) => match id {
@ -1072,7 +1086,7 @@ where
.ok()?, .ok()?,
) )
} }
None => Some(EncodedTerm::InlineBlankNode { None => Some(EncodedTerm::NumericalBlankNode {
id: random::<u128>(), id: random::<u128>(),
}), }),
}, },
@ -1352,11 +1366,10 @@ where
} }
} }
PlanExpression::StrLang(lexical_form, lang_tag) => { PlanExpression::StrLang(lexical_form, lang_tag) => {
Some(EncodedTerm::LangStringLiteral { Some(self.build_lang_string_literal_from_id(
value_id: self self.to_simple_string_id(self.eval_expression(lexical_form, tuple)?)?,
.to_simple_string_id(self.eval_expression(lexical_form, tuple)?)?, self.build_language_id(self.eval_expression(lang_tag, tuple)?)?,
language_id: self.build_language_id(self.eval_expression(lang_tag, tuple)?)?, ))
})
} }
PlanExpression::StrDT(lexical_form, datatype) => { PlanExpression::StrDT(lexical_form, datatype) => {
let value = self.to_simple_string(self.eval_expression(lexical_form, tuple)?)?; let value = self.to_simple_string(self.eval_expression(lexical_form, tuple)?)?;
@ -1369,10 +1382,10 @@ where
}?; }?;
let mut encoder = self.dataset.as_ref(); let mut encoder = self.dataset.as_ref();
encoder encoder
.encode_rio_literal(rio::Literal::Typed { .encode_literal(LiteralRef::new_typed_literal(
value: &value, &value,
datatype: rio::NamedNode { iri: &datatype }, NamedNodeRef::new_unchecked(&datatype),
}) ))
.ok() .ok()
} }
PlanExpression::SameTerm(a, b) => { PlanExpression::SameTerm(a, b) => {
@ -1411,7 +1424,8 @@ where
} }
PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::BooleanLiteral(value) => Some(value.into()), EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_boolean_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_boolean_str(&*self.dataset.get_str(value_id).ok()??) parse_boolean_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1424,7 +1438,8 @@ where
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1_f64 } else { 0_f64 }.into()) Some(if value { 1_f64 } else { 0_f64 }.into())
} }
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_double_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_double_str(&*self.dataset.get_str(value_id).ok()??) parse_double_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1437,7 +1452,9 @@ where
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1_f32 } else { 0_f32 }.into()) Some(if value { 1_f32 } else { 0_f32 }.into())
} }
EncodedTerm::StringLiteral { value_id } => {
EncodedTerm::SmallStringLiteral(value) => parse_float_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_float_str(&*self.dataset.get_str(value_id).ok()??) parse_float_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1448,7 +1465,8 @@ where
EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::IntegerLiteral(value) => Some(value.into()),
EncodedTerm::DecimalLiteral(value) => Some(i64::try_from(value).ok()?.into()), EncodedTerm::DecimalLiteral(value) => Some(i64::try_from(value).ok()?.into()),
EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()), EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_integer_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_integer_str(&*self.dataset.get_str(value_id).ok()??) parse_integer_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1461,7 +1479,8 @@ where
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
Some(Decimal::from(if value { 1 } else { 0 }).into()) Some(Decimal::from(if value { 1 } else { 0 }).into())
} }
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_decimal_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_decimal_str(&*self.dataset.get_str(value_id).ok()??) parse_decimal_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1469,7 +1488,8 @@ where
PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateLiteral(value) => Some(value.into()), EncodedTerm::DateLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(Date::try_from(value).ok()?.into()), EncodedTerm::DateTimeLiteral(value) => Some(Date::try_from(value).ok()?.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_date_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_date_str(&*self.dataset.get_str(value_id).ok()??) parse_date_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1477,7 +1497,8 @@ where
PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::TimeLiteral(value) => Some(value.into()), EncodedTerm::TimeLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(Time::try_from(value).ok()?.into()), EncodedTerm::DateTimeLiteral(value) => Some(Time::try_from(value).ok()?.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_time_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_time_str(&*self.dataset.get_str(value_id).ok()??) parse_time_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1485,7 +1506,8 @@ where
PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateTimeLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(value.into()),
EncodedTerm::DateLiteral(value) => Some(DateTime::try_from(value).ok()?.into()), EncodedTerm::DateLiteral(value) => Some(DateTime::try_from(value).ok()?.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_date_time_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_date_time_str(&*self.dataset.get_str(value_id).ok()??) parse_date_time_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1494,7 +1516,8 @@ where
EncodedTerm::DurationLiteral(value) => Some(value.into()), EncodedTerm::DurationLiteral(value) => Some(value.into()),
EncodedTerm::YearMonthDurationLiteral(value) => Some(Duration::from(value).into()), EncodedTerm::YearMonthDurationLiteral(value) => Some(Duration::from(value).into()),
EncodedTerm::DayTimeDurationLiteral(value) => Some(Duration::from(value).into()), EncodedTerm::DayTimeDurationLiteral(value) => Some(Duration::from(value).into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_duration_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_duration_str(&*self.dataset.get_str(value_id).ok()??) parse_duration_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1504,7 +1527,8 @@ where
Some(YearMonthDuration::try_from(value).ok()?.into()) Some(YearMonthDuration::try_from(value).ok()?.into())
} }
EncodedTerm::YearMonthDurationLiteral(value) => Some(value.into()), EncodedTerm::YearMonthDurationLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_year_month_duration_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_year_month_duration_str(&*self.dataset.get_str(value_id).ok()??) parse_year_month_duration_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
@ -1514,21 +1538,25 @@ where
Some(DayTimeDuration::try_from(value).ok()?.into()) Some(DayTimeDuration::try_from(value).ok()?.into())
} }
EncodedTerm::DayTimeDurationLiteral(value) => Some(value.into()), EncodedTerm::DayTimeDurationLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => parse_day_time_duration_str(&value),
EncodedTerm::BigStringLiteral { value_id } => {
parse_day_time_duration_str(&*self.dataset.get_str(value_id).ok()??) parse_day_time_duration_str(&*self.dataset.get_str(value_id).ok()??)
} }
_ => None, _ => None,
}, },
PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral { PlanExpression::StringCast(e) => {
value_id: self.to_string_id(self.eval_expression(e, tuple)?)?, Some(self.build_string_literal_from_id(
}), self.to_string_id(self.eval_expression(e, tuple)?)?,
))
}
} }
} }
fn to_bool(&self, term: EncodedTerm<S::StrId>) -> Option<bool> { fn to_bool(&self, term: EncodedTerm<S::StrId>) -> Option<bool> {
match term { match term {
EncodedTerm::BooleanLiteral(value) => Some(value), EncodedTerm::BooleanLiteral(value) => Some(value),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => Some(!value.is_empty()),
EncodedTerm::BigStringLiteral { value_id } => {
Some(!self.dataset.get_str(value_id).ok()??.is_empty()) Some(!self.dataset.get_str(value_id).ok()??.is_empty())
} }
EncodedTerm::FloatLiteral(value) => Some(value != 0_f32), EncodedTerm::FloatLiteral(value) => Some(value != 0_f32),
@ -1539,14 +1567,21 @@ where
} }
} }
fn to_string_id(&self, term: EncodedTerm<S::StrId>) -> Option<S::StrId> { fn to_string_id(&self, term: EncodedTerm<S::StrId>) -> Option<SmallStringOrId<S::StrId>> {
match term { match term {
EncodedTerm::DefaultGraph => None, EncodedTerm::DefaultGraph => None,
EncodedTerm::NamedNode { iri_id } => Some(iri_id), EncodedTerm::NamedNode { iri_id } => Some(iri_id.into()),
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => None, EncodedTerm::NumericalBlankNode { .. }
EncodedTerm::StringLiteral { value_id } | EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::LangStringLiteral { value_id, .. } | EncodedTerm::BigBlankNode { .. } => None,
| EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id), EncodedTerm::SmallStringLiteral(value)
| EncodedTerm::SmallSmallLangStringLiteral { value, .. }
| EncodedTerm::SmallBigLangStringLiteral { value, .. }
| EncodedTerm::SmallTypedLiteral { value, .. } => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id }
| EncodedTerm::BigSmallLangStringLiteral { value_id, .. }
| EncodedTerm::BigBigLangStringLiteral { value_id, .. }
| EncodedTerm::BigTypedLiteral { value_id, .. } => Some(value_id.into()),
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
self.build_string_id(if value { "true" } else { "false" }) self.build_string_id(if value { "true" } else { "false" })
} }
@ -1566,25 +1601,32 @@ where
} }
fn to_simple_string(&self, term: EncodedTerm<S::StrId>) -> Option<String> { fn to_simple_string(&self, term: EncodedTerm<S::StrId>) -> Option<String> {
if let EncodedTerm::StringLiteral { value_id } = term { match term {
self.dataset.get_str(value_id).ok()? EncodedTerm::SmallStringLiteral(value) => Some(value.into()),
} else { EncodedTerm::BigStringLiteral { value_id } => self.dataset.get_str(value_id).ok()?,
None _ => None,
} }
} }
fn to_simple_string_id(&self, term: EncodedTerm<S::StrId>) -> Option<S::StrId> { fn to_simple_string_id(
if let EncodedTerm::StringLiteral { value_id } = term { &self,
Some(value_id) term: EncodedTerm<S::StrId>,
} else { ) -> Option<SmallStringOrId<S::StrId>> {
None match term {
EncodedTerm::SmallStringLiteral(value) => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id } => Some(value_id.into()),
_ => None,
} }
} }
fn to_string(&self, term: EncodedTerm<S::StrId>) -> Option<String> { fn to_string(&self, term: EncodedTerm<S::StrId>) -> Option<String> {
match term { match term {
EncodedTerm::StringLiteral { value_id } EncodedTerm::SmallStringLiteral(value)
| EncodedTerm::LangStringLiteral { value_id, .. } => { | EncodedTerm::SmallSmallLangStringLiteral { value, .. }
| EncodedTerm::SmallBigLangStringLiteral { value, .. } => Some(value.into()),
EncodedTerm::BigStringLiteral { value_id }
| EncodedTerm::BigSmallLangStringLiteral { value_id, .. }
| EncodedTerm::BigBigLangStringLiteral { value_id, .. } => {
self.dataset.get_str(value_id).ok()? self.dataset.get_str(value_id).ok()?
} }
_ => None, _ => None,
@ -1594,46 +1636,85 @@ where
fn to_string_and_language( fn to_string_and_language(
&self, &self,
term: EncodedTerm<S::StrId>, term: EncodedTerm<S::StrId>,
) -> Option<(String, Option<S::StrId>)> { ) -> Option<(String, Option<SmallStringOrId<S::StrId>>)> {
match term { match term {
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => Some((value.into(), None)),
EncodedTerm::BigStringLiteral { value_id } => {
Some((self.dataset.get_str(value_id).ok()??, None)) Some((self.dataset.get_str(value_id).ok()??, None))
} }
EncodedTerm::LangStringLiteral { EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Some((value.into(), Some(language.into())))
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Some((value.into(), Some(language_id.into())))
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Some((self.dataset.get_str(value_id).ok()??, Some(language.into())))
}
EncodedTerm::BigBigLangStringLiteral {
value_id, value_id,
language_id, language_id,
} => Some((self.dataset.get_str(value_id).ok()??, Some(language_id))), } => Some((
self.dataset.get_str(value_id).ok()??,
Some(language_id.into()),
)),
_ => None, _ => None,
} }
} }
fn build_named_node(&self, iri: &str) -> Option<EncodedTerm<S::StrId>> { fn build_named_node(&self, iri: &str) -> Option<EncodedTerm<S::StrId>> {
Some(EncodedTerm::NamedNode { Some(EncodedTerm::NamedNode {
iri_id: self.build_string_id(iri)?, iri_id: self.dataset.as_ref().encode_str(iri).ok()?,
}) })
} }
fn build_string_literal(&self, value: &str) -> Option<EncodedTerm<S::StrId>> { fn build_string_literal(&self, value: &str) -> Option<EncodedTerm<S::StrId>> {
Some(EncodedTerm::StringLiteral { Some(self.build_string_literal_from_id(self.build_string_id(value)?))
value_id: self.build_string_id(value)?, }
})
fn build_string_literal_from_id(&self, id: SmallStringOrId<S::StrId>) -> EncodedTerm<S::StrId> {
match id {
SmallStringOrId::Small(value) => EncodedTerm::SmallStringLiteral(value),
SmallStringOrId::Big(value_id) => EncodedTerm::BigStringLiteral { value_id },
}
} }
fn build_lang_string_literal( fn build_lang_string_literal(
&self, &self,
value: &str, value: &str,
language_id: S::StrId, language_id: SmallStringOrId<S::StrId>,
) -> Option<EncodedTerm<S::StrId>> { ) -> Option<EncodedTerm<S::StrId>> {
Some(EncodedTerm::LangStringLiteral { Some(self.build_lang_string_literal_from_id(self.build_string_id(value)?, language_id))
value_id: self.build_string_id(value)?, }
fn build_lang_string_literal_from_id(
&self,
value_id: SmallStringOrId<S::StrId>,
language_id: SmallStringOrId<S::StrId>,
) -> EncodedTerm<S::StrId> {
match (value_id, language_id) {
(SmallStringOrId::Small(value), SmallStringOrId::Small(language)) => {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
}
(SmallStringOrId::Small(value), SmallStringOrId::Big(language_id)) => {
EncodedTerm::SmallBigLangStringLiteral { value, language_id }
}
(SmallStringOrId::Big(value_id), SmallStringOrId::Small(language)) => {
EncodedTerm::BigSmallLangStringLiteral { value_id, language }
}
(SmallStringOrId::Big(value_id), SmallStringOrId::Big(language_id)) => {
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id, language_id,
}) }
}
}
} }
fn build_plain_literal( fn build_plain_literal(
&self, &self,
value: &str, value: &str,
language: Option<S::StrId>, language: Option<SmallStringOrId<S::StrId>>,
) -> Option<EncodedTerm<S::StrId>> { ) -> Option<EncodedTerm<S::StrId>> {
if let Some(language_id) = language { if let Some(language_id) = language {
self.build_lang_string_literal(value, language_id) self.build_lang_string_literal(value, language_id)
@ -1642,11 +1723,15 @@ where
} }
} }
fn build_string_id(&self, value: &str) -> Option<S::StrId> { fn build_string_id(&self, value: &str) -> Option<SmallStringOrId<S::StrId>> {
self.dataset.as_ref().encode_str(value).ok() Some(if let Ok(value) = SmallString::try_from(value) {
value.into()
} else {
self.dataset.as_ref().encode_str(value).ok()?.into()
})
} }
fn build_language_id(&self, value: EncodedTerm<S::StrId>) -> Option<S::StrId> { fn build_language_id(&self, value: EncodedTerm<S::StrId>) -> Option<SmallStringOrId<S::StrId>> {
let mut language = self.to_simple_string(value)?; let mut language = self.to_simple_string(value)?;
language.make_ascii_lowercase(); language.make_ascii_lowercase();
self.build_string_id(LanguageTag::parse(language).ok()?.as_str()) self.build_string_id(LanguageTag::parse(language).ok()?.as_str())
@ -1656,7 +1741,7 @@ where
&self, &self,
arg1: EncodedTerm<S::StrId>, arg1: EncodedTerm<S::StrId>,
arg2: EncodedTerm<S::StrId>, arg2: EncodedTerm<S::StrId>,
) -> Option<(String, String, Option<S::StrId>)> { ) -> Option<(String, String, Option<SmallStringOrId<S::StrId>>)> {
let (value1, language1) = self.to_string_and_language(arg1)?; let (value1, language1) = self.to_string_and_language(arg1)?;
let (value2, language2) = self.to_string_and_language(arg2)?; let (value2, language2) = self.to_string_and_language(arg2)?;
if language2.is_none() || language1 == language2 { if language2.is_none() || language1 == language2 {
@ -1763,17 +1848,52 @@ where
match a { match a {
EncodedTerm::DefaultGraph EncodedTerm::DefaultGraph
| EncodedTerm::NamedNode { .. } | EncodedTerm::NamedNode { .. }
| EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. } | EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::LangStringLiteral { .. } => Some(a == b), | EncodedTerm::BigBlankNode { .. }
EncodedTerm::StringLiteral { value_id: a } => match b { | EncodedTerm::SmallSmallLangStringLiteral { .. }
EncodedTerm::StringLiteral { value_id: b } => Some(a == b), | EncodedTerm::SmallBigLangStringLiteral { .. }
EncodedTerm::TypedLiteral { .. } => None, | EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. } => Some(a == b),
EncodedTerm::SmallStringLiteral(a) => match b {
EncodedTerm::SmallStringLiteral(b) => Some(a == b),
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::BigStringLiteral { value_id: a } => match b {
EncodedTerm::BigStringLiteral { value_id: b } => Some(a == b),
EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false),
},
EncodedTerm::SmallTypedLiteral { .. } => match b {
EncodedTerm::SmallTypedLiteral { .. } if a == b => Some(true),
EncodedTerm::NamedNode { .. }
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. }
| EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. }
| EncodedTerm::BigTypedLiteral { .. } => Some(false),
_ => None,
},
EncodedTerm::BigTypedLiteral { .. } => match b {
EncodedTerm::BigTypedLiteral { .. } if a == b => Some(true),
EncodedTerm::NamedNode { .. }
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. }
| EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. }
| EncodedTerm::SmallTypedLiteral { .. } => Some(false),
_ => None,
},
EncodedTerm::BooleanLiteral(a) => match b { EncodedTerm::BooleanLiteral(a) => match b {
EncodedTerm::BooleanLiteral(b) => Some(a == b), EncodedTerm::BooleanLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::FloatLiteral(a) => match b { EncodedTerm::FloatLiteral(a) => match b {
@ -1781,7 +1901,7 @@ where
EncodedTerm::DoubleLiteral(b) => Some(f64::from(a) == b), EncodedTerm::DoubleLiteral(b) => Some(f64::from(a) == b),
EncodedTerm::IntegerLiteral(b) => Some(a == b as f32), EncodedTerm::IntegerLiteral(b) => Some(a == b as f32),
EncodedTerm::DecimalLiteral(b) => Some(a == b.to_f32()), EncodedTerm::DecimalLiteral(b) => Some(a == b.to_f32()),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::DoubleLiteral(a) => match b { EncodedTerm::DoubleLiteral(a) => match b {
@ -1789,7 +1909,7 @@ where
EncodedTerm::DoubleLiteral(b) => Some(a == b), EncodedTerm::DoubleLiteral(b) => Some(a == b),
EncodedTerm::IntegerLiteral(b) => Some(a == (b as f64)), EncodedTerm::IntegerLiteral(b) => Some(a == (b as f64)),
EncodedTerm::DecimalLiteral(b) => Some(a == b.to_f64()), EncodedTerm::DecimalLiteral(b) => Some(a == b.to_f64()),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::IntegerLiteral(a) => match b { EncodedTerm::IntegerLiteral(a) => match b {
@ -1797,7 +1917,7 @@ where
EncodedTerm::DoubleLiteral(b) => Some((a as f64) == b), EncodedTerm::DoubleLiteral(b) => Some((a as f64) == b),
EncodedTerm::IntegerLiteral(b) => Some(a == b), EncodedTerm::IntegerLiteral(b) => Some(a == b),
EncodedTerm::DecimalLiteral(b) => Some(Decimal::from(a) == b), EncodedTerm::DecimalLiteral(b) => Some(Decimal::from(a) == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::DecimalLiteral(a) => match b { EncodedTerm::DecimalLiteral(a) => match b {
@ -1805,51 +1925,43 @@ where
EncodedTerm::DoubleLiteral(b) => Some(a.to_f64() == b), EncodedTerm::DoubleLiteral(b) => Some(a.to_f64() == b),
EncodedTerm::IntegerLiteral(b) => Some(a == Decimal::from(b)), EncodedTerm::IntegerLiteral(b) => Some(a == Decimal::from(b)),
EncodedTerm::DecimalLiteral(b) => Some(a == b), EncodedTerm::DecimalLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::TypedLiteral { .. } => match b {
EncodedTerm::TypedLiteral { .. } if a == b => Some(true),
EncodedTerm::NamedNode { .. }
| EncodedTerm::InlineBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. }
| EncodedTerm::LangStringLiteral { .. } => Some(false),
_ => None,
},
EncodedTerm::DateLiteral(a) => match b { EncodedTerm::DateLiteral(a) => match b {
EncodedTerm::DateLiteral(b) => Some(a == b), EncodedTerm::DateLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::TimeLiteral(a) => match b { EncodedTerm::TimeLiteral(a) => match b {
EncodedTerm::TimeLiteral(b) => Some(a == b), EncodedTerm::TimeLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::DateTimeLiteral(a) => match b { EncodedTerm::DateTimeLiteral(a) => match b {
EncodedTerm::DateTimeLiteral(b) => Some(a == b), EncodedTerm::DateTimeLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::DurationLiteral(a) => match b { EncodedTerm::DurationLiteral(a) => match b {
EncodedTerm::DurationLiteral(b) => Some(a == b), EncodedTerm::DurationLiteral(b) => Some(a == b),
EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b), EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b),
EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b), EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::YearMonthDurationLiteral(a) => match b { EncodedTerm::YearMonthDurationLiteral(a) => match b {
EncodedTerm::DurationLiteral(b) => Some(a == b), EncodedTerm::DurationLiteral(b) => Some(a == b),
EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b), EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b),
EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b), EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
EncodedTerm::DayTimeDurationLiteral(a) => match b { EncodedTerm::DayTimeDurationLiteral(a) => match b {
EncodedTerm::DurationLiteral(b) => Some(a == b), EncodedTerm::DurationLiteral(b) => Some(a == b),
EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b), EncodedTerm::YearMonthDurationLiteral(b) => Some(a == b),
EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b), EncodedTerm::DayTimeDurationLiteral(b) => Some(a == b),
EncodedTerm::TypedLiteral { .. } => None, EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None,
_ => Some(false), _ => Some(false),
}, },
} }
@ -1874,26 +1986,28 @@ where
) -> Ordering { ) -> Ordering {
match (a, b) { match (a, b) {
(Some(a), Some(b)) => match a { (Some(a), Some(b)) => match a {
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => { EncodedTerm::NumericalBlankNode { .. }
match b { | EncodedTerm::SmallBlankNode { .. }
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::BigBlankNode { .. } => match b {
| EncodedTerm::NamedBlankNode { .. } => Ordering::Equal, EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => Ordering::Equal,
_ => Ordering::Less, _ => Ordering::Less,
} },
}
EncodedTerm::NamedNode { iri_id: a } => match b { EncodedTerm::NamedNode { iri_id: a } => match b {
EncodedTerm::NamedNode { iri_id: b } => { EncodedTerm::NamedNode { iri_id: b } => {
self.compare_str_ids(a, b).unwrap_or(Ordering::Equal) self.compare_str_ids(a, b).unwrap_or(Ordering::Equal)
} }
EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NamedBlankNode { .. } => { EncodedTerm::NumericalBlankNode { .. }
Ordering::Greater | EncodedTerm::SmallBlankNode { .. }
} | EncodedTerm::BigBlankNode { .. } => Ordering::Greater,
_ => Ordering::Less, _ => Ordering::Less,
}, },
a => match b { a => match b {
EncodedTerm::NamedNode { .. } EncodedTerm::NamedNode { .. }
| EncodedTerm::InlineBlankNode { .. } | EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::NamedBlankNode { .. } => Ordering::Greater, | EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::BigBlankNode { .. } => Ordering::Greater,
b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal), b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal),
}, },
}, },
@ -1910,13 +2024,16 @@ where
b: EncodedTerm<S::StrId>, b: EncodedTerm<S::StrId>,
) -> Option<Ordering> { ) -> Option<Ordering> {
match a { match a {
EncodedTerm::StringLiteral { value_id: a } => { EncodedTerm::SmallStringLiteral(a) => match b {
if let EncodedTerm::StringLiteral { value_id: b } = b { EncodedTerm::SmallStringLiteral(b) => a.partial_cmp(&b),
self.compare_str_ids(a, b) EncodedTerm::BigStringLiteral { value_id: b } => self.compare_str_str_id(&a, b),
} else { _ => None,
None },
} EncodedTerm::BigStringLiteral { value_id: a } => match b {
} EncodedTerm::SmallStringLiteral(b) => self.compare_str_id_str(a, &b),
EncodedTerm::BigStringLiteral { value_id: b } => self.compare_str_ids(a, b),
_ => None,
},
EncodedTerm::FloatLiteral(a) => match b { EncodedTerm::FloatLiteral(a) => match b {
EncodedTerm::FloatLiteral(ref b) => a.partial_cmp(b), EncodedTerm::FloatLiteral(ref b) => a.partial_cmp(b),
EncodedTerm::DoubleLiteral(ref b) => f64::from(a).partial_cmp(b), EncodedTerm::DoubleLiteral(ref b) => f64::from(a).partial_cmp(b),
@ -1997,6 +2114,14 @@ where
) )
} }
/// Orders the string stored under id `a` against the plain string `b`.
///
/// Yields `None` when looking up `a` returns an error or finds no string.
fn compare_str_id_str(&self, a: S::StrId, b: &str) -> Option<Ordering> {
    // `.ok()` drops the lookup error; the two `?` unwrap the error and "missing" layers.
    let left = self.dataset.get_str(a).ok()??;
    Some(left.as_str().cmp(b))
}
/// Orders the plain string `a` against the string stored under id `b`.
///
/// Yields `None` when looking up `b` returns an error or finds no string.
fn compare_str_str_id(&self, a: &str, b: S::StrId) -> Option<Ordering> {
    // `.ok()` drops the lookup error; the two `?` unwrap the error and "missing" layers.
    let right = self.dataset.get_str(b).ok()??;
    Some(a.cmp(right.as_str()))
}
fn hash<H: Digest>( fn hash<H: Digest>(
&self, &self,
arg: &PlanExpression<S::StrId>, arg: &PlanExpression<S::StrId>,
@ -2011,16 +2136,21 @@ where
//TODO: optimize? //TODO: optimize?
match value { match value {
EncodedTerm::NamedNode { .. } EncodedTerm::NamedNode { .. }
| EncodedTerm::NamedBlankNode { .. } | EncodedTerm::SmallBlankNode { .. }
| EncodedTerm::InlineBlankNode { .. } | EncodedTerm::BigBlankNode { .. }
| EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::DefaultGraph => None, | EncodedTerm::DefaultGraph => None,
EncodedTerm::StringLiteral { .. } => { EncodedTerm::SmallStringLiteral(_) | EncodedTerm::BigStringLiteral { .. } => {
self.build_named_node("http://www.w3.org/2001/XMLSchema#string") self.build_named_node("http://www.w3.org/2001/XMLSchema#string")
} }
EncodedTerm::LangStringLiteral { .. } => { EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. } => {
self.build_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString") self.build_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
} }
EncodedTerm::TypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { EncodedTerm::SmallTypedLiteral { datatype_id, .. }
| EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode {
iri_id: datatype_id, iri_id: datatype_id,
}), }),
EncodedTerm::BooleanLiteral(..) => { EncodedTerm::BooleanLiteral(..) => {
@ -2504,7 +2634,7 @@ fn get_triple_template_value<I: StrId>(
} }
fn new_bnode<I: StrId>() -> EncodedTerm<I> { fn new_bnode<I: StrId>() -> EncodedTerm<I> {
EncodedTerm::InlineBlankNode { id: random() } EncodedTerm::NumericalBlankNode { id: random() }
} }
fn decode_triple<D: Decoder>( fn decode_triple<D: Decoder>(
@ -2922,7 +3052,7 @@ impl<I: StrId> Accumulator<I> for SampleAccumulator<I> {
struct GroupConcatAccumulator<S: ReadableEncodedStore + 'static> { struct GroupConcatAccumulator<S: ReadableEncodedStore + 'static> {
eval: SimpleEvaluator<S>, eval: SimpleEvaluator<S>,
concat: Option<String>, concat: Option<String>,
language: Option<Option<S::StrId>>, language: Option<Option<SmallStringOrId<S::StrId>>>,
separator: Rc<String>, separator: Rc<String>,
} }
@ -3001,6 +3131,24 @@ fn write_hexa_bytes(bytes: &[u8], buffer: &mut String) {
} }
} }
/// A string that is either held inline or referenced by a store id.
///
/// `GroupConcatAccumulator` keeps its `language` field in this form.
#[derive(Eq, PartialEq, Clone, Copy)]
enum SmallStringOrId<I: StrId> {
/// The string is carried inline as a `SmallString`.
Small(SmallString),
/// The string is identified by an id of type `I`.
Big(I),
}
impl<I: StrId> From<SmallString> for SmallStringOrId<I> {
fn from(value: SmallString) -> Self {
Self::Small(value)
}
}
impl<I: StrId> From<I> for SmallStringOrId<I> {
fn from(value: I) -> Self {
Self::Big(value)
}
}
#[test] #[test]
fn uuid() { fn uuid() {
let mut buffer = String::default(); let mut buffer = String::default();

@ -1,6 +1,7 @@
use crate::error::invalid_data_error; use crate::error::invalid_data_error;
use crate::model::xsd::*; use crate::model::xsd::*;
use crate::store::numeric_encoder::StrId; use crate::store::numeric_encoder::StrId;
use crate::store::small_string::SmallString;
use siphasher::sip128::{Hasher128, SipHasher24}; use siphasher::sip128::{Hasher128, SipHasher24};
use std::hash::Hasher; use std::hash::Hasher;
use std::io; use std::io;
@ -11,33 +12,38 @@ type EncodedTerm = crate::store::numeric_encoder::EncodedTerm<StrHash>;
type EncodedQuad = crate::store::numeric_encoder::EncodedQuad<StrHash>; type EncodedQuad = crate::store::numeric_encoder::EncodedQuad<StrHash>;
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>(); pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
// Encoded term type blocks
// 1-7: usual named nodes (except prefixes c.f. later)
// 8-15: blank nodes
// 16-47: literals
// 48-63: future use
// 64-127: default named node prefixes
// 128-255: custom named node prefixes
const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_INLINE_BLANK_NODE_ID: u8 = 2; const TYPE_NUMERICAL_BLANK_NODE_ID: u8 = 8;
const TYPE_NAMED_BLANK_NODE_ID: u8 = 3; const TYPE_SMALL_BLANK_NODE_ID: u8 = 9;
const TYPE_LANG_STRING_LITERAL_ID: u8 = 4; const TYPE_BIG_BLANK_NODE_ID: u8 = 10;
const TYPE_TYPED_LITERAL_ID: u8 = 5; const TYPE_SMALL_STRING_LITERAL: u8 = 16;
const TYPE_STRING_LITERAL: u8 = 6; const TYPE_BIG_STRING_LITERAL: u8 = 17;
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7; const TYPE_SMALL_SMALL_LANG_STRING_LITERAL: u8 = 20;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8; const TYPE_SMALL_BIG_LANG_STRING_LITERAL: u8 = 21;
const TYPE_FLOAT_LITERAL: u8 = 9; const TYPE_BIG_SMALL_LANG_STRING_LITERAL: u8 = 22;
const TYPE_DOUBLE_LITERAL: u8 = 10; const TYPE_BIG_BIG_LANG_STRING_LITERAL: u8 = 23;
const TYPE_INTEGER_LITERAL: u8 = 11; const TYPE_SMALL_TYPED_LITERAL: u8 = 24;
const TYPE_DECIMAL_LITERAL: u8 = 12; const TYPE_BIG_TYPED_LITERAL: u8 = 25;
const TYPE_DATE_TIME_LITERAL: u8 = 13; const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 28;
const TYPE_DATE_LITERAL: u8 = 14; const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 29;
const TYPE_TIME_LITERAL: u8 = 15; const TYPE_FLOAT_LITERAL: u8 = 30;
const TYPE_DURATION_LITERAL: u8 = 16; const TYPE_DOUBLE_LITERAL: u8 = 31;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 17; const TYPE_INTEGER_LITERAL: u8 = 32;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 18; const TYPE_DECIMAL_LITERAL: u8 = 33;
const TYPE_DATE_TIME_LITERAL: u8 = 34;
pub trait SerializableStrId: StrId { const TYPE_DATE_LITERAL: u8 = 35;
fn len() -> usize; const TYPE_TIME_LITERAL: u8 = 36;
const TYPE_DURATION_LITERAL: u8 = 37;
fn from_be_bytes(bytes: &[u8]) -> Self; const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 38;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 39;
fn push_be_bytes(&self, buffer: &mut Vec<u8>);
}
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
#[repr(transparent)] #[repr(transparent)]
@ -69,24 +75,6 @@ impl StrHash {
impl StrId for StrHash {} impl StrId for StrHash {}
impl SerializableStrId for StrHash {
    /// Number of bytes in the serialized form of a `StrHash`.
    fn len() -> usize {
        16
    }

    /// Rebuilds a `StrHash` from its big-endian byte form.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is not exactly 16 bytes long, because
    /// `copy_from_slice` requires both slices to have the same length.
    fn from_be_bytes(bytes: &[u8]) -> Self {
        let mut raw = [0; 16];
        raw.copy_from_slice(bytes);
        let hash = u128::from_be_bytes(raw);
        Self { hash }
    }

    /// Appends the big-endian byte form of this hash to `buffer`.
    fn push_be_bytes(&self, buffer: &mut Vec<u8>) {
        let encoded = self.to_be_bytes();
        buffer.extend_from_slice(&encoded)
    }
}
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub enum QuadEncoding { pub enum QuadEncoding {
SPOG, SPOG,
@ -240,7 +228,6 @@ impl<R: Read> TermReader for R {
let mut type_buffer = [0]; let mut type_buffer = [0];
self.read_exact(&mut type_buffer)?; self.read_exact(&mut type_buffer)?;
match type_buffer[0] { match type_buffer[0] {
TYPE_DEFAULT_GRAPH_ID => Ok(EncodedTerm::DefaultGraph),
TYPE_NAMED_NODE_ID => { TYPE_NAMED_NODE_ID => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
@ -248,44 +235,100 @@ impl<R: Read> TermReader for R {
iri_id: StrHash::from_be_bytes(buffer), iri_id: StrHash::from_be_bytes(buffer),
}) })
} }
TYPE_INLINE_BLANK_NODE_ID => { TYPE_NUMERICAL_BLANK_NODE_ID => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::InlineBlankNode { Ok(EncodedTerm::NumericalBlankNode {
id: u128::from_be_bytes(buffer), id: u128::from_be_bytes(buffer),
}) })
} }
TYPE_NAMED_BLANK_NODE_ID => { TYPE_SMALL_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::SmallBlankNode(
SmallString::from_be_bytes(buffer).map_err(invalid_data_error)?,
))
}
TYPE_BIG_BLANK_NODE_ID => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedBlankNode { Ok(EncodedTerm::BigBlankNode {
id_id: StrHash::from_be_bytes(buffer), id_id: StrHash::from_be_bytes(buffer),
}) })
} }
TYPE_LANG_STRING_LITERAL_ID => { TYPE_SMALL_SMALL_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16]; let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?; self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16]; let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?; self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::LangStringLiteral { Ok(EncodedTerm::SmallSmallLangStringLiteral {
value: SmallString::from_be_bytes(value_buffer).map_err(invalid_data_error)?,
language: SmallString::from_be_bytes(language_buffer)
.map_err(invalid_data_error)?,
})
}
TYPE_SMALL_BIG_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::SmallBigLangStringLiteral {
value: SmallString::from_be_bytes(value_buffer).map_err(invalid_data_error)?,
language_id: StrHash::from_be_bytes(language_buffer), language_id: StrHash::from_be_bytes(language_buffer),
})
}
TYPE_BIG_SMALL_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::from_be_bytes(value_buffer),
language: SmallString::from_be_bytes(language_buffer)
.map_err(invalid_data_error)?,
})
}
TYPE_BIG_BIG_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::from_be_bytes(value_buffer), value_id: StrHash::from_be_bytes(value_buffer),
language_id: StrHash::from_be_bytes(language_buffer),
})
}
TYPE_SMALL_TYPED_LITERAL => {
let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::SmallTypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer),
value: SmallString::from_be_bytes(value_buffer).map_err(invalid_data_error)?,
}) })
} }
TYPE_TYPED_LITERAL_ID => { TYPE_BIG_TYPED_LITERAL => {
let mut datatype_buffer = [0; 16]; let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?; self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16]; let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?; self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::TypedLiteral { Ok(EncodedTerm::BigTypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer), datatype_id: StrHash::from_be_bytes(datatype_buffer),
value_id: StrHash::from_be_bytes(value_buffer), value_id: StrHash::from_be_bytes(value_buffer),
}) })
} }
TYPE_STRING_LITERAL => { TYPE_SMALL_STRING_LITERAL => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::StringLiteral { Ok(EncodedTerm::SmallStringLiteral(
SmallString::from_be_bytes(buffer).map_err(invalid_data_error)?,
))
}
TYPE_BIG_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::BigStringLiteral {
value_id: StrHash::from_be_bytes(buffer), value_id: StrHash::from_be_bytes(buffer),
}) })
} }
@ -451,38 +494,66 @@ pub fn encode_term_quad(
pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) { pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
match term { match term {
EncodedTerm::DefaultGraph => sink.push(TYPE_DEFAULT_GRAPH_ID), EncodedTerm::DefaultGraph => (),
EncodedTerm::NamedNode { iri_id } => { EncodedTerm::NamedNode { iri_id } => {
sink.push(TYPE_NAMED_NODE_ID); sink.push(TYPE_NAMED_NODE_ID);
iri_id.push_be_bytes(sink) sink.extend_from_slice(&iri_id.to_be_bytes());
} }
EncodedTerm::InlineBlankNode { id } => { EncodedTerm::NumericalBlankNode { id } => {
sink.push(TYPE_INLINE_BLANK_NODE_ID); sink.push(TYPE_NUMERICAL_BLANK_NODE_ID);
sink.extend_from_slice(&id.to_be_bytes()) sink.extend_from_slice(&id.to_be_bytes())
} }
EncodedTerm::NamedBlankNode { id_id } => { EncodedTerm::SmallBlankNode(id) => {
sink.push(TYPE_NAMED_BLANK_NODE_ID); sink.push(TYPE_SMALL_BLANK_NODE_ID);
id_id.push_be_bytes(sink) sink.extend_from_slice(&id.to_be_bytes())
} }
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::BigBlankNode { id_id } => {
sink.push(TYPE_STRING_LITERAL); sink.push(TYPE_BIG_BLANK_NODE_ID);
value_id.push_be_bytes(sink) sink.extend_from_slice(&id_id.to_be_bytes());
} }
EncodedTerm::LangStringLiteral { EncodedTerm::SmallStringLiteral(value) => {
sink.push(TYPE_SMALL_STRING_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::BigStringLiteral { value_id } => {
sink.push(TYPE_BIG_STRING_LITERAL);
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
sink.push(TYPE_SMALL_SMALL_LANG_STRING_LITERAL);
sink.extend_from_slice(&language.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
sink.push(TYPE_SMALL_BIG_LANG_STRING_LITERAL);
sink.extend_from_slice(&language_id.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
sink.push(TYPE_BIG_SMALL_LANG_STRING_LITERAL);
sink.extend_from_slice(&language.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::BigBigLangStringLiteral {
value_id, value_id,
language_id, language_id,
} => { } => {
sink.push(TYPE_LANG_STRING_LITERAL_ID); sink.push(TYPE_BIG_BIG_LANG_STRING_LITERAL);
value_id.push_be_bytes(sink); sink.extend_from_slice(&language_id.to_be_bytes());
language_id.push_be_bytes(sink); sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
sink.push(TYPE_SMALL_TYPED_LITERAL);
sink.extend_from_slice(&datatype_id.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
} }
EncodedTerm::TypedLiteral { EncodedTerm::BigTypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => { } => {
sink.push(TYPE_TYPED_LITERAL_ID); sink.push(TYPE_BIG_TYPED_LITERAL);
value_id.push_be_bytes(sink); sink.extend_from_slice(&datatype_id.to_be_bytes());
datatype_id.push_be_bytes(sink); sink.extend_from_slice(&value_id.to_be_bytes());
} }
EncodedTerm::BooleanLiteral(true) => sink.push(TYPE_BOOLEAN_LITERAL_TRUE), EncodedTerm::BooleanLiteral(true) => sink.push(TYPE_BOOLEAN_LITERAL_TRUE),
EncodedTerm::BooleanLiteral(false) => sink.push(TYPE_BOOLEAN_LITERAL_FALSE), EncodedTerm::BooleanLiteral(false) => sink.push(TYPE_BOOLEAN_LITERAL_FALSE),
@ -528,3 +599,112 @@ pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
} }
} }
} }
#[cfg(test)]
mod test {
    use super::*;
    use crate::store::numeric_encoder::*;
    use std::collections::HashMap;
    use std::convert::Infallible;

    /// Minimal in-memory string store keyed by `StrHash`, used to drive the
    /// encoder/decoder traits in the round-trip test below.
    #[derive(Default)]
    struct MemoryStrStore {
        id2str: HashMap<StrHash, String>,
    }

    impl WithStoreError for MemoryStrStore {
        type Error = Infallible;
        type StrId = StrHash;
    }

    impl StrLookup for MemoryStrStore {
        fn get_str(&self, id: StrHash) -> Result<Option<String>, Infallible> {
            Ok(self.id2str.get(&id).cloned())
        }

        fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Infallible> {
            // Only report an id for strings that have actually been inserted.
            let id = StrHash::new(value);
            Ok(if self.id2str.contains_key(&id) {
                Some(id)
            } else {
                None
            })
        }
    }

    impl StrContainer for MemoryStrStore {
        fn insert_str(&mut self, value: &str) -> Result<StrHash, Infallible> {
            let key = StrHash::new(value);
            self.id2str.entry(key).or_insert_with(|| value.to_owned());
            Ok(key)
        }
    }

    /// Checks that each term survives encode -> lookup -> decode and the
    /// binary `write_term` / `read_term` round trip.
    #[test]
    fn test_encoding() {
        use crate::model::vocab::xsd;
        use crate::model::*;

        let mut store = MemoryStrStore::default();
        let terms: Vec<Term> = vec![
            NamedNode::new_unchecked("http://foo.com").into(),
            NamedNode::new_unchecked("http://bar.com").into(),
            NamedNode::new_unchecked("http://foo.com").into(),
            BlankNode::default().into(),
            BlankNode::new_unchecked("bnode").into(),
            BlankNode::new_unchecked("foo-bnode-thisisaverylargeblanknode").into(),
            Literal::new_simple_literal("literal").into(),
            // A long plain literal, so the non-inline string literal path is covered too
            // (this entry used to be a second large blank node).
            Literal::new_simple_literal("foo-literal-thisisaverylargestringliteral").into(),
            Literal::from(true).into(),
            Literal::from(1.2).into(),
            Literal::from(1).into(),
            Literal::from("foo-string").into(),
            Literal::new_language_tagged_literal_unchecked("foo-fr", "fr").into(),
            Literal::new_language_tagged_literal_unchecked(
                "foo-fr-literal-thisisaverylargelanguagetaggedstringliteral",
                "fr",
            )
            .into(),
            Literal::new_language_tagged_literal_unchecked(
                "foo-big",
                "fr-FR-Latn-x-foo-bar-baz-bat-aaaa-bbbb-cccc",
            )
            .into(),
            Literal::new_language_tagged_literal_unchecked(
                "foo-big-literal-thisisaverylargelanguagetaggedstringliteral",
                "fr-FR-Latn-x-foo-bar-baz-bat-aaaa-bbbb-cccc",
            )
            .into(),
            Literal::new_typed_literal("-1.32", xsd::DECIMAL).into(),
            Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME).into(),
            Literal::new_typed_literal("2020-01-01", xsd::DATE).into(),
            Literal::new_typed_literal("01:01:01Z", xsd::TIME).into(),
            Literal::new_typed_literal("PT1S", xsd::DURATION).into(),
            Literal::new_typed_literal("-foo", NamedNode::new_unchecked("http://foo.com")).into(),
            Literal::new_typed_literal(
                "-foo-thisisaverybigtypedliteralwiththefoodatatype",
                NamedNode::new_unchecked("http://foo.com"),
            )
            .into(),
        ];
        for term in terms {
            let encoded = store.encode_term(term.as_ref()).unwrap();
            assert_eq!(
                Some(encoded),
                store.get_encoded_term(term.as_ref()).unwrap()
            );
            assert_eq!(term, store.decode_term(encoded).unwrap());

            let mut buffer = Vec::new();
            write_term(&mut buffer, encoded);
            assert_eq!(encoded, Cursor::new(&buffer).read_term().unwrap());
        }
    }
}

@ -1394,7 +1394,7 @@ fn label(g: &MemoryStore, hashes: &HashMap<EncodedTerm, u64>) -> Vec<String> {
fn map_term(term: EncodedTerm, bnodes_hash: &HashMap<EncodedTerm, u64>) -> EncodedTerm { fn map_term(term: EncodedTerm, bnodes_hash: &HashMap<EncodedTerm, u64>) -> EncodedTerm {
if term.is_blank_node() { if term.is_blank_node() {
EncodedTerm::InlineBlankNode { EncodedTerm::NumericalBlankNode {
id: (*bnodes_hash.get(&term).unwrap()).into(), id: (*bnodes_hash.get(&term).unwrap()).into(),
} }
} else { } else {

@ -11,6 +11,7 @@ pub(crate) mod numeric_encoder;
pub mod rocksdb; pub mod rocksdb;
#[cfg(feature = "sled")] #[cfg(feature = "sled")]
pub mod sled; pub mod sled;
pub(crate) mod small_string;
pub use crate::store::memory::MemoryStore; pub use crate::store::memory::MemoryStore;
#[cfg(feature = "rocksdb")] #[cfg(feature = "rocksdb")]

@ -4,11 +4,11 @@ use crate::error::invalid_data_error;
use crate::model::xsd::*; use crate::model::xsd::*;
use crate::model::*; use crate::model::*;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use lasso::{Rodeo, Spur}; use crate::store::small_string::SmallString;
use rand::random; use rand::random;
use rio_api::model as rio; use rio_api::model as rio;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::Infallible; use std::convert::{TryFrom, TryInto};
use std::error::Error; use std::error::Error;
use std::fmt::Debug; use std::fmt::Debug;
use std::hash::Hash; use std::hash::Hash;
@ -20,12 +20,44 @@ pub trait StrId: Eq + Debug + Copy + Hash {}
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum EncodedTerm<I: StrId> { pub enum EncodedTerm<I: StrId> {
DefaultGraph, DefaultGraph,
NamedNode { iri_id: I }, NamedNode {
InlineBlankNode { id: u128 }, iri_id: I,
NamedBlankNode { id_id: I }, },
StringLiteral { value_id: I }, NumericalBlankNode {
LangStringLiteral { value_id: I, language_id: I }, id: u128,
TypedLiteral { value_id: I, datatype_id: I }, },
SmallBlankNode(SmallString),
BigBlankNode {
id_id: I,
},
SmallStringLiteral(SmallString),
BigStringLiteral {
value_id: I,
},
SmallSmallLangStringLiteral {
value: SmallString,
language: SmallString,
},
SmallBigLangStringLiteral {
value: SmallString,
language_id: I,
},
BigSmallLangStringLiteral {
value_id: I,
language: SmallString,
},
BigBigLangStringLiteral {
value_id: I,
language_id: I,
},
SmallTypedLiteral {
value: SmallString,
datatype_id: I,
},
BigTypedLiteral {
value_id: I,
datatype_id: I,
},
BooleanLiteral(bool), BooleanLiteral(bool),
FloatLiteral(f32), FloatLiteral(f32),
DoubleLiteral(f64), DoubleLiteral(f64),
@ -46,36 +78,78 @@ impl<I: StrId> PartialEq for EncodedTerm<I> {
(Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => { (Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => {
iri_id_a == iri_id_b iri_id_a == iri_id_b
} }
(Self::InlineBlankNode { id: id_a }, Self::InlineBlankNode { id: id_b }) => { (Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => {
id_a == id_b id_a == id_b
} }
(Self::NamedBlankNode { id_id: id_a }, Self::NamedBlankNode { id_id: id_b }) => { (Self::SmallBlankNode(id_a), Self::SmallBlankNode(id_b)) => id_a == id_b,
(Self::BigBlankNode { id_id: id_a }, Self::BigBlankNode { id_id: id_b }) => {
id_a == id_b id_a == id_b
} }
(Self::SmallStringLiteral(a), Self::SmallStringLiteral(b)) => a == b,
( (
Self::StringLiteral { Self::BigStringLiteral {
value_id: value_id_a, value_id: value_id_a,
}, },
Self::StringLiteral { Self::BigStringLiteral {
value_id: value_id_b, value_id: value_id_b,
}, },
) => value_id_a == value_id_b, ) => value_id_a == value_id_b,
( (
Self::LangStringLiteral { Self::SmallSmallLangStringLiteral {
value: value_a,
language: language_a,
},
Self::SmallSmallLangStringLiteral {
value: value_b,
language: language_b,
},
) => value_a == value_b && language_a == language_b,
(
Self::SmallBigLangStringLiteral {
value: value_a,
language_id: language_id_a,
},
Self::SmallBigLangStringLiteral {
value: value_b,
language_id: language_id_b,
},
) => value_a == value_b && language_id_a == language_id_b,
(
Self::BigSmallLangStringLiteral {
value_id: value_id_a,
language: language_a,
},
Self::BigSmallLangStringLiteral {
value_id: value_id_b,
language: language_b,
},
) => value_id_a == value_id_b && language_a == language_b,
(
Self::BigBigLangStringLiteral {
value_id: value_id_a, value_id: value_id_a,
language_id: language_id_a, language_id: language_id_a,
}, },
Self::LangStringLiteral { Self::BigBigLangStringLiteral {
value_id: value_id_b, value_id: value_id_b,
language_id: language_id_b, language_id: language_id_b,
}, },
) => value_id_a == value_id_b && language_id_a == language_id_b, ) => value_id_a == value_id_b && language_id_a == language_id_b,
( (
Self::TypedLiteral { Self::SmallTypedLiteral {
value: value_a,
datatype_id: datatype_id_a,
},
Self::SmallTypedLiteral {
value: value_b,
datatype_id: datatype_id_b,
},
) => value_a == value_b && datatype_id_a == datatype_id_b,
(
Self::BigTypedLiteral {
value_id: value_id_a, value_id: value_id_a,
datatype_id: datatype_id_a, datatype_id: datatype_id_a,
}, },
Self::TypedLiteral { Self::BigTypedLiteral {
value_id: value_id_b, value_id: value_id_b,
datatype_id: datatype_id_b, datatype_id: datatype_id_b,
}, },
@ -114,18 +188,36 @@ impl<I: StrId> Hash for EncodedTerm<I> {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: Hasher>(&self, state: &mut H) {
match self { match self {
Self::NamedNode { iri_id } => iri_id.hash(state), Self::NamedNode { iri_id } => iri_id.hash(state),
Self::InlineBlankNode { id } => id.hash(state), Self::NumericalBlankNode { id } => id.hash(state),
Self::NamedBlankNode { id_id } => id_id.hash(state), Self::SmallBlankNode(id) => id.hash(state),
Self::BigBlankNode { id_id } => id_id.hash(state),
Self::DefaultGraph => (), Self::DefaultGraph => (),
Self::StringLiteral { value_id } => value_id.hash(state), Self::SmallStringLiteral(value) => value.hash(state),
Self::LangStringLiteral { Self::BigStringLiteral { value_id } => value_id.hash(state),
Self::SmallSmallLangStringLiteral { value, language } => {
value.hash(state);
language.hash(state);
}
Self::SmallBigLangStringLiteral { value, language_id } => {
value.hash(state);
language_id.hash(state);
}
Self::BigSmallLangStringLiteral { value_id, language } => {
value_id.hash(state);
language.hash(state);
}
Self::BigBigLangStringLiteral {
value_id, value_id,
language_id, language_id,
} => { } => {
value_id.hash(state); value_id.hash(state);
language_id.hash(state); language_id.hash(state);
} }
Self::TypedLiteral { Self::SmallTypedLiteral { value, datatype_id } => {
value.hash(state);
datatype_id.hash(state);
}
Self::BigTypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => { } => {
@ -157,16 +249,23 @@ impl<I: StrId> EncodedTerm<I> {
pub fn is_blank_node(&self) -> bool { pub fn is_blank_node(&self) -> bool {
match self { match self {
Self::InlineBlankNode { .. } | Self::NamedBlankNode { .. } => true, Self::NumericalBlankNode { .. }
| Self::SmallBlankNode { .. }
| Self::BigBlankNode { .. } => true,
_ => false, _ => false,
} }
} }
pub fn is_literal(&self) -> bool { pub fn is_literal(&self) -> bool {
match self { match self {
Self::StringLiteral { .. } Self::SmallStringLiteral { .. }
| Self::LangStringLiteral { .. } | Self::BigStringLiteral { .. }
| Self::TypedLiteral { .. } | Self::SmallSmallLangStringLiteral { .. }
| Self::SmallBigLangStringLiteral { .. }
| Self::BigSmallLangStringLiteral { .. }
| Self::BigBigLangStringLiteral { .. }
| Self::SmallTypedLiteral { .. }
| Self::BigTypedLiteral { .. }
| Self::BooleanLiteral(_) | Self::BooleanLiteral(_)
| Self::FloatLiteral(_) | Self::FloatLiteral(_)
| Self::DoubleLiteral(_) | Self::DoubleLiteral(_)
@ -192,24 +291,45 @@ impl<I: StrId> EncodedTerm<I> {
Self::NamedNode { iri_id } => EncodedTerm::NamedNode { Self::NamedNode { iri_id } => EncodedTerm::NamedNode {
iri_id: mapping(iri_id), iri_id: mapping(iri_id),
}, },
Self::InlineBlankNode { id } => EncodedTerm::InlineBlankNode { id }, Self::NumericalBlankNode { id } => EncodedTerm::NumericalBlankNode { id },
Self::NamedBlankNode { id_id } => EncodedTerm::NamedBlankNode { Self::SmallBlankNode(id) => EncodedTerm::SmallBlankNode(id),
Self::BigBlankNode { id_id } => EncodedTerm::BigBlankNode {
id_id: mapping(id_id), id_id: mapping(id_id),
}, },
Self::StringLiteral { value_id } => EncodedTerm::StringLiteral { Self::SmallStringLiteral(value) => EncodedTerm::SmallStringLiteral(value),
Self::BigStringLiteral { value_id } => EncodedTerm::BigStringLiteral {
value_id: mapping(value_id), value_id: mapping(value_id),
}, },
Self::LangStringLiteral { Self::SmallSmallLangStringLiteral { value, language } => {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
}
Self::SmallBigLangStringLiteral { value, language_id } => {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: mapping(language_id),
}
}
Self::BigSmallLangStringLiteral { value_id, language } => {
EncodedTerm::BigSmallLangStringLiteral {
value_id: mapping(value_id),
language,
}
}
Self::BigBigLangStringLiteral {
value_id, value_id,
language_id, language_id,
} => EncodedTerm::LangStringLiteral { } => EncodedTerm::BigBigLangStringLiteral {
value_id: mapping(value_id), value_id: mapping(value_id),
language_id: mapping(language_id), language_id: mapping(language_id),
}, },
Self::TypedLiteral { Self::SmallTypedLiteral { value, datatype_id } => EncodedTerm::SmallTypedLiteral {
value,
datatype_id: mapping(datatype_id),
},
Self::BigTypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => EncodedTerm::TypedLiteral { } => EncodedTerm::BigTypedLiteral {
value_id: mapping(value_id), value_id: mapping(value_id),
datatype_id: mapping(datatype_id), datatype_id: mapping(datatype_id),
}, },
@ -233,24 +353,45 @@ impl<I: StrId> EncodedTerm<I> {
Self::NamedNode { iri_id } => EncodedTerm::NamedNode { Self::NamedNode { iri_id } => EncodedTerm::NamedNode {
iri_id: mapping(iri_id)?, iri_id: mapping(iri_id)?,
}, },
Self::InlineBlankNode { id } => EncodedTerm::InlineBlankNode { id }, Self::NumericalBlankNode { id } => EncodedTerm::NumericalBlankNode { id },
Self::NamedBlankNode { id_id } => EncodedTerm::NamedBlankNode { Self::SmallBlankNode(id) => EncodedTerm::SmallBlankNode(id),
Self::BigBlankNode { id_id } => EncodedTerm::BigBlankNode {
id_id: mapping(id_id)?, id_id: mapping(id_id)?,
}, },
Self::StringLiteral { value_id } => EncodedTerm::StringLiteral { Self::SmallStringLiteral(value) => EncodedTerm::SmallStringLiteral(value),
Self::BigStringLiteral { value_id } => EncodedTerm::BigStringLiteral {
value_id: mapping(value_id)?, value_id: mapping(value_id)?,
}, },
Self::LangStringLiteral { Self::SmallSmallLangStringLiteral { value, language } => {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
}
Self::SmallBigLangStringLiteral { value, language_id } => {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: mapping(language_id)?,
}
}
Self::BigSmallLangStringLiteral { value_id, language } => {
EncodedTerm::BigSmallLangStringLiteral {
value_id: mapping(value_id)?,
language,
}
}
Self::BigBigLangStringLiteral {
value_id, value_id,
language_id, language_id,
} => EncodedTerm::LangStringLiteral { } => EncodedTerm::BigBigLangStringLiteral {
value_id: mapping(value_id)?, value_id: mapping(value_id)?,
language_id: mapping(language_id)?, language_id: mapping(language_id)?,
}, },
Self::TypedLiteral { Self::SmallTypedLiteral { value, datatype_id } => EncodedTerm::SmallTypedLiteral {
value,
datatype_id: mapping(datatype_id)?,
},
Self::BigTypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => EncodedTerm::TypedLiteral { } => EncodedTerm::BigTypedLiteral {
value_id: mapping(value_id)?, value_id: mapping(value_id)?,
datatype_id: mapping(datatype_id)?, datatype_id: mapping(datatype_id)?,
}, },
@ -397,35 +538,6 @@ pub(crate) trait StrContainer: WithStoreError {
fn insert_str(&mut self, value: &str) -> Result<Self::StrId, Self::Error>; fn insert_str(&mut self, value: &str) -> Result<Self::StrId, Self::Error>;
} }
/// In-memory string store backed by a `lasso` `Rodeo`.
#[derive(Default)]
pub struct MemoryStrStore {
// Maps strings to `Spur` keys and back (see the trait impls for this type).
inner: Rodeo,
}
// Marks `lasso`'s `Spur` key as usable wherever a `StrId` is required.
impl StrId for Spur {}
/// Associated error and string-id types for `MemoryStrStore`.
impl WithStoreError for MemoryStrStore {
// The declared error type is `Infallible`.
type Error = Infallible;
// Strings are identified by `Spur` keys.
type StrId = Spur;
}
impl StrLookup for MemoryStrStore {
    /// Returns an owned copy of the string registered under `id`, if any.
    fn get_str(&self, id: Spur) -> Result<Option<String>, Infallible> {
        //TODO: avoid copy by adding a lifetime limit to get_str
        let resolved = self.inner.try_resolve(&id);
        Ok(resolved.map(str::to_owned))
    }

    /// Returns the key already assigned to `value`, without inserting it.
    fn get_str_id(&self, value: &str) -> Result<Option<Spur>, Infallible> {
        let key = self.inner.get(value);
        Ok(key)
    }
}
impl StrContainer for MemoryStrStore {
    /// Returns the key for `value` as given by `Rodeo::get_or_intern`.
    fn insert_str(&mut self, value: &str) -> Result<Spur, Infallible> {
        let key = self.inner.get_or_intern(value);
        Ok(key)
    }
}
/// Tries to encode a term based on the existing strings (does not insert anything) /// Tries to encode a term based on the existing strings (does not insert anything)
pub(crate) trait ReadEncoder: WithStoreError { pub(crate) trait ReadEncoder: WithStoreError {
fn get_encoded_named_node( fn get_encoded_named_node(
@ -446,15 +558,20 @@ pub(crate) trait ReadEncoder: WithStoreError {
blank_node: BlankNodeRef<'_>, blank_node: BlankNodeRef<'_>,
) -> Result<Option<EncodedTerm<Self::StrId>>, Self::Error> { ) -> Result<Option<EncodedTerm<Self::StrId>>, Self::Error> {
Ok(Some(if let Some(id) = blank_node.id() { Ok(Some(if let Some(id) = blank_node.id() {
EncodedTerm::InlineBlankNode { id } EncodedTerm::NumericalBlankNode { id }
} else {
let id = blank_node.as_str();
if let Ok(id) = id.try_into() {
EncodedTerm::SmallBlankNode(id)
} else { } else {
EncodedTerm::NamedBlankNode { EncodedTerm::BigBlankNode {
id_id: if let Some(id_id) = self.get_encoded_str(blank_node.as_str())? { id_id: if let Some(id_id) = self.get_encoded_str(id)? {
id_id id_id
} else { } else {
return Ok(None); return Ok(None);
}, },
} }
}
})) }))
} }
@ -462,14 +579,39 @@ pub(crate) trait ReadEncoder: WithStoreError {
&self, &self,
literal: LiteralRef<'_>, literal: LiteralRef<'_>,
) -> Result<Option<EncodedTerm<Self::StrId>>, Self::Error> { ) -> Result<Option<EncodedTerm<Self::StrId>>, Self::Error> {
let value = literal.value();
let datatype = literal.datatype().as_str();
Ok(Some( Ok(Some(
match match literal.datatype().as_str() { match match datatype {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => { "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
Some(EncodedTerm::LangStringLiteral { if let Ok(value) = SmallString::try_from(value) {
value_id: if let Some(value_id) = if let Ok(language) = SmallString::try_from(language) {
self.get_encoded_str(literal.value())? Some(EncodedTerm::SmallSmallLangStringLiteral { value, language })
} else {
Some(EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: if let Some(language_id) =
self.get_encoded_str(language)?
{ {
language_id
} else {
return Ok(None);
},
})
}
} else if let Ok(language) = SmallString::try_from(language) {
Some(EncodedTerm::BigSmallLangStringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id
} else {
return Ok(None);
},
language,
})
} else {
Some(EncodedTerm::BigBigLangStringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id value_id
} else { } else {
return Ok(None); return Ok(None);
@ -482,20 +624,28 @@ pub(crate) trait ReadEncoder: WithStoreError {
return Ok(None); return Ok(None);
}, },
}) })
}
} else { } else {
None None
} }
} }
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(literal.value()), "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
"http://www.w3.org/2001/XMLSchema#string" => Some(EncodedTerm::StringLiteral { "http://www.w3.org/2001/XMLSchema#string" => {
value_id: if let Some(value_id) = self.get_encoded_str(literal.value())? { let value = value;
Some(if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id value_id
} else { } else {
return Ok(None); return Ok(None);
}, },
}), }
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(literal.value()), })
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(literal.value()), }
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
"http://www.w3.org/2001/XMLSchema#integer" "http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte" | "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short" | "http://www.w3.org/2001/XMLSchema#short"
@ -508,40 +658,51 @@ pub(crate) trait ReadEncoder: WithStoreError {
| "http://www.w3.org/2001/XMLSchema#positiveInteger" | "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger" | "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => { | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => parse_integer_str(value),
parse_integer_str(literal.value()) "http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(value),
} "http://www.w3.org/2001/XMLSchema#date" => parse_date_str(value),
"http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(literal.value()), "http://www.w3.org/2001/XMLSchema#time" => parse_time_str(value),
"http://www.w3.org/2001/XMLSchema#date" => parse_date_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#time" => parse_time_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#dateTime" "http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => { | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => parse_date_time_str(value),
parse_date_time_str(literal.value()) "http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(value),
}
"http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#yearMonthDuration" => { "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
parse_year_month_duration_str(literal.value()) parse_year_month_duration_str(value)
} }
"http://www.w3.org/2001/XMLSchema#dayTimeDuration" => { "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
parse_day_time_duration_str(literal.value()) parse_day_time_duration_str(value)
} }
_ => None, _ => None,
} { } {
Some(term) => term, Some(term) => term,
None => EncodedTerm::TypedLiteral { None => {
value_id: if let Some(value_id) = self.get_encoded_str(literal.value())? { if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: if let Some(datatype_id) =
self.get_encoded_str(datatype)?
{
datatype_id
} else {
return Ok(None);
},
}
} else {
EncodedTerm::BigTypedLiteral {
value_id: if let Some(value_id) = self.get_encoded_str(value)? {
value_id value_id
} else { } else {
return Ok(None); return Ok(None);
}, },
datatype_id: if let Some(datatype_id) = datatype_id: if let Some(datatype_id) =
self.get_encoded_str(literal.datatype().as_str())? self.get_encoded_str(datatype)?
{ {
datatype_id datatype_id
} else { } else {
return Ok(None); return Ok(None);
}, },
}, }
}
}
}, },
)) ))
} }
@ -628,14 +789,19 @@ pub(crate) trait WriteEncoder: WithStoreError {
&mut self, &mut self,
blank_node: BlankNodeRef<'_>, blank_node: BlankNodeRef<'_>,
) -> Result<EncodedTerm<Self::StrId>, Self::Error> { ) -> Result<EncodedTerm<Self::StrId>, Self::Error> {
if let Some(id) = blank_node.id() { Ok(if let Some(id) = blank_node.id() {
Ok(EncodedTerm::InlineBlankNode { id }) EncodedTerm::NumericalBlankNode { id }
} else { } else {
Ok(EncodedTerm::NamedBlankNode { let id = blank_node.as_str();
id_id: self.encode_str(blank_node.as_str())?, if let Ok(id) = id.try_into() {
}) EncodedTerm::SmallBlankNode(id)
} else {
EncodedTerm::BigBlankNode {
id_id: self.encode_str(id)?,
} }
} }
})
}
fn encode_literal( fn encode_literal(
&mut self, &mut self,
@ -710,11 +876,11 @@ pub(crate) trait WriteEncoder: WithStoreError {
bnodes_map: &mut HashMap<String, u128>, bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm<Self::StrId>, Self::Error> { ) -> Result<EncodedTerm<Self::StrId>, Self::Error> {
Ok(if let Some(id) = bnodes_map.get(blank_node.id) { Ok(if let Some(id) = bnodes_map.get(blank_node.id) {
EncodedTerm::InlineBlankNode { id: *id } EncodedTerm::NumericalBlankNode { id: *id }
} else { } else {
let id = random::<u128>(); let id = random::<u128>();
bnodes_map.insert(blank_node.id.to_owned(), id); bnodes_map.insert(blank_node.id.to_owned(), id);
EncodedTerm::InlineBlankNode { id } EncodedTerm::NumericalBlankNode { id }
}) })
} }
fn encode_rio_literal( fn encode_rio_literal(
@ -722,21 +888,49 @@ pub(crate) trait WriteEncoder: WithStoreError {
literal: rio::Literal<'_>, literal: rio::Literal<'_>,
) -> Result<EncodedTerm<Self::StrId>, Self::Error> { ) -> Result<EncodedTerm<Self::StrId>, Self::Error> {
Ok(match literal { Ok(match literal {
rio::Literal::Simple { value } => EncodedTerm::StringLiteral { rio::Literal::Simple { value } => {
if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(value)?, value_id: self.encode_str(value)?,
}, }
}
}
rio::Literal::LanguageTaggedString { value, language } => { rio::Literal::LanguageTaggedString { value, language } => {
EncodedTerm::LangStringLiteral { if let Ok(value) = SmallString::try_from(value) {
if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
} else {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: self.encode_str(language)?,
}
}
} else if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::BigSmallLangStringLiteral {
value_id: self.encode_str(value)?,
language,
}
} else {
EncodedTerm::BigBigLangStringLiteral {
value_id: self.encode_str(value)?, value_id: self.encode_str(value)?,
language_id: self.encode_str(language)?, language_id: self.encode_str(language)?,
} }
} }
}
rio::Literal::Typed { value, datatype } => { rio::Literal::Typed { value, datatype } => {
match match datatype.iri { match match datatype.iri {
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value), "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
"http://www.w3.org/2001/XMLSchema#string" => Some(EncodedTerm::StringLiteral { "http://www.w3.org/2001/XMLSchema#string" => {
Some(if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(value)?, value_id: self.encode_str(value)?,
}), }
})
}
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value), "http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value), "http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
"http://www.w3.org/2001/XMLSchema#integer" "http://www.w3.org/2001/XMLSchema#integer"
@ -771,10 +965,19 @@ pub(crate) trait WriteEncoder: WithStoreError {
_ => None, _ => None,
} { } {
Some(v) => v, Some(v) => v,
None => EncodedTerm::TypedLiteral { None => {
if let Ok(value) = SmallString::try_from(value) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: self.encode_str(datatype.iri)?,
}
} else {
EncodedTerm::BigTypedLiteral {
value_id: self.encode_str(value)?, value_id: self.encode_str(value)?,
datatype_id: self.encode_str(datatype.iri)?, datatype_id: self.encode_str(datatype.iri)?,
}, }
}
}
} }
} }
}) })
@ -968,14 +1171,33 @@ impl<S: StrLookup> Decoder for S {
EncodedTerm::NamedNode { iri_id } => { EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into()) Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into())
} }
EncodedTerm::InlineBlankNode { id } => Ok(BlankNode::new_from_unique_id(id).into()), EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(id).into()),
EncodedTerm::NamedBlankNode { id_id } => { EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
EncodedTerm::BigBlankNode { id_id } => {
Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into()) Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into())
} }
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::SmallStringLiteral(value) => Ok(Literal::new_simple_literal(value).into()),
EncodedTerm::BigStringLiteral { value_id } => {
Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into()) Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into())
} }
EncodedTerm::LangStringLiteral { EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(value, language).into())
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Ok(Literal::new_language_tagged_literal_unchecked(
value,
get_required_str(self, language_id)?,
)
.into())
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self, value_id)?,
language,
)
.into())
}
EncodedTerm::BigBigLangStringLiteral {
value_id, value_id,
language_id, language_id,
} => Ok(Literal::new_language_tagged_literal_unchecked( } => Ok(Literal::new_language_tagged_literal_unchecked(
@ -983,7 +1205,14 @@ impl<S: StrLookup> Decoder for S {
get_required_str(self, language_id)?, get_required_str(self, language_id)?,
) )
.into()), .into()),
EncodedTerm::TypedLiteral { EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
Ok(Literal::new_typed_literal(
value,
NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
)
.into())
}
EncodedTerm::BigTypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => Ok(Literal::new_typed_literal( } => Ok(Literal::new_typed_literal(
@ -1053,42 +1282,3 @@ impl<E: Into<io::Error>> From<DecoderError<E>> for io::Error {
} }
} }
} }
#[test]
fn test_encoding() {
use crate::model::vocab::xsd;
let mut store = MemoryStrStore::default();
let terms: Vec<Term> = vec![
NamedNode::new_unchecked("http://foo.com").into(),
NamedNode::new_unchecked("http://bar.com").into(),
NamedNode::new_unchecked("http://foo.com").into(),
BlankNode::default().into(),
BlankNode::new_unchecked("foo-bnode").into(),
Literal::new_simple_literal("foo-literal").into(),
Literal::from(true).into(),
Literal::from(1.2).into(),
Literal::from(1).into(),
Literal::from("foo-string").into(),
Literal::new_language_tagged_literal("foo-fr", "fr")
.unwrap()
.into(),
Literal::new_language_tagged_literal("foo-FR", "FR")
.unwrap()
.into(),
Literal::new_typed_literal("-1.32", xsd::DECIMAL).into(),
Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME).into(),
Literal::new_typed_literal("2020-01-01", xsd::DATE).into(),
Literal::new_typed_literal("01:01:01Z", xsd::TIME).into(),
Literal::new_typed_literal("PT1S", xsd::DURATION).into(),
Literal::new_typed_literal("-foo", NamedNode::new_unchecked("http://foo.com")).into(),
];
for term in terms {
let encoded = store.encode_term(term.as_ref()).unwrap();
assert_eq!(
Some(encoded),
store.get_encoded_term(term.as_ref()).unwrap()
);
assert_eq!(term, store.decode_term(encoded).unwrap());
}
}

@ -0,0 +1,205 @@
use nom::lib::std::convert::TryFrom;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::convert::TryInto;
use std::error::Error;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::str;
use std::str::{FromStr, Utf8Error};
/// A small inline string
///
/// The text lives directly inside a fixed 16-byte buffer, so the type is
/// `Copy` and never allocates. The UTF-8 content occupies the leading bytes
/// and the last byte (`inner[15]`) stores the content length in bytes, which
/// is why at most 15 bytes of text fit. The derived `Default` is the all-zero
/// buffer, i.e. the empty string.
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
pub struct SmallString {
    // Layout: `inner[..len]` is the UTF-8 text and `inner[15]` is `len`.
    inner: [u8; 16],
}
impl SmallString {
    /// Maximal number of UTF-8 bytes of text; the 16th byte holds the length.
    const MAX_LEN: usize = 15;

    /// Creates an empty small string (all-zero buffer, so the length byte is 0).
    #[inline]
    pub const fn new() -> Self {
        Self { inner: [0; 16] }
    }

    /// Builds a small string from a byte slice holding UTF-8 text.
    ///
    /// # Errors
    ///
    /// Returns `BadSmallStringError::BadUtf8` if `bytes` is not valid UTF-8,
    /// or the error of `from_str` (`TooLong`) if the text does not fit.
    #[inline]
    pub fn from_utf8(bytes: &[u8]) -> Result<SmallString, BadSmallStringError> {
        Self::from_str(str::from_utf8(bytes).map_err(BadSmallStringError::BadUtf8)?)
    }

    /// Rebuilds a small string from the raw buffer returned by `to_be_bytes`.
    ///
    /// # Errors
    ///
    /// Returns `BadSmallStringError::TooLong` if the length byte (`bytes[15]`)
    /// is greater than 15, and `BadSmallStringError::BadUtf8` if the first
    /// `bytes[15]` bytes are not valid UTF-8.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Result<SmallString, BadSmallStringError> {
        let len = usize::from(bytes[15]);
        // The length byte comes from outside: without this check a value above 16
        // makes the slice below panic, and a value of 16 would make the length
        // byte itself part of the text.
        if len > Self::MAX_LEN {
            return Err(BadSmallStringError::TooLong(len));
        }
        // We check that it is valid UTF-8
        str::from_utf8(&bytes[..len]).map_err(BadSmallStringError::BadUtf8)?;
        Ok(Self { inner: bytes })
    }

    /// Length of the text in bytes, read from the last byte of the buffer.
    #[inline]
    pub fn len(&self) -> usize {
        usize::from(self.inner[15])
    }

    /// Returns `true` if the text is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the text as a string slice borrowed from the inline buffer.
    #[inline]
    #[allow(unsafe_code)]
    pub fn as_str(&self) -> &str {
        // SAFETY: every constructor either copies the bytes of a `&str` or
        // validates `inner[..len]` with `str::from_utf8`, and `new()` is empty.
        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
    }

    /// Returns the text bytes, without the padding and the length byte.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner[..self.len()]
    }

    /// Returns the raw 16-byte buffer (text, padding, then the length byte).
    #[inline]
    pub fn to_be_bytes(self) -> [u8; 16] {
        self.inner
    }
}
impl Deref for SmallString {
type Target = str;
#[inline]
fn deref(&self) -> &str {
self.as_str()
}
}
impl AsRef<str> for SmallString {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl Borrow<str> for SmallString {
#[inline]
fn borrow(&self) -> &str {
self.as_str()
}
}
/// Debug output is the one of the underlying `str` (quoted and escaped).
impl fmt::Debug for SmallString {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.as_str(), f)
    }
}
/// Displays the text exactly like the underlying `str`, honoring the
/// formatter's width and alignment flags.
impl fmt::Display for SmallString {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}
impl PartialEq for SmallString {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_str().eq(other.deref())
}
}
// Equality is delegated to `str` equality, which is a total equivalence relation.
impl Eq for SmallString {}
/// Small strings are ordered like their texts (the ordering of `str`).
impl PartialOrd for SmallString {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let (left, right) = (self.as_str(), other.as_str());
        Some(left.cmp(right))
    }
}
/// Total order of small strings: the ordering of `str` applied to the texts.
impl Ord for SmallString {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(self.as_str(), other.as_str())
    }
}
/// Hashes the text exactly like the underlying `str`.
impl Hash for SmallString {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(self.as_str(), state)
    }
}
/// Copies the text of a small string into a heap-allocated `String`.
impl From<SmallString> for String {
    #[inline]
    fn from(value: SmallString) -> Self {
        let text: &str = value.as_str();
        String::from(text)
    }
}
impl<'a> From<&'a SmallString> for &'a str {
#[inline]
fn from(value: &'a SmallString) -> Self {
value.as_str()
}
}
/// Parses a string slice into a small string.
///
/// Fails with `BadSmallStringError::TooLong` when the input is longer than
/// 15 bytes; otherwise the bytes are copied into a zeroed buffer and the byte
/// length is written into the last byte.
impl FromStr for SmallString {
    type Err = BadSmallStringError;

    #[inline]
    fn from_str(value: &str) -> Result<Self, BadSmallStringError> {
        let byte_len = value.len();
        if byte_len > 15 {
            return Err(BadSmallStringError::TooLong(byte_len));
        }

        let mut buffer = [0; 16];
        buffer[..byte_len].copy_from_slice(value.as_bytes());
        // The last byte of the buffer is reserved for the length.
        buffer[15] = byte_len
            .try_into()
            .map_err(|_| BadSmallStringError::TooLong(byte_len))?;
        Ok(Self { inner: buffer })
    }
}
/// Fallible conversion from a string slice; same behavior as `FromStr`.
impl<'a> TryFrom<&'a str> for SmallString {
    type Error = BadSmallStringError;

    #[inline]
    fn try_from(value: &'a str) -> Result<Self, BadSmallStringError> {
        value.parse()
    }
}
/// Error returned when a value cannot be stored as a `SmallString`.
#[derive(Debug, Clone, Copy)]
pub enum BadSmallStringError {
    /// The text does not fit in the inline buffer; the payload is the rejected
    /// length in bytes (`str::len()` of the input, or the stored length byte).
    TooLong(usize),
    /// The provided bytes are not valid UTF-8.
    BadUtf8(Utf8Error),
}
/// Human-readable description of why a value could not become a `SmallString`.
impl fmt::Display for BadSmallStringError {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // The payload is a byte count (`str::len()`), not a number of
            // characters, so the limit is reported in bytes.
            Self::TooLong(len) => write!(
                f,
                "small strings can contain at most 15 bytes, found {}",
                len
            ),
            // Reuse the message of the underlying UTF-8 validation error.
            Self::BadUtf8(e) => e.fmt(f),
        }
    }
}
/// Exposes the UTF-8 validation error as the source of `BadUtf8`; `TooLong`
/// has no underlying cause.
impl Error for BadSmallStringError {
    #[inline]
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match *self {
            Self::BadUtf8(ref cause) => Some(cause),
            Self::TooLong(_) => None,
        }
    }
}
Loading…
Cancel
Save