#![allow(clippy::unreadable_literal)]

use crate::error::invalid_data_error;
use crate::model::xsd::*;
use crate::model::*;
use crate::sparql::EvaluationError;
use crate::storage::small_string::SmallString;
use rand::random;
use rio_api::model as rio;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::collections::HashMap;
use std::convert::{TryFrom, TryInto};
use std::error::Error;
use std::fmt::Debug;
use std::hash::Hash;
use std::hash::Hasher;
use std::rc::Rc;
use std::{fmt, io, str};

/// 128-bit hash of a string, stored by [`EncodedTerm`] in place of the string itself.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[repr(transparent)]
pub struct StrHash {
    hash: u128,
}

impl StrHash {
    /// Hashes the UTF-8 bytes of `value` with `SipHasher24` and keeps the 128-bit result.
    pub fn new(value: &str) -> Self {
        let mut hasher = SipHasher24::new();
        hasher.write(value.as_bytes());
        Self {
            hash: hasher.finish128().into(),
        }
    }

    /// Rebuilds a hash from its 16-byte big-endian representation.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
        Self {
            hash: u128::from_be_bytes(bytes),
        }
    }

    /// Returns the 16-byte big-endian representation of the hash.
    #[inline]
    pub fn to_be_bytes(self) -> [u8; 16] {
        self.hash.to_be_bytes()
    }
}

/// Encoded form of a term.
///
/// `Small*` variants embed their text as a [`SmallString`]; `Big*` variants (and
/// `NamedNode`) only carry the [`StrHash`] of the text. The remaining literal
/// variants carry an already parsed value.
#[derive(Debug, Clone)]
pub enum EncodedTerm {
    DefaultGraph,
    NamedNode {
        iri_id: StrHash,
    },
    NumericalBlankNode {
        id: u128,
    },
    SmallBlankNode(SmallString),
    BigBlankNode {
        id_id: StrHash,
    },
    SmallStringLiteral(SmallString),
    BigStringLiteral {
        value_id: StrHash,
    },
    SmallSmallLangStringLiteral {
        value: SmallString,
        language: SmallString,
    },
    SmallBigLangStringLiteral {
        value: SmallString,
        language_id: StrHash,
    },
    BigSmallLangStringLiteral {
        value_id: StrHash,
        language: SmallString,
    },
    BigBigLangStringLiteral {
        value_id: StrHash,
        language_id: StrHash,
    },
    SmallTypedLiteral {
        value: SmallString,
        datatype_id: StrHash,
    },
    BigTypedLiteral {
        value_id: StrHash,
        datatype_id: StrHash,
    },
    BooleanLiteral(bool),
    FloatLiteral(f32),
    DoubleLiteral(f64),
    IntegerLiteral(i64),
    DecimalLiteral(Decimal),
    DateTimeLiteral(DateTime),
    TimeLiteral(Time),
    DateLiteral(Date),
    GYearMonthLiteral(GYearMonth),
    GYearLiteral(GYear),
    GMonthDayLiteral(GMonthDay),
    GDayLiteral(GDay),
    GMonthLiteral(GMonth),
    DurationLiteral(Duration),
    YearMonthDurationLiteral(YearMonthDuration),
    DayTimeDurationLiteral(DayTimeDuration),
    // NOTE(review): the payload type is assumed to be the `EncodedTriple` struct with
    // `subject` / `predicate` / `object` fields used by `on_each_id` - confirm against
    // its definition elsewhere in this file.
    Triple(Rc<EncodedTriple>),
}

/// Equality between encoded terms.
///
/// Two terms are equal only when they are the same variant with equal payloads.
/// Floating point literals deviate from IEEE `==` in one way: a NaN is equal to
/// any other NaN, which keeps the relation reflexive so that `Eq` can be implemented.
/// Date/time-like literals are compared with `is_identical_with`.
impl PartialEq for EncodedTerm {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::DefaultGraph, Self::DefaultGraph) => true,
            (Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => {
                iri_id_a == iri_id_b
            }
            (Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => {
                id_a == id_b
            }
            (Self::SmallBlankNode(id_a), Self::SmallBlankNode(id_b)) => id_a == id_b,
            (Self::BigBlankNode { id_id: id_a }, Self::BigBlankNode { id_id: id_b }) => {
                id_a == id_b
            }
            (Self::SmallStringLiteral(a), Self::SmallStringLiteral(b)) => a == b,
            (
                Self::BigStringLiteral {
                    value_id: value_id_a,
                },
                Self::BigStringLiteral {
                    value_id: value_id_b,
                },
            ) => value_id_a == value_id_b,
            (
                Self::SmallSmallLangStringLiteral {
                    value: value_a,
                    language: language_a,
                },
                Self::SmallSmallLangStringLiteral {
                    value: value_b,
                    language: language_b,
                },
            ) => value_a == value_b && language_a == language_b,
            (
                Self::SmallBigLangStringLiteral {
                    value: value_a,
                    language_id: language_id_a,
                },
                Self::SmallBigLangStringLiteral {
                    value: value_b,
                    language_id: language_id_b,
                },
            ) => value_a == value_b && language_id_a == language_id_b,
            (
                Self::BigSmallLangStringLiteral {
                    value_id: value_id_a,
                    language: language_a,
                },
                Self::BigSmallLangStringLiteral {
                    value_id: value_id_b,
                    language: language_b,
                },
            ) => value_id_a == value_id_b && language_a == language_b,
            (
                Self::BigBigLangStringLiteral {
                    value_id: value_id_a,
                    language_id: language_id_a,
                },
                Self::BigBigLangStringLiteral {
                    value_id: value_id_b,
                    language_id: language_id_b,
                },
            ) => value_id_a == value_id_b && language_id_a == language_id_b,
            (
                Self::SmallTypedLiteral {
                    value: value_a,
                    datatype_id: datatype_id_a,
                },
                Self::SmallTypedLiteral {
                    value: value_b,
                    datatype_id: datatype_id_b,
                },
            ) => value_a == value_b && datatype_id_a == datatype_id_b,
            (
                Self::BigTypedLiteral {
                    value_id: value_id_a,
                    datatype_id: datatype_id_a,
                },
                Self::BigTypedLiteral {
                    value_id: value_id_b,
                    datatype_id: datatype_id_b,
                },
            ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b,
            (Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b,
            (Self::FloatLiteral(a), Self::FloatLiteral(b)) => {
                // NaN != NaN under IEEE rules; treat all NaNs as one value instead.
                if a.is_nan() {
                    b.is_nan()
                } else {
                    a == b
                }
            }
            (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => {
                // Same NaN handling as for `FloatLiteral`.
                if a.is_nan() {
                    b.is_nan()
                } else {
                    a == b
                }
            }
            (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a == b,
            (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a == b,
            (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(b),
            (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(b),
            (Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(b),
            (Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(b),
            (Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(b),
            (Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(b),
            (Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(b),
            (Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(b),
            (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a == b,
            (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => a == b,
            (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => a == b,
            (Self::Triple(a), Self::Triple(b)) => a == b,
            // Different variants are never equal.
            (_, _) => false,
        }
    }
}

impl Eq for EncodedTerm {}

/// Hashing consistent with the `PartialEq` implementation above.
///
/// Only the payload is fed to the hasher (the variant itself is not), so terms of
/// different variants with the same payload collide; that is allowed by the
/// `Hash` contract.
impl Hash for EncodedTerm {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            Self::NamedNode { iri_id } => iri_id.hash(state),
            Self::NumericalBlankNode { id } => id.hash(state),
            Self::SmallBlankNode(id) => id.hash(state),
            Self::BigBlankNode { id_id } => id_id.hash(state),
            Self::DefaultGraph => (),
            Self::SmallStringLiteral(value) => value.hash(state),
            Self::BigStringLiteral { value_id } => value_id.hash(state),
            Self::SmallSmallLangStringLiteral { value, language } => {
                value.hash(state);
                language.hash(state);
            }
            Self::SmallBigLangStringLiteral { value, language_id } => {
                value.hash(state);
                language_id.hash(state);
            }
            Self::BigSmallLangStringLiteral { value_id, language } => {
                value_id.hash(state);
                language.hash(state);
            }
            Self::BigBigLangStringLiteral {
                value_id,
                language_id,
            } => {
                value_id.hash(state);
                language_id.hash(state);
            }
            Self::SmallTypedLiteral { value, datatype_id } => {
                value.hash(state);
                datatype_id.hash(state);
            }
            Self::BigTypedLiteral {
                value_id,
                datatype_id,
            } => {
                value_id.hash(state);
                datatype_id.hash(state);
            }
            Self::BooleanLiteral(value) => value.hash(state),
            Self::FloatLiteral(value) => {
                // `eq` considers every NaN equal and `+0.0 == -0.0`, although their bit
                // patterns differ. Hash one canonical representative per equivalence
                // class so that equal terms always produce equal hashes.
                let canonical = if value.is_nan() {
                    f32::NAN
                } else if *value == 0.0 {
                    0.0
                } else {
                    *value
                };
                state.write(&canonical.to_ne_bytes());
            }
            Self::DoubleLiteral(value) => {
                // Same canonicalisation as for `FloatLiteral`.
                let canonical = if value.is_nan() {
                    f64::NAN
                } else if *value == 0.0 {
                    0.0
                } else {
                    *value
                };
                state.write(&canonical.to_ne_bytes());
            }
            Self::IntegerLiteral(value) => value.hash(state),
            Self::DecimalLiteral(value) => value.hash(state),
            Self::DateTimeLiteral(value) => value.hash(state),
            Self::TimeLiteral(value) => value.hash(state),
            Self::DateLiteral(value) => value.hash(state),
            Self::GYearMonthLiteral(value) => value.hash(state),
            Self::GYearLiteral(value) => value.hash(state),
            Self::GMonthDayLiteral(value) => value.hash(state),
            Self::GDayLiteral(value) => value.hash(state),
            Self::GMonthLiteral(value) => value.hash(state),
            Self::DurationLiteral(value) => value.hash(state),
            Self::YearMonthDurationLiteral(value) => value.hash(state),
            Self::DayTimeDurationLiteral(value) => value.hash(state),
            Self::Triple(value) => value.hash(state),
        }
    }
}

impl EncodedTerm {
    /// Returns `true` for the `NamedNode` variant.
    pub fn is_named_node(&self) -> bool {
        matches!(self, Self::NamedNode { .. })
    }

    /// Returns `true` for any of the three blank node variants.
    pub fn is_blank_node(&self) -> bool {
        matches!(
            self,
            Self::NumericalBlankNode { .. }
                | Self::SmallBlankNode { .. }
                | Self::BigBlankNode { .. }
        )
    }

    /// Returns `true` for every `*Literal` variant.
    pub fn is_literal(&self) -> bool {
        matches!(
            self,
            Self::SmallStringLiteral { .. }
                | Self::BigStringLiteral { .. }
                | Self::SmallSmallLangStringLiteral { .. }
                | Self::SmallBigLangStringLiteral { .. }
                | Self::BigSmallLangStringLiteral { .. }
                | Self::BigBigLangStringLiteral { .. }
                | Self::SmallTypedLiteral { .. }
                | Self::BigTypedLiteral { .. }
                | Self::BooleanLiteral(_)
                | Self::FloatLiteral(_)
                | Self::DoubleLiteral(_)
                | Self::IntegerLiteral(_)
                | Self::DecimalLiteral(_)
                | Self::DateTimeLiteral(_)
                | Self::TimeLiteral(_)
                | Self::DateLiteral(_)
                | Self::GYearMonthLiteral(_)
                | Self::GYearLiteral(_)
                | Self::GMonthDayLiteral(_)
                | Self::GDayLiteral(_)
                | Self::GMonthLiteral(_)
                | Self::DurationLiteral(_)
                | Self::YearMonthDurationLiteral(_)
                | Self::DayTimeDurationLiteral(_)
        )
    }

    /// Returns `true` for the two typed literal variants that keep their lexical value
    /// together with a datatype hash (`SmallTypedLiteral` and `BigTypedLiteral`).
    pub fn is_unknown_typed_literal(&self) -> bool {
        matches!(
            self,
            Self::SmallTypedLiteral { .. } | Self::BigTypedLiteral { .. }
        )
    }

    /// Returns `true` for the `DefaultGraph` variant.
    pub fn is_default_graph(&self) -> bool {
        matches!(self, Self::DefaultGraph)
    }

    /// Returns `true` for the `Triple` variant.
    pub fn is_triple(&self) -> bool {
        matches!(self, Self::Triple { .. })
    }

    /// Invokes `callback` on every [`StrHash`] stored in this term.
    ///
    /// Nested triples are visited recursively in subject, predicate, object order.
    /// Variants that hold no `StrHash` do not invoke the callback at all.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error produced by `callback`.
    pub fn on_each_id<E>(
        &self,
        callback: &mut impl FnMut(&StrHash) -> Result<(), E>,
    ) -> Result<(), E> {
        match self {
            Self::NamedNode { iri_id } => {
                callback(iri_id)?;
            }
            Self::BigBlankNode { id_id } => {
                callback(id_id)?;
            }
            Self::BigStringLiteral { value_id } => {
                callback(value_id)?;
            }
            Self::SmallBigLangStringLiteral { language_id, .. } => {
                callback(language_id)?;
            }
            Self::BigSmallLangStringLiteral { value_id, .. } => {
                callback(value_id)?;
            }
            Self::BigBigLangStringLiteral {
                value_id,
                language_id,
            } => {
                callback(value_id)?;
                callback(language_id)?;
            }
            Self::SmallTypedLiteral { datatype_id, .. } => {
                callback(datatype_id)?;
            }
            Self::BigTypedLiteral {
                value_id,
                datatype_id,
            } => {
                callback(value_id)?;
                callback(datatype_id)?;
            }
            Self::Triple(triple) => {
                triple.subject.on_each_id(callback)?;
                triple.predicate.on_each_id(callback)?;
                triple.object.on_each_id(callback)?;
            }
            _ => (),
        }
        Ok(())
    }
}

impl From<bool> for EncodedTerm {
    fn from(value: bool) -> Self {
        Self::BooleanLiteral(value)
    }
}

impl From<i64> for EncodedTerm {
    fn from(value: i64) -> Self {
        Self::IntegerLiteral(value)
    }
}

impl From<i32> for EncodedTerm {
    fn from(value: i32) -> Self {
        Self::IntegerLiteral(value.into())
    }
}

impl From<u32> for EncodedTerm {
    fn from(value: u32) -> Self {
        Self::IntegerLiteral(value.into())
    }
}

impl From<u8> for EncodedTerm {
    fn from(value: u8) -> Self {
        Self::IntegerLiteral(value.into())
    }
}

impl From<f32> for EncodedTerm {
    fn from(value: f32) -> Self {
        Self::FloatLiteral(value)
    }
}

impl From<f64> for EncodedTerm {
    fn from(value: f64) -> Self {
        Self::DoubleLiteral(value)
    }
}

impl From<Decimal> for EncodedTerm {
    fn from(value: Decimal) -> Self {
        Self::DecimalLiteral(value)
    }
}

impl From<DateTime> for EncodedTerm {
    fn from(value: DateTime) -> Self {
        Self::DateTimeLiteral(value)
    }
}

impl From