use crate::model::vocab::rdf; use crate::model::vocab::xsd; use crate::model::xsd::*; use crate::model::*; use crate::Error; use crate::Result; use md5::{Digest, Md5}; use rand::random; use rio_api::model as rio; use std::collections::HashMap; use std::hash::Hash; use std::hash::Hasher; use std::io::Read; use std::io::Write; use std::mem::size_of; use std::str; #[derive(Ord, PartialOrd, Eq, PartialEq, Debug, Copy, Clone, Hash)] #[repr(transparent)] pub struct StrHash { hash: u128, } impl StrHash { pub fn new(value: &str) -> Self { Self { hash: u128::from_le_bytes(Md5::new().chain(value).result().into()), } } const fn constant(hash: u128) -> Self { Self { hash } } #[inline] pub fn from_be_bytes(bytes: [u8; 16]) -> Self { Self { hash: u128::from_be_bytes(bytes), } } #[inline] pub fn to_be_bytes(&self) -> [u8; 16] { self.hash.to_be_bytes() } #[inline] pub fn from_le_bytes(bytes: [u8; 16]) -> Self { Self { hash: u128::from_le_bytes(bytes), } } #[inline] pub fn to_le_bytes(&self) -> [u8; 16] { // TODO: remove when changing hash self.hash.to_le_bytes() } } const EMPTY_STRING_ID: StrHash = StrHash::constant(0x7e42_f8ec_9809_80e9_04b2_008f_d98c_1dd4); const RDF_LANG_STRING_ID: StrHash = StrHash::constant(0x18d0_2a52_9d31_6816_3312_0bf8_c4c1_93a2); const XSD_STRING_ID: StrHash = StrHash::constant(0x0a61_f70e_4e33_60d3_9bef_c9b2_d18f_594e); const XSD_BOOLEAN_ID: StrHash = StrHash::constant(0x47f7_8f91_0b4b_158f_11dc_ff5f_9b78_be13); const XSD_FLOAT_ID: StrHash = StrHash::constant(0x17b8_33c5_f0ac_43f4_fafe_fc02_0b2d_adc7); const XSD_DOUBLE_ID: StrHash = StrHash::constant(0x2981_2bd9_5143_2783_9885_73e5_138a_8c01); const XSD_INTEGER_ID: StrHash = StrHash::constant(0xc6fb_689d_64f7_dd7b_dad0_36f9_d4f4_ee2a); const XSD_DECIMAL_ID: StrHash = StrHash::constant(0x3ca7_b56d_a746_719a_6800_081f_bb59_ea33); const XSD_DATE_TIME_ID: StrHash = StrHash::constant(0xc206_6749_e0e5_015e_f7ee_33b7_b28c_c010); const XSD_DATE_ID: StrHash = StrHash::constant(0xcaae_3cc4_f23f_4c5a_7717_dd19_e30a_84b8); const XSD_TIME_ID: StrHash = StrHash::constant(0x7af4_6a16_1b02_35d7_9a79_07ba_3da9_48bb); const XSD_DURATION_ID: StrHash = StrHash::constant(0x78ab_8431_984b_6b06_c42d_6271_b82e_487d); const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_BLANK_NODE_ID: u8 = 2; const TYPE_LANG_STRING_LITERAL_ID: u8 = 4; const TYPE_TYPED_LITERAL_ID: u8 = 5; const TYPE_STRING_LITERAL: u8 = 6; const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7; const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8; const TYPE_FLOAT_LITERAL: u8 = 9; const TYPE_DOUBLE_LITERAL: u8 = 10; const TYPE_INTEGER_LITERAL: u8 = 11; const TYPE_DECIMAL_LITERAL: u8 = 12; const TYPE_DATE_TIME_LITERAL: u8 = 13; const TYPE_DATE_LITERAL: u8 = 14; const TYPE_TIME_LITERAL: u8 = 15; const TYPE_DURATION_LITERAL: u8 = 16; pub const ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph; pub const ENCODED_EMPTY_STRING_LITERAL: EncodedTerm = EncodedTerm::StringLiteral { value_id: EMPTY_STRING_ID, }; pub const ENCODED_RDF_LANG_STRING_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: RDF_LANG_STRING_ID, }; pub const ENCODED_XSD_STRING_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_STRING_ID, }; pub const ENCODED_XSD_BOOLEAN_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_BOOLEAN_ID, }; pub const ENCODED_XSD_FLOAT_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_FLOAT_ID, }; pub const ENCODED_XSD_DOUBLE_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_DOUBLE_ID, }; pub const ENCODED_XSD_INTEGER_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_INTEGER_ID, }; pub const ENCODED_XSD_DECIMAL_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_DECIMAL_ID, }; pub const ENCODED_XSD_DATE_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_DATE_ID, }; pub const ENCODED_XSD_TIME_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_TIME_ID, }; pub const ENCODED_XSD_DATE_TIME_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_DATE_TIME_ID, }; pub const ENCODED_XSD_DURATION_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { iri_id: XSD_DURATION_ID, }; #[derive(Debug, Clone, Copy)] pub enum EncodedTerm { DefaultGraph, NamedNode { iri_id: StrHash, }, BlankNode { id: u128, }, StringLiteral { value_id: StrHash, }, LangStringLiteral { value_id: StrHash, language_id: StrHash, }, TypedLiteral { value_id: StrHash, datatype_id: StrHash, }, BooleanLiteral(bool), FloatLiteral(f32), DoubleLiteral(f64), IntegerLiteral(i64), DecimalLiteral(Decimal), DateLiteral(Date), TimeLiteral(Time), DateTimeLiteral(DateTime), DurationLiteral(Duration), } impl PartialEq for EncodedTerm { fn eq(&self, other: &Self) -> bool { match (self, other) { (EncodedTerm::DefaultGraph, EncodedTerm::DefaultGraph) => true, ( EncodedTerm::NamedNode { iri_id: iri_id_a }, EncodedTerm::NamedNode { iri_id: iri_id_b }, ) => iri_id_a == iri_id_b, (EncodedTerm::BlankNode { id: id_a }, EncodedTerm::BlankNode { id: id_b }) => { id_a == id_b } ( EncodedTerm::StringLiteral { value_id: value_id_a, }, EncodedTerm::StringLiteral { value_id: value_id_b, }, ) => value_id_a == value_id_b, ( EncodedTerm::LangStringLiteral { value_id: value_id_a, language_id: language_id_a, }, EncodedTerm::LangStringLiteral { value_id: value_id_b, language_id: language_id_b, }, ) => value_id_a == value_id_b && language_id_a == language_id_b, ( EncodedTerm::TypedLiteral { value_id: value_id_a, datatype_id: datatype_id_a, }, EncodedTerm::TypedLiteral { value_id: value_id_b, datatype_id: datatype_id_b, }, ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b, (EncodedTerm::BooleanLiteral(a), EncodedTerm::BooleanLiteral(b)) => a == b, (EncodedTerm::FloatLiteral(a), EncodedTerm::FloatLiteral(b)) => { if a.is_nan() { b.is_nan() } else { a == b } } (EncodedTerm::DoubleLiteral(a), EncodedTerm::DoubleLiteral(b)) => { if a.is_nan() { b.is_nan() } else { a == b } } (EncodedTerm::IntegerLiteral(a), EncodedTerm::IntegerLiteral(b)) => a == b, (EncodedTerm::DecimalLiteral(a), EncodedTerm::DecimalLiteral(b)) => a == b, (EncodedTerm::DateLiteral(a), EncodedTerm::DateLiteral(b)) => a == b, (EncodedTerm::TimeLiteral(a), EncodedTerm::TimeLiteral(b)) => a == b, (EncodedTerm::DateTimeLiteral(a), EncodedTerm::DateTimeLiteral(b)) => a == b, (EncodedTerm::DurationLiteral(a), EncodedTerm::DurationLiteral(b)) => a == b, (_, _) => false, } } } impl Eq for EncodedTerm {} impl Hash for EncodedTerm { fn hash(&self, state: &mut H) { match self { EncodedTerm::NamedNode { iri_id } => iri_id.hash(state), EncodedTerm::BlankNode { id } => id.hash(state), EncodedTerm::DefaultGraph => (), EncodedTerm::StringLiteral { value_id } => value_id.hash(state), EncodedTerm::LangStringLiteral { value_id, language_id, } => { value_id.hash(state); language_id.hash(state); } EncodedTerm::TypedLiteral { value_id, datatype_id, } => { value_id.hash(state); datatype_id.hash(state); } EncodedTerm::BooleanLiteral(value) => value.hash(state), EncodedTerm::FloatLiteral(value) => state.write(&value.to_ne_bytes()), EncodedTerm::DoubleLiteral(value) => state.write(&value.to_ne_bytes()), EncodedTerm::IntegerLiteral(value) => value.hash(state), EncodedTerm::DecimalLiteral(value) => value.hash(state), EncodedTerm::DateLiteral(value) => value.hash(state), EncodedTerm::TimeLiteral(value) => value.hash(state), EncodedTerm::DateTimeLiteral(value) => value.hash(state), EncodedTerm::DurationLiteral(value) => value.hash(state), } } } impl EncodedTerm { pub fn is_named_node(&self) -> bool { match self { EncodedTerm::NamedNode { .. } => true, _ => false, } } pub fn is_blank_node(&self) -> bool { match self { EncodedTerm::BlankNode { .. } => true, _ => false, } } pub fn is_literal(&self) -> bool { match self { EncodedTerm::StringLiteral { .. } | EncodedTerm::LangStringLiteral { .. } | EncodedTerm::TypedLiteral { .. } | EncodedTerm::BooleanLiteral(_) | EncodedTerm::FloatLiteral(_) | EncodedTerm::DoubleLiteral(_) | EncodedTerm::IntegerLiteral(_) | EncodedTerm::DecimalLiteral(_) | EncodedTerm::DateLiteral(_) | EncodedTerm::TimeLiteral(_) | EncodedTerm::DateTimeLiteral(_) | EncodedTerm::DurationLiteral(_) => true, _ => false, } } pub fn datatype(&self) -> Option { match self { EncodedTerm::StringLiteral { .. } => Some(ENCODED_XSD_STRING_NAMED_NODE), EncodedTerm::LangStringLiteral { .. } => Some(ENCODED_RDF_LANG_STRING_NAMED_NODE), EncodedTerm::TypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { iri_id: *datatype_id, }), EncodedTerm::BooleanLiteral(..) => Some(ENCODED_XSD_BOOLEAN_NAMED_NODE), EncodedTerm::FloatLiteral(..) => Some(ENCODED_XSD_FLOAT_NAMED_NODE), EncodedTerm::DoubleLiteral(..) => Some(ENCODED_XSD_DOUBLE_NAMED_NODE), EncodedTerm::IntegerLiteral(..) => Some(ENCODED_XSD_INTEGER_NAMED_NODE), EncodedTerm::DecimalLiteral(..) => Some(ENCODED_XSD_DECIMAL_NAMED_NODE), EncodedTerm::DateLiteral(..) => Some(ENCODED_XSD_DATE_NAMED_NODE), EncodedTerm::TimeLiteral(..) => Some(ENCODED_XSD_TIME_NAMED_NODE), EncodedTerm::DateTimeLiteral(..) => Some(ENCODED_XSD_DATE_TIME_NAMED_NODE), EncodedTerm::DurationLiteral(..) => Some(ENCODED_XSD_DURATION_NAMED_NODE), _ => None, } } fn type_id(&self) -> u8 { match self { EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, EncodedTerm::BlankNode { .. } => TYPE_BLANK_NODE_ID, EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL, EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, EncodedTerm::BooleanLiteral(true) => TYPE_BOOLEAN_LITERAL_TRUE, EncodedTerm::BooleanLiteral(false) => TYPE_BOOLEAN_LITERAL_FALSE, EncodedTerm::FloatLiteral(_) => TYPE_FLOAT_LITERAL, EncodedTerm::DoubleLiteral(_) => TYPE_DOUBLE_LITERAL, EncodedTerm::IntegerLiteral(_) => TYPE_INTEGER_LITERAL, EncodedTerm::DecimalLiteral(_) => TYPE_DECIMAL_LITERAL, EncodedTerm::DateLiteral(_) => TYPE_DATE_LITERAL, EncodedTerm::TimeLiteral(_) => TYPE_TIME_LITERAL, EncodedTerm::DateTimeLiteral(_) => TYPE_DATE_TIME_LITERAL, EncodedTerm::DurationLiteral(_) => TYPE_DURATION_LITERAL, } } } impl From for EncodedTerm { fn from(value: bool) -> Self { EncodedTerm::BooleanLiteral(value) } } impl From for EncodedTerm { fn from(value: i64) -> Self { EncodedTerm::IntegerLiteral(value) } } impl From for EncodedTerm { fn from(value: i32) -> Self { EncodedTerm::IntegerLiteral(value.into()) } } impl From for EncodedTerm { fn from(value: u32) -> Self { EncodedTerm::IntegerLiteral(value.into()) } } impl From for EncodedTerm { fn from(value: u8) -> Self { EncodedTerm::IntegerLiteral(value.into()) } } impl From for EncodedTerm { fn from(value: f32) -> Self { EncodedTerm::FloatLiteral(value) } } impl From for EncodedTerm { fn from(value: f64) -> Self { EncodedTerm::DoubleLiteral(value) } } impl From for EncodedTerm { fn from(value: Decimal) -> Self { EncodedTerm::DecimalLiteral(value) } } impl From for EncodedTerm { fn from(value: Date) -> Self { EncodedTerm::DateLiteral(value) } } impl From