From 58106909ce44c66f70cdf29ea50bfe504d22647a Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 8 Oct 2018 17:41:28 +0200 Subject: [PATCH] Makes encoding of some very usual IRIs and of empty string well known TODO: make the set of encoded URIs extensible --- lib/src/sparql/eval.rs | 16 ++++----- lib/src/store/memory.rs | 13 ++++++- lib/src/store/numeric_encoder.rs | 60 ++++++++++++++++++++++++++++++++ lib/src/store/rocksdb.rs | 6 ++-- 4 files changed, 84 insertions(+), 11 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 7b7bded8..e243912e 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use std::iter::once; use std::iter::Iterator; use std::sync::Arc; -use store::numeric_encoder::EncodedTerm; +use store::numeric_encoder::*; use store::store::EncodedQuadsStore; use Result; @@ -240,13 +240,13 @@ impl SimpleEvaluator { _ => None, }, PlanExpression::Datatype(e) => match self.eval_expression(e, tuple)? { - //TODO EncodedTerm::SimpleLiteral { .. } - //TODO EncodedTerm::LangStringLiteral { .. } + EncodedTerm::SimpleLiteral { .. } => Some(ENCODED_XSD_STRING_NAMED_NODE), + EncodedTerm::LangStringLiteral { .. } => Some(ENCODED_RDF_LANG_STRING_NAMED_NODE), EncodedTerm::TypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { iri_id: datatype_id, }), - //TODO EncodedTerm::StringLiteral { .. } - //TODO EncodedTerm::BooleanLiteral(..) + EncodedTerm::StringLiteral { .. } => Some(ENCODED_XSD_STRING_NAMED_NODE), + EncodedTerm::BooleanLiteral(..) => Some(ENCODED_XSD_BOOLEAN_NAMED_NODE), _ => None, }, PlanExpression::Bound(v) => Some((*v >= tuple.len() && tuple[*v].is_some()).into()), @@ -279,9 +279,9 @@ impl SimpleEvaluator { fn to_bool(&self, term: EncodedTerm) -> Option { match term { EncodedTerm::BooleanLiteral(value) => Some(value), - EncodedTerm::NamedNode { .. } => None, - EncodedTerm::BlankNode(_) => None, - term => self.store.encoder().decode_term(term).ok()?.to_bool(), + EncodedTerm::SimpleLiteral { .. } => Some(term != ENCODED_EMPTY_SIMPLE_LITERAL), + EncodedTerm::StringLiteral { .. } => Some(term != ENCODED_EMPTY_STRING_LITERAL), + _ => None, } } diff --git a/lib/src/store/memory.rs b/lib/src/store/memory.rs index d0daed62..d3a21eb9 100644 --- a/lib/src/store/memory.rs +++ b/lib/src/store/memory.rs @@ -42,13 +42,24 @@ pub type MemoryDataset = StoreDataset; /// ``` pub type MemoryGraph = StoreDefaultGraph; -#[derive(Default)] pub struct MemoryStore { id2str: RwLock>>, str2id: RwLock, u64>>, graph_indexes: RwLock>, } +impl Default for MemoryStore { + fn default() -> Self { + let new = Self { + id2str: RwLock::default(), + str2id: RwLock::default(), + graph_indexes: RwLock::default(), + }; + new.set_first_strings().unwrap(); + new + } +} + #[derive(Default)] struct MemoryGraphIndexes { spo: BTreeMap>>, diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 617eb441..3d00a6db 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -1,4 +1,6 @@ use byteorder::{NetworkEndian, ReadBytesExt, WriteBytesExt}; +use model::vocab::rdf; +use model::vocab::xsd; use model::*; use std::io::Read; use std::io::Write; @@ -10,11 +12,39 @@ use uuid::Uuid; use Error; use Result; +const EMPTY_STRING_ID: u64 = 0; +const RDF_LANG_STRING_ID: u64 = 1; +const XSD_STRING_ID: u64 = 2; +const XSD_BOOLEAN_ID: u64 = 3; +const XSD_FLOAT_ID: u64 = 4; +const XSD_DOUBLE_ID: u64 = 5; +const XSD_INTEGER_ID: u64 = 6; +const XSD_DECIMAL_ID: u64 = 7; +const XSD_DATE_TIME_ID: u64 = 8; + pub trait BytesStore { type BytesOutput: Deref; fn insert_bytes(&self, value: &[u8]) -> Result; fn get_bytes(&self, id: u64) -> Result>; + + /// Should be called when the bytes store is created + fn set_first_strings(&self) -> Result<()> { + if EMPTY_STRING_ID == self.insert_bytes(b"")? + && RDF_LANG_STRING_ID == self.insert_bytes(rdf::LANG_STRING.as_str().as_bytes())? + && XSD_STRING_ID == self.insert_bytes(xsd::STRING.as_str().as_bytes())? + && XSD_BOOLEAN_ID == self.insert_bytes(xsd::BOOLEAN.as_str().as_bytes())? + && XSD_FLOAT_ID == self.insert_bytes(xsd::FLOAT.as_str().as_bytes())? + && XSD_DOUBLE_ID == self.insert_bytes(xsd::DOUBLE.as_str().as_bytes())? + && XSD_INTEGER_ID == self.insert_bytes(xsd::INTEGER.as_str().as_bytes())? + && XSD_DECIMAL_ID == self.insert_bytes(xsd::DECIMAL.as_str().as_bytes())? + && XSD_DATE_TIME_ID == self.insert_bytes(xsd::DATE_TIME.as_str().as_bytes())? + { + Ok(()) + } else { + Err("Failed to properly setup the basic string ids in the dictionnary".into()) + } + } } const TYPE_DEFAULT_GRAPH_ID: u8 = 0; @@ -28,6 +58,36 @@ const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7; const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8; pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {}; +pub static ENCODED_EMPTY_SIMPLE_LITERAL: EncodedTerm = EncodedTerm::SimpleLiteral { + value_id: EMPTY_STRING_ID, +}; +pub static ENCODED_EMPTY_STRING_LITERAL: EncodedTerm = EncodedTerm::StringLiteral { + value_id: EMPTY_STRING_ID, +}; +pub static ENCODED_RDF_LANG_STRING_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: RDF_LANG_STRING_ID, +}; +pub static ENCODED_XSD_STRING_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_STRING_ID, +}; +pub static ENCODED_XSD_BOOLEAN_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_BOOLEAN_ID, +}; +pub static ENCODED_XSD_FLOAT_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_FLOAT_ID, +}; +pub static ENCODED_XSD_DOUBLE_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_DOUBLE_ID, +}; +pub static ENCODED_XSD_INTEGER_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_INTEGER_ID, +}; +pub static ENCODED_XSD_DECIMAL_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_DECIMAL_ID, +}; +pub static ENCODED_XSD_DATE_TIME_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode { + iri_id: XSD_DATE_TIME_ID, +}; #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] pub enum EncodedTerm { diff --git a/lib/src/store/rocksdb.rs b/lib/src/store/rocksdb.rs index 4086eaee..ca505ad9 100644 --- a/lib/src/store/rocksdb.rs +++ b/lib/src/store/rocksdb.rs @@ -65,7 +65,7 @@ impl RocksDbStore { let posg_cf = get_cf(&db, POSG_CF)?; let ospg_cf = get_cf(&db, OSPG_CF)?; - Ok(Self { + let new = Self { db, str_id_counter: Mutex::new(RocksDBCounter::new("bsc")), id2str_cf, @@ -73,7 +73,9 @@ impl RocksDbStore { spog_cf, posg_cf, ospg_cf, - }) + }; + new.set_first_strings()?; + Ok(new) } }