From 6725fc6dc54d386266c4d05edba7039347b51f98 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 6 Sep 2018 16:26:20 +0200 Subject: [PATCH] Allows different terms to have various encoding size --- src/store/numeric_encoder.rs | 423 +++++++++++++++++++---------------- src/store/rocksdb/mod.rs | 2 +- src/store/rocksdb/storage.rs | 80 +++---- src/utils.rs | 6 + 4 files changed, 276 insertions(+), 235 deletions(-) diff --git a/src/store/numeric_encoder.rs b/src/store/numeric_encoder.rs index 98fee99b..a662b9f0 100644 --- a/src/store/numeric_encoder.rs +++ b/src/store/numeric_encoder.rs @@ -1,45 +1,110 @@ use errors::*; use model::*; +use std::mem::size_of; use std::ops::Deref; use std::str; use std::str::FromStr; use url::Url; +use utils::from_bytes_slice; +use utils::to_bytes; use uuid::Uuid; -pub const STRING_KEY_SIZE: usize = 8; - pub trait BytesStore { type BytesOutput: Deref; - fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()>; - fn get(&self, id: &[u8]) -> Result>; + fn put(&self, value: &[u8]) -> Result; + fn get(&self, id: usize) -> Result>; } -const TYPE_KEY_SIZE: usize = 1; const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_BLANK_NODE_ID: u8 = 2; const TYPE_LANG_STRING_LITERAL_ID: u8 = 3; const TYPE_TYPED_LITERAL_ID: u8 = 4; -pub const TERM_ENCODING_SIZE: usize = TYPE_KEY_SIZE + 2 * STRING_KEY_SIZE; -const EMPTY_TERM: [u8; TERM_ENCODING_SIZE] = [0 as u8; TERM_ENCODING_SIZE]; #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] -pub struct EncodedTerm([u8; TERM_ENCODING_SIZE]); +pub enum EncodedTerm { + NamedNode { iri_id: usize }, + BlankNode(Uuid), + LangStringLiteral { value_id: usize, language_id: usize }, + TypedLiteral { value_id: usize, datatype_id: usize }, +} impl EncodedTerm { pub fn new_from_buffer(buffer: &[u8]) -> Result { - if buffer.len() != TERM_ENCODING_SIZE { - return Err("the term buffer has not the correct length".into()); + if buffer.is_empty() { + return Err("the term buffer is empty.".into()); + } + if buffer.len() < Self::type_length(buffer[0])? { + return Err(format!( + "the term buffer with id {} do not have at least {} bytes.", + buffer[0], + buffer.len() + ).into()); + } + match buffer[0] { + TYPE_NAMED_NODE_ID => Ok(EncodedTerm::NamedNode { + iri_id: from_bytes_slice(&buffer[1..1 + size_of::()]), + }), + TYPE_BLANK_NODE_ID => Ok(EncodedTerm::BlankNode(Uuid::from_bytes(&buffer[1..17])?)), + TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral { + language_id: from_bytes_slice(&buffer[1..1 + size_of::()]), + value_id: from_bytes_slice( + &buffer[1 + size_of::()..1 + 2 * size_of::()], + ), + }), + TYPE_TYPED_LITERAL_ID => Ok(EncodedTerm::TypedLiteral { + datatype_id: from_bytes_slice(&buffer[1..1 + size_of::()]), + value_id: from_bytes_slice( + &buffer[1 + size_of::()..1 + 2 * size_of::()], + ), + }), + _ => Err("the term buffer has an invalid type id".into()), + } + } + + pub fn encoding_size(&self) -> usize { + Self::type_length(self.type_id()).unwrap() //It is not possible to fail here + } + + fn type_id(&self) -> u8 { + match self { + EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, + EncodedTerm::BlankNode(_) => TYPE_BLANK_NODE_ID, + EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, + EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, + } + } + + fn type_length(type_id: u8) -> Result { + match type_id { + TYPE_NAMED_NODE_ID => Ok(1 + size_of::()), + TYPE_BLANK_NODE_ID => Ok(17), //TODO: guess + TYPE_LANG_STRING_LITERAL_ID => Ok(1 + 2 * size_of::()), + TYPE_TYPED_LITERAL_ID => Ok(1 + 2 * size_of::()), + _ => Err(format!("{} is not a known type id", type_id).into()), } - let mut buf = [0 as u8; TERM_ENCODING_SIZE]; - buf.copy_from_slice(buffer); - return Ok(EncodedTerm(buf)); } -} -impl AsRef<[u8]> for EncodedTerm { - fn as_ref(&self) -> &[u8] { - &self.0[..] + pub fn add_to_vec(&self, vec: &mut Vec) { + vec.push(self.type_id()); + match self { + EncodedTerm::NamedNode { iri_id } => vec.extend_from_slice(&to_bytes(*iri_id)), + EncodedTerm::BlankNode(id) => vec.extend_from_slice(id.as_bytes()), + EncodedTerm::LangStringLiteral { + value_id, + language_id, + } => { + vec.extend_from_slice(&to_bytes(*language_id)); + vec.extend_from_slice(&to_bytes(*value_id)); + } + EncodedTerm::TypedLiteral { + value_id, + datatype_id, + } => { + vec.extend_from_slice(&to_bytes(*datatype_id)); + vec.extend_from_slice(&to_bytes(*value_id)); + } + } } } @@ -48,94 +113,113 @@ pub struct EncodedQuad { pub subject: EncodedTerm, pub predicate: EncodedTerm, pub object: EncodedTerm, - pub graph_name: EncodedTerm, + pub graph_name: Option, } impl EncodedQuad { pub fn new_from_spog_buffer(buffer: &[u8]) -> Result { - if buffer.len() != 4 * TERM_ENCODING_SIZE { - return Err("the spog buffer has not the correct length".into()); - } + let mut start = 0 as usize; + let subject = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += subject.encoding_size(); + let predicate = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += predicate.encoding_size(); + let object = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += object.encoding_size(); + let graph_name = if start < buffer.len() { + Some(EncodedTerm::new_from_buffer(&buffer[start..])?) + } else { + None + }; Ok(Self { - subject: EncodedTerm::new_from_buffer(&buffer[0..TERM_ENCODING_SIZE])?, - predicate: EncodedTerm::new_from_buffer( - &buffer[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE], - )?, - object: EncodedTerm::new_from_buffer( - &buffer[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE], - )?, - graph_name: EncodedTerm::new_from_buffer( - &buffer[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE], - )?, + subject, + predicate, + object, + graph_name, }) } pub fn new_from_posg_buffer(buffer: &[u8]) -> Result { - if buffer.len() != 4 * TERM_ENCODING_SIZE { - return Err("the posg buffer has not the correct length".into()); - } + let mut start = 0 as usize; + let predicate = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += predicate.encoding_size(); + let object = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += object.encoding_size(); + let subject = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += subject.encoding_size(); + let graph_name = if start < buffer.len() { + Some(EncodedTerm::new_from_buffer(&buffer[start..])?) + } else { + None + }; Ok(Self { - subject: EncodedTerm::new_from_buffer( - &buffer[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE], - )?, - predicate: EncodedTerm::new_from_buffer(&buffer[0..TERM_ENCODING_SIZE])?, - object: EncodedTerm::new_from_buffer( - &buffer[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE], - )?, - graph_name: EncodedTerm::new_from_buffer( - &buffer[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE], - )?, + subject, + predicate, + object, + graph_name, }) } pub fn new_from_ospg_buffer(buffer: &[u8]) -> Result { - if buffer.len() != 4 * TERM_ENCODING_SIZE { - return Err("the ospg buffer has not the correct length".into()); - } + let mut start = 0 as usize; + let object = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += object.encoding_size(); + let subject = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += subject.encoding_size(); + let predicate = EncodedTerm::new_from_buffer(&buffer[start..])?; + start += predicate.encoding_size(); + let graph_name = if start < buffer.len() { + Some(EncodedTerm::new_from_buffer(&buffer[start..])?) + } else { + None + }; Ok(Self { - subject: EncodedTerm::new_from_buffer( - &buffer[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE], - )?, - predicate: EncodedTerm::new_from_buffer( - &buffer[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE], - )?, - object: EncodedTerm::new_from_buffer(&buffer[0..TERM_ENCODING_SIZE])?, - graph_name: EncodedTerm::new_from_buffer( - &buffer[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE], - )?, + subject, + predicate, + object, + graph_name, }) } - pub fn spog(&self) -> [u8; 4 * TERM_ENCODING_SIZE] { - let mut spog = [0 as u8; 4 * TERM_ENCODING_SIZE]; - spog[0..TERM_ENCODING_SIZE].copy_from_slice(self.subject.as_ref()); - spog[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(self.predicate.as_ref()); - spog[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(self.object.as_ref()); - spog[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE] - .copy_from_slice(self.graph_name.as_ref()); + pub fn spog(&self) -> Vec { + let mut spog = Vec::with_capacity(self.encoding_size()); + self.subject.add_to_vec(&mut spog); + self.predicate.add_to_vec(&mut spog); + self.object.add_to_vec(&mut spog); + if let Some(ref graph_name) = self.graph_name { + graph_name.add_to_vec(&mut spog); + } spog } - pub fn posg(&self) -> [u8; 4 * TERM_ENCODING_SIZE] { - let mut posg = [0 as u8; 4 * TERM_ENCODING_SIZE]; - posg[0..TERM_ENCODING_SIZE].copy_from_slice(self.predicate.as_ref()); - posg[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(self.object.as_ref()); - posg[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(self.subject.as_ref()); - posg[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE] - .copy_from_slice(self.graph_name.as_ref()); + pub fn posg(&self) -> Vec { + let mut posg = Vec::with_capacity(self.encoding_size()); + self.predicate.add_to_vec(&mut posg); + self.object.add_to_vec(&mut posg); + self.subject.add_to_vec(&mut posg); + if let Some(ref graph_name) = self.graph_name { + graph_name.add_to_vec(&mut posg); + } posg } - pub fn ospg(&self) -> [u8; 4 * TERM_ENCODING_SIZE] { - let mut ospg = [0 as u8; 4 * TERM_ENCODING_SIZE]; - ospg[0..TERM_ENCODING_SIZE].copy_from_slice(self.object.as_ref()); - ospg[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(self.subject.as_ref()); - ospg[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE] - .copy_from_slice(self.predicate.as_ref()); - ospg[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE] - .copy_from_slice(self.graph_name.as_ref()); + pub fn ospg(&self) -> Vec { + let mut ospg = Vec::with_capacity(self.encoding_size()); + self.object.add_to_vec(&mut ospg); + self.subject.add_to_vec(&mut ospg); + self.predicate.add_to_vec(&mut ospg); + if let Some(ref graph_name) = self.graph_name { + graph_name.add_to_vec(&mut ospg); + } ospg } + + fn encoding_size(&self) -> usize { + self.subject.encoding_size() + self.predicate.encoding_size() + self.object.encoding_size() + + match self.graph_name { + Some(ref graph_name) => graph_name.encoding_size(), + None => 0, + } + } } pub struct Encoder { @@ -148,30 +232,27 @@ impl Encoder { } pub fn encode_named_node(&self, named_node: &NamedNode) -> Result { - let mut bytes = [0 as u8; TERM_ENCODING_SIZE]; - bytes[0] = TYPE_NAMED_NODE_ID; - self.encode_str_value_to_lower_bytes(named_node.as_str(), &mut bytes)?; - Ok(EncodedTerm(bytes)) + Ok(EncodedTerm::NamedNode { + iri_id: self.encode_str_value(named_node.as_str())?, + }) } pub fn encode_blank_node(&self, blank_node: &BlankNode) -> Result { - let mut bytes = [0 as u8; TERM_ENCODING_SIZE]; - bytes[0] = TYPE_BLANK_NODE_ID; - bytes[TYPE_KEY_SIZE..].copy_from_slice(blank_node.as_bytes()); - Ok(EncodedTerm(bytes)) + Ok(EncodedTerm::BlankNode(blank_node.deref().clone())) } pub fn encode_literal(&self, literal: &Literal) -> Result { - let mut bytes = [0 as u8; TERM_ENCODING_SIZE]; if let Some(language) = literal.language() { - bytes[0] = TYPE_LANG_STRING_LITERAL_ID; - self.encode_str_value_to_upper_bytes(language, &mut bytes)?; + Ok(EncodedTerm::LangStringLiteral { + value_id: self.encode_str_value(&literal.value())?, + language_id: self.encode_str_value(language)?, + }) } else { - bytes[0] = TYPE_TYPED_LITERAL_ID; - self.encode_str_value_to_upper_bytes(literal.datatype().as_str(), &mut bytes)?; + Ok(EncodedTerm::TypedLiteral { + value_id: self.encode_str_value(&literal.value())?, + datatype_id: self.encode_str_value(literal.datatype().as_ref())?, + }) } - self.encode_str_value_to_lower_bytes(literal.value().as_str(), &mut bytes)?; - Ok(EncodedTerm(bytes)) } pub fn encode_named_or_blank_node(&self, term: &NamedOrBlankNode) -> Result { @@ -181,16 +262,6 @@ impl Encoder { } } - pub fn encode_optional_named_or_blank_node( - &self, - term: &Option, - ) -> Result { - match term { - Some(node) => self.encode_named_or_blank_node(node), - None => Ok(EncodedTerm(EMPTY_TERM)), - } - } - pub fn encode_term(&self, term: &Term) -> Result { match term { Term::NamedNode(named_node) => self.encode_named_node(named_node), @@ -204,117 +275,82 @@ impl Encoder { subject: self.encode_named_or_blank_node(quad.subject())?, predicate: self.encode_named_node(quad.predicate())?, object: self.encode_term(quad.object())?, - graph_name: self.encode_optional_named_or_blank_node(quad.graph_name())?, + graph_name: match quad.graph_name() { + Some(graph_name) => Some(self.encode_named_or_blank_node(&graph_name)?), + None => None, + }, }) } - pub fn decode_term(&self, encoded: impl AsRef<[u8]>) -> Result { - let encoding = encoded.as_ref(); - match encoding[0] { - TYPE_NAMED_NODE_ID => { - let iri = self.decode_url_value_from_lower_bytes(encoding)?; - Ok(NamedNode::from(iri).into()) + pub fn decode_term(&self, encoded: &EncodedTerm) -> Result { + match encoded { + EncodedTerm::NamedNode { iri_id } => { + Ok(NamedNode::from(self.decode_url_value(*iri_id)?).into()) } - TYPE_BLANK_NODE_ID => Ok(BlankNode::from(Uuid::from_bytes(&encoding[1..])?).into()), - TYPE_LANG_STRING_LITERAL_ID => { - let value = self.decode_str_value_from_lower_bytes(encoding)?; - let language = self.decode_str_value_from_upper_bytes(encoding)?; - Ok(Literal::new_language_tagged_literal(value, language).into()) - } - TYPE_TYPED_LITERAL_ID => { - let value = self.decode_str_value_from_lower_bytes(encoding)?; - let datatype = NamedNode::from(self.decode_url_value_from_upper_bytes(encoding)?); - Ok(Literal::new_typed_literal(value, datatype).into()) - } - _ => Err("invalid term type encoding".into()), + EncodedTerm::BlankNode(id) => Ok(BlankNode::from(*id).into()), + EncodedTerm::LangStringLiteral { + value_id, + language_id, + } => Ok(Literal::new_language_tagged_literal( + self.decode_str_value(*value_id)?, + self.decode_str_value(*language_id)?, + ).into()), + EncodedTerm::TypedLiteral { + value_id, + datatype_id, + } => Ok(Literal::new_typed_literal( + self.decode_str_value(*value_id)?, + NamedNode::from(self.decode_url_value(*datatype_id)?), + ).into()), } } - pub fn decode_named_or_blank_node( - &self, - encoded: impl AsRef<[u8]>, - ) -> Result { - let encoding = encoded.as_ref(); - match self.decode_term(encoding)? { + pub fn decode_named_or_blank_node(&self, encoded: &EncodedTerm) -> Result { + match self.decode_term(encoded)? { Term::NamedNode(named_node) => Ok(named_node.into()), Term::BlankNode(blank_node) => Ok(blank_node.into()), Term::Literal(_) => Err("A literal has ben found instead of a named node".into()), } } - pub fn decode_optional_named_or_blank_node( - &self, - encoded: impl AsRef<[u8]>, - ) -> Result> { - let encoding = encoded.as_ref(); - if encoding == EMPTY_TERM { - Ok(None) - } else { - Ok(Some(self.decode_named_or_blank_node(encoding)?)) - } - } - - pub fn decode_named_node(&self, encoded: impl AsRef<[u8]>) -> Result { - let encoding = encoded.as_ref(); - match self.decode_term(encoding)? { + pub fn decode_named_node(&self, encoded: &EncodedTerm) -> Result { + match self.decode_term(encoded)? { Term::NamedNode(named_node) => Ok(named_node), Term::BlankNode(_) => Err("A blank node has been found instead of a named node".into()), Term::Literal(_) => Err("A literal has ben found instead of a named node".into()), } } - pub fn decode_quad(&self, encoded: EncodedQuad) -> Result { + pub fn decode_quad(&self, encoded: &EncodedQuad) -> Result { Ok(Quad::new( - self.decode_named_or_blank_node(encoded.subject)?, - self.decode_named_node(encoded.predicate)?, - self.decode_term(encoded.object)?, - self.decode_optional_named_or_blank_node(encoded.graph_name)?, + self.decode_named_or_blank_node(&encoded.subject)?, + self.decode_named_node(&encoded.predicate)?, + self.decode_term(&encoded.object)?, + match encoded.graph_name { + Some(ref graph_name) => Some(self.decode_named_or_blank_node(&graph_name)?), + None => None, + }, )) } - fn encode_str_value_to_upper_bytes(&self, text: &str, bytes: &mut [u8]) -> Result<()> { - self.string_store.put( - text.as_bytes(), - &mut bytes[TYPE_KEY_SIZE..TYPE_KEY_SIZE + STRING_KEY_SIZE], - ) - } - fn encode_str_value_to_lower_bytes(&self, text: &str, bytes: &mut [u8]) -> Result<()> { - self.string_store.put( - text.as_bytes(), - &mut bytes[TYPE_KEY_SIZE + STRING_KEY_SIZE..TYPE_KEY_SIZE + 2 * STRING_KEY_SIZE], - ) - } - - fn decode_str_value_from_upper_bytes(&self, encoding: &[u8]) -> Result { - let bytes = self.decode_value_from_upper_bytes(encoding)?; - Ok(str::from_utf8(&bytes)?.to_string()) + fn encode_str_value(&self, text: &str) -> Result { + self.string_store.put(text.as_bytes()) } - fn decode_url_value_from_upper_bytes(&self, encoding: &[u8]) -> Result { - let bytes = self.decode_value_from_upper_bytes(encoding)?; + fn decode_url_value(&self, id: usize) -> Result { + let bytes = self.decode_value(id)?; Ok(Url::from_str(str::from_utf8(&bytes)?)?) } - fn decode_value_from_upper_bytes(&self, encoding: &[u8]) -> Result { - self.string_store - .get(&encoding[TYPE_KEY_SIZE..TYPE_KEY_SIZE + STRING_KEY_SIZE])? - .ok_or(Error::from("value not found in the dictionary")) - } - - fn decode_str_value_from_lower_bytes(&self, encoding: &[u8]) -> Result { - let bytes = self.decode_value_from_lower_bytes(encoding)?; - Ok(str::from_utf8(&bytes)?.to_string()) - } - - fn decode_url_value_from_lower_bytes(&self, encoding: &[u8]) -> Result { - let bytes = self.decode_value_from_lower_bytes(encoding)?; - Ok(Url::from_str(str::from_utf8(&bytes)?)?) + fn decode_str_value(&self, id: usize) -> Result { + let bytes = self.decode_value(id)?; + Ok(str::from_utf8(&bytes)?.to_owned()) } - fn decode_value_from_lower_bytes(&self, encoding: &[u8]) -> Result { + fn decode_value(&self, id: usize) -> Result { self.string_store - .get(&encoding[TYPE_KEY_SIZE + STRING_KEY_SIZE..TYPE_KEY_SIZE + 2 * STRING_KEY_SIZE])? - .ok_or(Error::from("value not found in the dictionary")) + .get(id)? + .ok_or("value not found in the dictionary".into()) } } @@ -327,37 +363,34 @@ impl Default for Encoder { } mod test { - use errors::*; use model::*; use std::cell::RefCell; use std::collections::BTreeMap; - use std::str::FromStr; use store::numeric_encoder::*; use utils::to_bytes; #[derive(Default)] struct MemoryBytesStore { - id2str: RefCell>>, - str2id: RefCell, [u8; STRING_KEY_SIZE]>>, + id2str: RefCell>>, + str2id: RefCell, usize>>, } impl BytesStore for MemoryBytesStore { type BytesOutput = Vec; - fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()> { + fn put(&self, value: &[u8]) -> Result { let mut str2id = self.str2id.borrow_mut(); let mut id2str = self.id2str.borrow_mut(); let id = str2id.entry(value.to_vec()).or_insert_with(|| { - let id = to_bytes(id2str.len()); + let id = id2str.len(); id2str.insert(id, value.to_vec()); id }); - id_buffer.copy_from_slice(id); - Ok(()) + Ok(*id) } - fn get(&self, id: &[u8]) -> Result>> { - Ok(self.id2str.borrow().get(id).map(|s| s.to_owned())) + fn get(&self, id: usize) -> Result>> { + Ok(self.id2str.borrow().get(&id).map(|s| s.to_owned())) } } @@ -376,7 +409,7 @@ mod test { ]; for term in terms { let encoded = encoder.encode_term(&term).unwrap(); - assert_eq!(term, encoder.decode_term(encoded).unwrap()) + assert_eq!(term, encoder.decode_term(&encoded).unwrap()) } } diff --git a/src/store/rocksdb/mod.rs b/src/store/rocksdb/mod.rs index 8112e142..f486e5ab 100644 --- a/src/store/rocksdb/mod.rs +++ b/src/store/rocksdb/mod.rs @@ -90,6 +90,6 @@ impl<'a, I: Iterator>> Iterator for QuadsIterator<'a, fn next(&mut self) -> Option> { self.iter .next() - .map(|k| k.and_then(|quad| self.encoder.decode_quad(quad))) + .map(|k| k.and_then(|quad| self.encoder.decode_quad(&quad))) } } diff --git a/src/store/rocksdb/storage.rs b/src/store/rocksdb/storage.rs index 67b89ba9..e894326d 100644 --- a/src/store/rocksdb/storage.rs +++ b/src/store/rocksdb/storage.rs @@ -11,6 +11,7 @@ use std::str; use std::sync::Mutex; use store::numeric_encoder::*; use utils::from_bytes; +use utils::from_bytes_slice; use utils::to_bytes; const ID2STR_CF: &'static str = "id2str"; @@ -74,7 +75,7 @@ impl RocksDbStore { subject: EncodedTerm, ) -> Result> { let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; - iter.seek(subject.as_ref()); + iter.seek(&encode_term(&subject)); Ok(FilteringEncodedQuadsIterator { iter: SPOGIndexIterator { iter }, filter: EncodedQuadPattern::new(Some(subject), None, None, None), @@ -113,7 +114,7 @@ impl RocksDbStore { predicate: EncodedTerm, ) -> Result> { let mut iter = self.db.raw_iterator_cf(self.posg_cf)?; - iter.seek(predicate.as_ref()); + iter.seek(&encode_term(&predicate)); Ok(FilteringEncodedQuadsIterator { iter: POSGIndexIterator { iter }, filter: EncodedQuadPattern::new(None, Some(predicate), None, None), @@ -138,7 +139,7 @@ impl RocksDbStore { object: EncodedTerm, ) -> Result> { let mut iter = self.db.raw_iterator_cf(self.ospg_cf)?; - iter.seek(object.as_ref()); + iter.seek(&encode_term(&object)); Ok(FilteringEncodedQuadsIterator { iter: OSPGIndexIterator { iter }, filter: EncodedQuadPattern::new(None, None, Some(object), None), @@ -168,7 +169,7 @@ impl RocksDbStore { pub fn get_cf(db: &DB, name: &str) -> Result { db.cf_handle(name) - .ok_or_else(|| Error::from("column family not found")) + .ok_or_else(|| "column family not found".into()) } pub struct RocksDbBytesStore<'a>(&'a RocksDbStore); @@ -176,29 +177,28 @@ pub struct RocksDbBytesStore<'a>(&'a RocksDbStore); impl<'a> BytesStore for RocksDbBytesStore<'a> { type BytesOutput = DBVector; - fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()> { - match self.0.db.get_cf(self.0.str2id_cf, value)? { - Some(id) => id_buffer.copy_from_slice(&id), + fn put(&self, value: &[u8]) -> Result { + Ok(match self.0.db.get_cf(self.0.str2id_cf, value)? { + Some(id) => from_bytes_slice(&id), None => { - let id = to_bytes( - self.0 - .str_id_counter - .lock() - .unwrap() - .get_and_increment(&self.0.db)?, - ); + let id = self + .0 + .str_id_counter + .lock() + .unwrap() + .get_and_increment(&self.0.db)?; + let id_bytes = to_bytes(id); let mut batch = WriteBatch::default(); - batch.put_cf(self.0.id2str_cf, &id, value)?; - batch.put_cf(self.0.str2id_cf, value, &id)?; + batch.put_cf(self.0.id2str_cf, &id_bytes, value)?; + batch.put_cf(self.0.str2id_cf, value, &id_bytes)?; self.0.db.write(batch)?; - id_buffer.copy_from_slice(&id) + id } - } - Ok(()) + }) } - fn get(&self, id: &[u8]) -> Result> { - Ok(self.0.db.get_cf(self.0.id2str_cf, id)?) + fn get(&self, id: usize) -> Result> { + Ok(self.0.db.get_cf(self.0.id2str_cf, &to_bytes(id))?) } } @@ -229,7 +229,7 @@ struct EncodedQuadPattern { subject: Option, predicate: Option, object: Option, - graph_name: Option, + graph_name: Option>, } impl EncodedQuadPattern { @@ -237,7 +237,7 @@ impl EncodedQuadPattern { subject: Option, predicate: Option, object: Option, - graph_name: Option, + graph_name: Option>, ) -> Self { Self { subject, @@ -272,23 +272,25 @@ impl EncodedQuadPattern { } } -fn encode_term_pair(t1: &EncodedTerm, t2: &EncodedTerm) -> [u8; 2 * TERM_ENCODING_SIZE] { - let mut bytes = [0 as u8; 2 * TERM_ENCODING_SIZE]; - bytes[0..TERM_ENCODING_SIZE].copy_from_slice(t1.as_ref()); - bytes[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(t2.as_ref()); - bytes +fn encode_term(t: &EncodedTerm) -> Vec { + let mut vec = Vec::with_capacity(t.encoding_size()); + t.add_to_vec(&mut vec); + vec +} + +fn encode_term_pair(t1: &EncodedTerm, t2: &EncodedTerm) -> Vec { + let mut vec = Vec::with_capacity(t1.encoding_size() + t2.encoding_size()); + t1.add_to_vec(&mut vec); + t2.add_to_vec(&mut vec); + vec } -fn encode_term_triple( - t1: &EncodedTerm, - t2: &EncodedTerm, - t3: &EncodedTerm, -) -> [u8; 3 * TERM_ENCODING_SIZE] { - let mut bytes = [0 as u8; 3 * TERM_ENCODING_SIZE]; - bytes[0..TERM_ENCODING_SIZE].copy_from_slice(t1.as_ref()); - bytes[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(t2.as_ref()); - bytes[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(t3.as_ref()); - bytes +fn encode_term_triple(t1: &EncodedTerm, t2: &EncodedTerm, t3: &EncodedTerm) -> Vec { + let mut vec = Vec::with_capacity(t1.encoding_size() + t2.encoding_size() + t3.encoding_size()); + t1.add_to_vec(&mut vec); + t2.add_to_vec(&mut vec); + t3.add_to_vec(&mut vec); + vec } pub struct SPOGIndexIterator { @@ -347,7 +349,7 @@ impl>> Iterator for FilteringEncodedQuads fn next(&mut self) -> Option> { self.iter.next().filter(|quad| match quad { Ok(quad) => self.filter.filter(quad), - Err(e) => true, + Err(_) => true, }) } } diff --git a/src/utils.rs b/src/utils.rs index 156d6101..63736a62 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -90,3 +90,9 @@ pub fn from_bytes(bytes: [u8; size_of::()]) -> usize { //TODO: remove when next rust version stabilize this method unsafe { transmute(bytes) } } + +pub fn from_bytes_slice(bytes: &[u8]) -> usize { + let mut buf = [0 as u8; size_of::()]; + buf.copy_from_slice(bytes); + from_bytes(buf) +}