Transforms BytesStore into StringStore

pull/10/head
Tpt 6 years ago
parent b2d9218aee
commit 8faba13f5a
  1. 84
      lib/src/sparql/eval.rs
  2. 23
      lib/src/store/encoded.rs
  3. 36
      lib/src/store/memory.rs
  4. 120
      lib/src/store/numeric_encoder.rs
  5. 28
      lib/src/store/rocksdb.rs

@ -17,7 +17,6 @@ use std::collections::BTreeMap;
use std::collections::HashSet; use std::collections::HashSet;
use std::iter::once; use std::iter::once;
use std::iter::Iterator; use std::iter::Iterator;
use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use std::sync::Mutex; use std::sync::Mutex;
@ -540,9 +539,9 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
EncodedTerm::BooleanLiteral(value) => Some(value.into()), EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } => { | EncodedTerm::StringLiteral { value_id } => {
match &*self.store.get_bytes(value_id).ok()?? { match &*self.store.get_str(value_id).ok()? {
b"true" | b"1" => Some(true.into()), "true" | "1" => Some(true.into()),
b"false" | b"0" => Some(false.into()), "false" | "0" => Some(false.into()),
_ => None, _ => None,
} }
} }
@ -557,14 +556,9 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
Some(if value { 1. as f64 } else { 0. }.into()) Some(if value { 1. as f64 } else { 0. }.into())
} }
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } => { | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DoubleLiteral(
Some(EncodedTerm::DoubleLiteral(OrderedFloat( OrderedFloat(self.store.get_str(value_id).ok()?.parse().ok()?),
str::from_utf8(&self.store.get_bytes(value_id).ok()??) )),
.ok()?
.parse()
.ok()?,
)))
}
_ => None, _ => None,
}, },
PlanExpression::FloatCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::FloatCast(e) => match self.eval_expression(e, tuple)? {
@ -576,14 +570,9 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
Some(if value { 1. as f32 } else { 0. }.into()) Some(if value { 1. as f32 } else { 0. }.into())
} }
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } => { | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::FloatLiteral(
Some(EncodedTerm::FloatLiteral(OrderedFloat( OrderedFloat(self.store.get_str(value_id).ok()?.parse().ok()?),
str::from_utf8(&self.store.get_bytes(value_id).ok()??) )),
.ok()?
.parse()
.ok()?,
)))
}
_ => None, _ => None,
}, },
PlanExpression::IntegerCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::IntegerCast(e) => match self.eval_expression(e, tuple)? {
@ -594,10 +583,7 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()), EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()),
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::IntegerLiteral( | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::IntegerLiteral(
str::from_utf8(&self.store.get_bytes(value_id).ok()??) self.store.get_str(value_id).ok()?.parse().ok()?,
.ok()?
.parse()
.ok()?,
)), )),
_ => None, _ => None,
}, },
@ -615,10 +601,7 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
), ),
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DecimalLiteral( | EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DecimalLiteral(
str::from_utf8(&self.store.get_bytes(value_id).ok()??) self.store.get_str(value_id).ok()?.parse().ok()?,
.ok()?
.parse()
.ok()?,
)), )),
_ => None, _ => None,
}, },
@ -627,8 +610,7 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
EncodedTerm::NaiveDateTime(value) => Some(value.into()), EncodedTerm::NaiveDateTime(value) => Some(value.into()),
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } => { | EncodedTerm::StringLiteral { value_id } => {
let bytes = self.store.get_bytes(value_id).ok()??; let value = self.store.get_str(value_id).ok()?;
let value = str::from_utf8(&bytes).ok()?;
Some(match DateTime::parse_from_rfc3339(&value) { Some(match DateTime::parse_from_rfc3339(&value) {
Ok(value) => value.into(), Ok(value) => value.into(),
Err(_) => NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S") Err(_) => NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S")
@ -668,36 +650,20 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
| EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id), | EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id),
EncodedTerm::BooleanLiteral(value) => self EncodedTerm::BooleanLiteral(value) => self
.store .store
.insert_bytes(if value { b"true" } else { b"false" }) .insert_str(if value { "true" } else { "false" })
.ok(), .ok(),
EncodedTerm::FloatLiteral(value) => { EncodedTerm::FloatLiteral(value) => self.store.insert_str(&value.to_string()).ok(),
self.store.insert_bytes(value.to_string().as_bytes()).ok() EncodedTerm::DoubleLiteral(value) => self.store.insert_str(&value.to_string()).ok(),
} EncodedTerm::IntegerLiteral(value) => self.store.insert_str(&value.to_string()).ok(),
EncodedTerm::DoubleLiteral(value) => { EncodedTerm::DecimalLiteral(value) => self.store.insert_str(&value.to_string()).ok(),
self.store.insert_bytes(value.to_string().as_bytes()).ok() EncodedTerm::DateTime(value) => self.store.insert_str(&value.to_string()).ok(),
} EncodedTerm::NaiveDateTime(value) => self.store.insert_str(&value.to_string()).ok(),
EncodedTerm::IntegerLiteral(value) => {
self.store.insert_bytes(value.to_string().as_bytes()).ok()
}
EncodedTerm::DecimalLiteral(value) => {
self.store.insert_bytes(value.to_string().as_bytes()).ok()
}
EncodedTerm::DateTime(value) => {
self.store.insert_bytes(value.to_string().as_bytes()).ok()
}
EncodedTerm::NaiveDateTime(value) => {
self.store.insert_bytes(value.to_string().as_bytes()).ok()
}
} }
} }
fn to_simple_string(&self, term: EncodedTerm) -> Option<String> { fn to_simple_string(&self, term: EncodedTerm) -> Option<String> {
if let EncodedTerm::SimpleLiteral { value_id } = term { if let EncodedTerm::SimpleLiteral { value_id } = term {
Some( self.store.get_str(value_id).ok()
str::from_utf8(&self.store.get_bytes(value_id).ok()??)
.ok()?
.to_owned(),
)
} else { } else {
None None
} }
@ -707,11 +673,7 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
match term { match term {
EncodedTerm::SimpleLiteral { value_id } EncodedTerm::SimpleLiteral { value_id }
| EncodedTerm::StringLiteral { value_id } | EncodedTerm::StringLiteral { value_id }
| EncodedTerm::LangStringLiteral { value_id, .. } => Some( | EncodedTerm::LangStringLiteral { value_id, .. } => self.store.get_str(value_id).ok(),
str::from_utf8(&self.store.get_bytes(value_id).ok()??)
.ok()?
.to_owned(),
),
_ => None, _ => None,
} }
} }
@ -875,7 +837,7 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
} }
fn compare_str_ids(&self, a: u64, b: u64) -> Option<Ordering> { fn compare_str_ids(&self, a: u64, b: u64) -> Option<Ordering> {
if let (Ok(Some(a)), Ok(Some(b))) = (self.store.get_bytes(a), self.store.get_bytes(b)) { if let (Ok(a), Ok(b)) = (self.store.get_str(a), self.store.get_str(b)) {
Some(a.cmp(&b)) Some(a.cmp(&b))
} else { } else {
None None
@ -1181,7 +1143,7 @@ fn get_triple_template_value(
} }
} }
fn decode_triple<S: BytesStore>( fn decode_triple<S: StringStore>(
encoder: &Encoder<S>, encoder: &Encoder<S>,
subject: EncodedTerm, subject: EncodedTerm,
predicate: EncodedTerm, predicate: EncodedTerm,

@ -6,11 +6,12 @@ use std::iter::FromIterator;
use std::iter::Iterator; use std::iter::Iterator;
use std::sync::Arc; use std::sync::Arc;
use store::numeric_encoder::*; use store::numeric_encoder::*;
use url::Url;
use Result; use Result;
/// Defines the Store traits that is used to have efficient binary storage /// Defines the Store traits that is used to have efficient binary storage
pub trait EncodedQuadsStore: BytesStore + Sized + 'static { pub trait EncodedQuadsStore: StringStore + Sized + 'static {
type QuadsIterator: Iterator<Item = Result<EncodedQuad>> + 'static; type QuadsIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
type QuadsForSubjectIterator: Iterator<Item = Result<EncodedQuad>> + 'static; type QuadsForSubjectIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
type QuadsForSubjectPredicateIterator: Iterator<Item = Result<EncodedQuad>> + 'static; type QuadsForSubjectPredicateIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
@ -27,8 +28,8 @@ pub trait EncodedQuadsStore: BytesStore + Sized + 'static {
type QuadsForPredicateObjectGraphIterator: Iterator<Item = Result<EncodedQuad>> + 'static; type QuadsForPredicateObjectGraphIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
type QuadsForObjectGraphIterator: Iterator<Item = Result<EncodedQuad>> + 'static; type QuadsForObjectGraphIterator: Iterator<Item = Result<EncodedQuad>> + 'static;
fn encoder(&self) -> Encoder<DelegatingBytesStore<Self>> { fn encoder(&self) -> Encoder<DelegatingStringStore<Self>> {
Encoder::new(DelegatingBytesStore(&self)) Encoder::new(DelegatingStringStore(&self))
} }
fn quads(&self) -> Result<Self::QuadsIterator>; fn quads(&self) -> Result<Self::QuadsIterator>;
@ -865,17 +866,19 @@ impl<S: EncodedQuadsStore> fmt::Display for StoreUnionGraph<S> {
} }
} }
pub struct DelegatingBytesStore<'a, S: 'a + BytesStore + Sized>(&'a S); pub struct DelegatingStringStore<'a, S: 'a + StringStore + Sized>(&'a S);
impl<'a, S: BytesStore> BytesStore for DelegatingBytesStore<'a, S> { impl<'a, S: StringStore> StringStore for DelegatingStringStore<'a, S> {
type BytesOutput = S::BytesOutput; fn insert_str(&self, value: &str) -> Result<u64> {
self.0.insert_str(value)
}
fn insert_bytes(&self, value: &[u8]) -> Result<u64> { fn get_str(&self, id: u64) -> Result<String> {
self.0.insert_bytes(value) self.0.get_str(id)
} }
fn get_bytes(&self, id: u64) -> Result<Option<S::BytesOutput>> { fn get_url(&self, id: u64) -> Result<Url> {
self.0.get_bytes(id) self.0.get_url(id)
} }
} }

@ -1,12 +1,14 @@
use failure::Backtrace; use failure::Backtrace;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::str::FromStr;
use std::sync::PoisonError; use std::sync::PoisonError;
use std::sync::RwLock; use std::sync::RwLock;
use std::sync::RwLockReadGuard; use std::sync::RwLockReadGuard;
use std::sync::RwLockWriteGuard; use std::sync::RwLockWriteGuard;
use store::encoded::*; use store::encoded::*;
use store::numeric_encoder::*; use store::numeric_encoder::*;
use url::Url;
use Result; use Result;
/// Memory based implementation of the `rudf::model::Dataset` trait. /// Memory based implementation of the `rudf::model::Dataset` trait.
@ -47,8 +49,8 @@ pub type MemoryDataset = StoreDataset<MemoryStore>;
pub type MemoryGraph = StoreDefaultGraph<MemoryStore>; pub type MemoryGraph = StoreDefaultGraph<MemoryStore>;
pub struct MemoryStore { pub struct MemoryStore {
id2str: RwLock<Vec<Vec<u8>>>, id2str: RwLock<Vec<String>>,
str2id: RwLock<BTreeMap<Vec<u8>, u64>>, str2id: RwLock<BTreeMap<String, u64>>,
graph_indexes: RwLock<BTreeMap<EncodedTerm, MemoryGraphIndexes>>, graph_indexes: RwLock<BTreeMap<EncodedTerm, MemoryGraphIndexes>>,
} }
@ -71,28 +73,34 @@ struct MemoryGraphIndexes {
osp: BTreeMap<EncodedTerm, BTreeMap<EncodedTerm, BTreeSet<EncodedTerm>>>, osp: BTreeMap<EncodedTerm, BTreeMap<EncodedTerm, BTreeSet<EncodedTerm>>>,
} }
impl BytesStore for MemoryStore { impl StringStore for MemoryStore {
type BytesOutput = Vec<u8>; fn insert_str(&self, value: &str) -> Result<u64> {
fn insert_bytes(&self, value: &[u8]) -> Result<u64> {
let mut id2str = self.id2str.write().map_err(MemoryStorePoisonError::from)?; let mut id2str = self.id2str.write().map_err(MemoryStorePoisonError::from)?;
let mut str2id = self.str2id.write().map_err(MemoryStorePoisonError::from)?; let mut str2id = self.str2id.write().map_err(MemoryStorePoisonError::from)?;
let id = str2id.entry(value.to_vec()).or_insert_with(|| { let id = str2id.entry(value.to_string()).or_insert_with(|| {
let id = id2str.len() as u64; let id = id2str.len() as u64;
id2str.push(value.to_vec()); id2str.push(value.to_string());
id id
}); });
Ok(*id) Ok(*id)
} }
fn get_bytes(&self, id: u64) -> Result<Option<Vec<u8>>> { fn get_str(&self, id: u64) -> Result<String> {
//TODO: use try_from when stable
let id2str = self.id2str.read().map_err(MemoryStorePoisonError::from)?; let id2str = self.id2str.read().map_err(MemoryStorePoisonError::from)?;
Ok(if id2str.len() as u64 <= id { if id2str.len() as u64 <= id {
None Err(format_err!("value not found in the dictionary"))
} else { } else {
Some(id2str[id as usize].to_owned()) Ok(id2str[id as usize].to_owned())
}) }
}
fn get_url(&self, id: u64) -> Result<Url> {
let id2str = self.id2str.read().map_err(MemoryStorePoisonError::from)?;
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(Url::from_str(&id2str[id as usize])?)
}
} }
} }

@ -9,9 +9,7 @@ use ordered_float::OrderedFloat;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use std::io::Read; use std::io::Read;
use std::io::Write; use std::io::Write;
use std::ops::Deref;
use std::str; use std::str;
use std::str::FromStr;
use url::Url; use url::Url;
use uuid::Uuid; use uuid::Uuid;
use Result; use Result;
@ -26,23 +24,22 @@ const XSD_INTEGER_ID: u64 = 6;
const XSD_DECIMAL_ID: u64 = 7; const XSD_DECIMAL_ID: u64 = 7;
const XSD_DATE_TIME_ID: u64 = 8; const XSD_DATE_TIME_ID: u64 = 8;
pub trait BytesStore { pub trait StringStore {
type BytesOutput: Deref<Target = [u8]>; fn insert_str(&self, value: &str) -> Result<u64>;
fn get_str(&self, id: u64) -> Result<String>;
fn insert_bytes(&self, value: &[u8]) -> Result<u64>; fn get_url(&self, id: u64) -> Result<Url>;
fn get_bytes(&self, id: u64) -> Result<Option<Self::BytesOutput>>;
/// Should be called when the bytes store is created /// Should be called when the bytes store is created
fn set_first_strings(&self) -> Result<()> { fn set_first_strings(&self) -> Result<()> {
if EMPTY_STRING_ID == self.insert_bytes(b"")? if EMPTY_STRING_ID == self.insert_str("")?
&& RDF_LANG_STRING_ID == self.insert_bytes(rdf::LANG_STRING.as_str().as_bytes())? && RDF_LANG_STRING_ID == self.insert_str(rdf::LANG_STRING.as_str())?
&& XSD_STRING_ID == self.insert_bytes(xsd::STRING.as_str().as_bytes())? && XSD_STRING_ID == self.insert_str(xsd::STRING.as_str())?
&& XSD_BOOLEAN_ID == self.insert_bytes(xsd::BOOLEAN.as_str().as_bytes())? && XSD_BOOLEAN_ID == self.insert_str(xsd::BOOLEAN.as_str())?
&& XSD_FLOAT_ID == self.insert_bytes(xsd::FLOAT.as_str().as_bytes())? && XSD_FLOAT_ID == self.insert_str(xsd::FLOAT.as_str())?
&& XSD_DOUBLE_ID == self.insert_bytes(xsd::DOUBLE.as_str().as_bytes())? && XSD_DOUBLE_ID == self.insert_str(xsd::DOUBLE.as_str())?
&& XSD_INTEGER_ID == self.insert_bytes(xsd::INTEGER.as_str().as_bytes())? && XSD_INTEGER_ID == self.insert_str(xsd::INTEGER.as_str())?
&& XSD_DECIMAL_ID == self.insert_bytes(xsd::DECIMAL.as_str().as_bytes())? && XSD_DECIMAL_ID == self.insert_str(xsd::DECIMAL.as_str())?
&& XSD_DATE_TIME_ID == self.insert_bytes(xsd::DATE_TIME.as_str().as_bytes())? && XSD_DATE_TIME_ID == self.insert_str(xsd::DATE_TIME.as_str())?
{ {
Ok(()) Ok(())
} else { } else {
@ -446,18 +443,18 @@ impl<R: Write> TermWriter for R {
} }
} }
pub struct Encoder<S: BytesStore> { pub struct Encoder<S: StringStore> {
string_store: S, string_store: S,
} }
impl<S: BytesStore> Encoder<S> { impl<S: StringStore> Encoder<S> {
pub fn new(string_store: S) -> Self { pub fn new(string_store: S) -> Self {
Self { string_store } Self { string_store }
} }
pub fn encode_named_node(&self, named_node: &NamedNode) -> Result<EncodedTerm> { pub fn encode_named_node(&self, named_node: &NamedNode) -> Result<EncodedTerm> {
Ok(EncodedTerm::NamedNode { Ok(EncodedTerm::NamedNode {
iri_id: self.encode_str_value(named_node.as_str())?, iri_id: self.string_store.insert_str(named_node.as_str())?,
}) })
} }
@ -469,17 +466,17 @@ impl<S: BytesStore> Encoder<S> {
Ok(if literal.is_plain() { Ok(if literal.is_plain() {
if let Some(language) = literal.language() { if let Some(language) = literal.language() {
EncodedTerm::LangStringLiteral { EncodedTerm::LangStringLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.string_store.insert_str(&literal.value())?,
language_id: self.encode_str_value(language)?, language_id: self.string_store.insert_str(language)?,
} }
} else { } else {
EncodedTerm::SimpleLiteral { EncodedTerm::SimpleLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.string_store.insert_str(&literal.value())?,
} }
} }
} else if literal.is_string() { } else if literal.is_string() {
EncodedTerm::StringLiteral { EncodedTerm::StringLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.string_store.insert_str(&literal.value())?,
} }
} else if literal.is_boolean() { } else if literal.is_boolean() {
literal literal
@ -518,8 +515,8 @@ impl<S: BytesStore> Encoder<S> {
.into() .into()
} else { } else {
EncodedTerm::TypedLiteral { EncodedTerm::TypedLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.string_store.insert_str(&literal.value())?,
datatype_id: self.encode_str_value(literal.datatype().as_str())?, datatype_id: self.string_store.insert_str(literal.datatype().as_str())?,
} }
}) })
} }
@ -570,28 +567,28 @@ impl<S: BytesStore> Encoder<S> {
Err(format_err!("The default graph tag is not a valid term")) Err(format_err!("The default graph tag is not a valid term"))
} }
EncodedTerm::NamedNode { iri_id } => { EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::from(self.decode_url_value(iri_id)?).into()) Ok(NamedNode::from(self.string_store.get_url(iri_id)?).into())
} }
EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()), EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()),
EncodedTerm::SimpleLiteral { value_id } => { EncodedTerm::SimpleLiteral { value_id } => {
Ok(Literal::new_simple_literal(self.decode_str_value(value_id)?).into()) Ok(Literal::new_simple_literal(self.string_store.get_str(value_id)?).into())
} }
EncodedTerm::LangStringLiteral { EncodedTerm::LangStringLiteral {
value_id, value_id,
language_id, language_id,
} => Ok(Literal::new_language_tagged_literal( } => Ok(Literal::new_language_tagged_literal(
self.decode_str_value(value_id)?, self.string_store.get_str(value_id)?,
self.decode_str_value(language_id)?, self.string_store.get_str(language_id)?,
).into()), ).into()),
EncodedTerm::TypedLiteral { EncodedTerm::TypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => Ok(Literal::new_typed_literal( } => Ok(Literal::new_typed_literal(
self.decode_str_value(value_id)?, self.string_store.get_str(value_id)?,
NamedNode::from(self.decode_url_value(datatype_id)?), NamedNode::from(self.string_store.get_url(datatype_id)?),
).into()), ).into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => {
Ok(Literal::from(self.decode_str_value(value_id)?).into()) Ok(Literal::from(self.string_store.get_str(value_id)?).into())
} }
EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(value).into()), EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(value).into()),
EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()), EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()),
@ -644,29 +641,9 @@ impl<S: BytesStore> Encoder<S> {
}, },
)) ))
} }
fn encode_str_value(&self, text: &str) -> Result<u64> {
self.string_store.insert_bytes(text.as_bytes())
}
fn decode_url_value(&self, id: u64) -> Result<Url> {
let bytes = self.decode_value(id)?;
Ok(Url::from_str(str::from_utf8(&bytes)?)?)
}
fn decode_str_value(&self, id: u64) -> Result<String> {
let bytes = self.decode_value(id)?;
Ok(str::from_utf8(&bytes)?.to_owned())
}
fn decode_value(&self, id: u64) -> Result<S::BytesOutput> {
self.string_store
.get_bytes(id)?
.ok_or_else(|| format_err!("value not found in the dictionary"))
}
} }
impl<S: BytesStore + Default> Default for Encoder<S> { impl<S: StringStore + Default> Default for Encoder<S> {
fn default() -> Self { fn default() -> Self {
Self { Self {
string_store: S::default(), string_store: S::default(),
@ -677,30 +654,43 @@ impl<S: BytesStore + Default> Default for Encoder<S> {
mod test { mod test {
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::str::FromStr;
use store::numeric_encoder::*; use store::numeric_encoder::*;
#[derive(Default)] #[derive(Default)]
struct MemoryBytesStore { struct MemoryStringStore {
id2str: RefCell<BTreeMap<u64, Vec<u8>>>, id2str: RefCell<Vec<String>>,
str2id: RefCell<BTreeMap<Vec<u8>, u64>>, str2id: RefCell<BTreeMap<String, u64>>,
} }
impl BytesStore for MemoryBytesStore { impl StringStore for MemoryStringStore {
type BytesOutput = Vec<u8>; fn insert_str(&self, value: &str) -> Result<u64> {
fn insert_bytes(&self, value: &[u8]) -> Result<u64> {
let mut str2id = self.str2id.borrow_mut(); let mut str2id = self.str2id.borrow_mut();
let mut id2str = self.id2str.borrow_mut(); let mut id2str = self.id2str.borrow_mut();
let id = str2id.entry(value.to_vec()).or_insert_with(|| { let id = str2id.entry(value.to_string()).or_insert_with(|| {
let id = id2str.len() as u64; let id = id2str.len() as u64;
id2str.insert(id, value.to_vec()); id2str.push(value.to_string());
id id
}); });
Ok(*id) Ok(*id)
} }
fn get_bytes(&self, id: u64) -> Result<Option<Vec<u8>>> { fn get_str(&self, id: u64) -> Result<String> {
Ok(self.id2str.borrow().get(&id).map(|s| s.to_owned())) let id2str = self.id2str.borrow();
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(id2str[id as usize].to_owned())
}
}
fn get_url(&self, id: u64) -> Result<Url> {
let id2str = self.id2str.borrow();
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(Url::from_str(&id2str[id as usize])?)
}
} }
} }
@ -708,7 +698,7 @@ mod test {
fn test_encoding() { fn test_encoding() {
use model::*; use model::*;
let encoder: Encoder<MemoryBytesStore> = Encoder::default(); let encoder: Encoder<MemoryStringStore> = Encoder::default();
let terms: Vec<Term> = vec![ let terms: Vec<Term> = vec![
NamedNode::from_str("http://foo.com").unwrap().into(), NamedNode::from_str("http://foo.com").unwrap().into(),
NamedNode::from_str("http://bar.com").unwrap().into(), NamedNode::from_str("http://bar.com").unwrap().into(),

@ -3,7 +3,6 @@ use byteorder::LittleEndian;
use failure::Backtrace; use failure::Backtrace;
use rocksdb::ColumnFamily; use rocksdb::ColumnFamily;
use rocksdb::DBRawIterator; use rocksdb::DBRawIterator;
use rocksdb::DBVector;
use rocksdb::Options; use rocksdb::Options;
use rocksdb::WriteBatch; use rocksdb::WriteBatch;
use rocksdb::DB; use rocksdb::DB;
@ -11,11 +10,13 @@ use std::io::Cursor;
use std::ops::Deref; use std::ops::Deref;
use std::path::Path; use std::path::Path;
use std::str; use std::str;
use std::str::FromStr;
use std::sync::Mutex; use std::sync::Mutex;
use std::sync::PoisonError; use std::sync::PoisonError;
use store::encoded::EncodedQuadsStore; use store::encoded::EncodedQuadsStore;
use store::encoded::StoreDataset; use store::encoded::StoreDataset;
use store::numeric_encoder::*; use store::numeric_encoder::*;
use url::Url;
use Result; use Result;
/// `rudf::model::Dataset` trait implementation based on the [RocksDB](https://rocksdb.org/) key-value store /// `rudf::model::Dataset` trait implementation based on the [RocksDB](https://rocksdb.org/) key-value store
@ -84,10 +85,9 @@ impl RocksDbStore {
} }
} }
impl BytesStore for RocksDbStore { impl StringStore for RocksDbStore {
type BytesOutput = DBVector; fn insert_str(&self, value: &str) -> Result<u64> {
let value = value.as_bytes();
fn insert_bytes(&self, value: &[u8]) -> Result<u64> {
Ok(if let Some(id) = self.db.get_cf(*self.str2id_cf, value)? { Ok(if let Some(id) = self.db.get_cf(*self.str2id_cf, value)? {
LittleEndian::read_u64(&id) LittleEndian::read_u64(&id)
} else { } else {
@ -105,8 +105,22 @@ impl BytesStore for RocksDbStore {
}) })
} }
fn get_bytes(&self, id: u64) -> Result<Option<DBVector>> { fn get_str(&self, id: u64) -> Result<String> {
Ok(self.db.get_cf(*self.id2str_cf, &to_bytes(id))?) let value = self.db.get_cf(*self.id2str_cf, &to_bytes(id))?;
if let Some(value) = value {
Ok(str::from_utf8(&value)?.to_owned())
} else {
Err(format_err!("value not found in the dictionary"))
}
}
fn get_url(&self, id: u64) -> Result<Url> {
let value = self.db.get_cf(*self.id2str_cf, &to_bytes(id))?;
if let Some(value) = value {
Ok(Url::from_str(str::from_utf8(&value)?)?)
} else {
Err(format_err!("value not found in the dictionary"))
}
} }
} }

Loading…
Cancel
Save