Moves MemoryStringStore out of memory store

pull/10/head
Tpt 6 years ago
parent 8faba13f5a
commit c3d11a7024
  1. 63
      lib/src/store/memory.rs
  2. 93
      lib/src/store/numeric_encoder.rs
  3. 19
      lib/src/store/rocksdb.rs
  4. 17
      lib/src/utils.rs

@ -1,14 +1,12 @@
use failure::Backtrace;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::str::FromStr;
use std::sync::PoisonError;
use std::sync::RwLock; use std::sync::RwLock;
use std::sync::RwLockReadGuard; use std::sync::RwLockReadGuard;
use std::sync::RwLockWriteGuard; use std::sync::RwLockWriteGuard;
use store::encoded::*; use store::encoded::*;
use store::numeric_encoder::*; use store::numeric_encoder::*;
use url::Url; use url::Url;
use utils::MutexPoisonError;
use Result; use Result;
/// Memory based implementation of the `rudf::model::Dataset` trait. /// Memory based implementation of the `rudf::model::Dataset` trait.
@ -49,20 +47,16 @@ pub type MemoryDataset = StoreDataset<MemoryStore>;
pub type MemoryGraph = StoreDefaultGraph<MemoryStore>; pub type MemoryGraph = StoreDefaultGraph<MemoryStore>;
pub struct MemoryStore { pub struct MemoryStore {
id2str: RwLock<Vec<String>>, string_store: MemoryStringStore,
str2id: RwLock<BTreeMap<String, u64>>,
graph_indexes: RwLock<BTreeMap<EncodedTerm, MemoryGraphIndexes>>, graph_indexes: RwLock<BTreeMap<EncodedTerm, MemoryGraphIndexes>>,
} }
impl Default for MemoryStore { impl Default for MemoryStore {
fn default() -> Self { fn default() -> Self {
let new = Self { Self {
id2str: RwLock::default(), string_store: MemoryStringStore::default(),
str2id: RwLock::default(),
graph_indexes: RwLock::default(), graph_indexes: RwLock::default(),
}; }
new.set_first_strings().unwrap();
new
} }
} }
@ -75,32 +69,15 @@ struct MemoryGraphIndexes {
impl StringStore for MemoryStore { impl StringStore for MemoryStore {
fn insert_str(&self, value: &str) -> Result<u64> { fn insert_str(&self, value: &str) -> Result<u64> {
let mut id2str = self.id2str.write().map_err(MemoryStorePoisonError::from)?; self.string_store.insert_str(value)
let mut str2id = self.str2id.write().map_err(MemoryStorePoisonError::from)?;
let id = str2id.entry(value.to_string()).or_insert_with(|| {
let id = id2str.len() as u64;
id2str.push(value.to_string());
id
});
Ok(*id)
} }
fn get_str(&self, id: u64) -> Result<String> { fn get_str(&self, id: u64) -> Result<String> {
let id2str = self.id2str.read().map_err(MemoryStorePoisonError::from)?; self.string_store.get_str(id)
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(id2str[id as usize].to_owned())
}
} }
fn get_url(&self, id: u64) -> Result<Url> { fn get_url(&self, id: u64) -> Result<Url> {
let id2str = self.id2str.read().map_err(MemoryStorePoisonError::from)?; self.string_store.get_url(id)
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(Url::from_str(&id2str[id as usize])?)
}
} }
} }
@ -505,32 +482,12 @@ impl EncodedQuadsStore for MemoryStore {
impl MemoryStore { impl MemoryStore {
fn graph_indexes(&self) -> Result<RwLockReadGuard<BTreeMap<EncodedTerm, MemoryGraphIndexes>>> { fn graph_indexes(&self) -> Result<RwLockReadGuard<BTreeMap<EncodedTerm, MemoryGraphIndexes>>> {
Ok(self Ok(self.graph_indexes.read().map_err(MutexPoisonError::from)?)
.graph_indexes
.read()
.map_err(MemoryStorePoisonError::from)?)
} }
fn graph_indexes_mut( fn graph_indexes_mut(
&self, &self,
) -> Result<RwLockWriteGuard<BTreeMap<EncodedTerm, MemoryGraphIndexes>>> { ) -> Result<RwLockWriteGuard<BTreeMap<EncodedTerm, MemoryGraphIndexes>>> {
Ok(self Ok(self.graph_indexes.write().map_err(MutexPoisonError::from)?)
.graph_indexes
.write()
.map_err(MemoryStorePoisonError::from)?)
}
}
#[derive(Debug, Fail)]
#[fail(display = "MemoryStore Mutex was poisoned")]
pub struct MemoryStorePoisonError {
backtrace: Backtrace,
}
impl<T> From<PoisonError<T>> for MemoryStorePoisonError {
fn from(_: PoisonError<T>) -> Self {
Self {
backtrace: Backtrace::new(),
}
} }
} }

@ -7,10 +7,14 @@ use model::vocab::xsd;
use model::*; use model::*;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use std::collections::BTreeMap;
use std::io::Read; use std::io::Read;
use std::io::Write; use std::io::Write;
use std::str; use std::str;
use std::str::FromStr;
use std::sync::RwLock;
use url::Url; use url::Url;
use utils::MutexPoisonError;
use uuid::Uuid; use uuid::Uuid;
use Result; use Result;
@ -50,6 +54,53 @@ pub trait StringStore {
} }
} }
/// In-memory string dictionary made of two lock-protected lookup tables.
///
/// The `StringStore` implementation for this type hands out `u64` ids for
/// strings and resolves ids back to strings.
pub struct MemoryStringStore {
/// Strings stored by position; the `StringStore` impl uses the index as the id.
id2str: RwLock<Vec<String>>,
/// Reverse table mapping each stored string to its id.
str2id: RwLock<BTreeMap<String, u64>>,
}
impl Default for MemoryStringStore {
fn default() -> Self {
let new = Self {
id2str: RwLock::default(),
str2id: RwLock::default(),
};
new.set_first_strings().unwrap();
new
}
}
impl StringStore for MemoryStringStore {
    /// Returns the id of `value`, storing it first if it is not yet known.
    ///
    /// A new string receives the current length of `id2str` as its id and is
    /// appended to that table, so ids are assigned sequentially.
    ///
    /// # Errors
    /// Returns a `MutexPoisonError` if either internal lock is poisoned.
    fn insert_str(&self, value: &str) -> Result<u64> {
        // Locks are taken in the same order as before (`id2str`, then `str2id`).
        let mut id2str = self.id2str.write().map_err(MutexPoisonError::from)?;
        let mut str2id = self.str2id.write().map_err(MutexPoisonError::from)?;

        // Fast path: a borrowed lookup avoids allocating an owned key when the
        // string is already in the dictionary.
        if let Some(&known) = str2id.get(value) {
            return Ok(known);
        }

        let id = id2str.len() as u64;
        id2str.push(value.to_owned());
        str2id.insert(value.to_owned(), id);
        Ok(id)
    }

    /// Returns a copy of the string stored under `id`.
    ///
    /// # Errors
    /// Returns a `MutexPoisonError` if the lock is poisoned, or a
    /// "value not found in the dictionary" error if `id` was never assigned.
    fn get_str(&self, id: u64) -> Result<String> {
        let id2str = self.id2str.read().map_err(MutexPoisonError::from)?;
        // Compare in `u64` so the later cast to `usize` cannot truncate.
        if id < id2str.len() as u64 {
            Ok(id2str[id as usize].clone())
        } else {
            Err(format_err!("value not found in the dictionary"))
        }
    }

    /// Parses the string stored under `id` as a `Url`.
    ///
    /// # Errors
    /// Returns a `MutexPoisonError` if the lock is poisoned, a
    /// "value not found in the dictionary" error if `id` was never assigned,
    /// or the URL parse error if the stored string is not a valid URL.
    fn get_url(&self, id: u64) -> Result<Url> {
        let id2str = self.id2str.read().map_err(MutexPoisonError::from)?;
        // Compare in `u64` so the later cast to `usize` cannot truncate.
        if id < id2str.len() as u64 {
            Ok(Url::from_str(&id2str[id as usize])?)
        } else {
            Err(format_err!("value not found in the dictionary"))
        }
    }
}
const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
const TYPE_NAMED_NODE_ID: u8 = 1; const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_BLANK_NODE_ID: u8 = 2; const TYPE_BLANK_NODE_ID: u8 = 2;
@ -652,48 +703,6 @@ impl<S: StringStore + Default> Default for Encoder<S> {
} }
mod test { mod test {
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::str::FromStr;
use store::numeric_encoder::*;
#[derive(Default)]
struct MemoryStringStore {
id2str: RefCell<Vec<String>>,
str2id: RefCell<BTreeMap<String, u64>>,
}
impl StringStore for MemoryStringStore {
fn insert_str(&self, value: &str) -> Result<u64> {
let mut str2id = self.str2id.borrow_mut();
let mut id2str = self.id2str.borrow_mut();
let id = str2id.entry(value.to_string()).or_insert_with(|| {
let id = id2str.len() as u64;
id2str.push(value.to_string());
id
});
Ok(*id)
}
fn get_str(&self, id: u64) -> Result<String> {
let id2str = self.id2str.borrow();
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(id2str[id as usize].to_owned())
}
}
fn get_url(&self, id: u64) -> Result<Url> {
let id2str = self.id2str.borrow();
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(Url::from_str(&id2str[id as usize])?)
}
}
}
#[test] #[test]
fn test_encoding() { fn test_encoding() {
use model::*; use model::*;

@ -1,6 +1,5 @@
use byteorder::ByteOrder; use byteorder::ByteOrder;
use byteorder::LittleEndian; use byteorder::LittleEndian;
use failure::Backtrace;
use rocksdb::ColumnFamily; use rocksdb::ColumnFamily;
use rocksdb::DBRawIterator; use rocksdb::DBRawIterator;
use rocksdb::Options; use rocksdb::Options;
@ -12,11 +11,11 @@ use std::path::Path;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Mutex; use std::sync::Mutex;
use std::sync::PoisonError;
use store::encoded::EncodedQuadsStore; use store::encoded::EncodedQuadsStore;
use store::encoded::StoreDataset; use store::encoded::StoreDataset;
use store::numeric_encoder::*; use store::numeric_encoder::*;
use url::Url; use url::Url;
use utils::MutexPoisonError;
use Result; use Result;
/// `rudf::model::Dataset` trait implementation based on the [RocksDB](https://rocksdb.org/) key-value store /// `rudf::model::Dataset` trait implementation based on the [RocksDB](https://rocksdb.org/) key-value store
@ -94,7 +93,7 @@ impl StringStore for RocksDbStore {
let id = self let id = self
.str_id_counter .str_id_counter
.lock() .lock()
.map_err(RocksDBCounterMutexPoisonError::from)? .map_err(MutexPoisonError::from)?
.get_and_increment(&self.db)? as u64; .get_and_increment(&self.db)? as u64;
let id_bytes = to_bytes(id); let id_bytes = to_bytes(id);
let mut batch = WriteBatch::default(); let mut batch = WriteBatch::default();
@ -549,20 +548,6 @@ fn to_bytes(int: u64) -> [u8; 8] {
buf buf
} }
#[derive(Debug, Fail)]
#[fail(display = "RocksDBStore Mutex was poisoned")]
pub struct RocksDBCounterMutexPoisonError {
backtrace: Backtrace,
}
impl<T> From<PoisonError<T>> for RocksDBCounterMutexPoisonError {
fn from(_: PoisonError<T>) -> Self {
Self {
backtrace: Backtrace::new(),
}
}
}
// TODO: very bad but I believe it is fine // TODO: very bad but I believe it is fine
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
struct SendColumnFamily(ColumnFamily); struct SendColumnFamily(ColumnFamily);

@ -1,3 +1,6 @@
use failure::Backtrace;
use std::sync::PoisonError;
pub trait Escaper { pub trait Escaper {
fn escape(&self) -> String; fn escape(&self) -> String;
} }
@ -109,3 +112,17 @@ impl<K: 'static + Copy + Eq, V: 'static + Copy> StaticSliceMap<K, V> {
None None
} }
} }
/// Error reported when a lock guarding shared state is found to be poisoned.
///
/// It is created from a `std::sync::PoisonError` and keeps only a backtrace;
/// the poisoned guard itself is not stored.
#[derive(Debug, Fail)]
#[fail(display = "Mutex was poisoned")]
pub struct MutexPoisonError {
    /// Backtrace recorded when the error value is built.
    backtrace: Backtrace,
}
impl<T> From<PoisonError<T>> for MutexPoisonError {
fn from(_: PoisonError<T>) -> Self {
Self {
backtrace: Backtrace::new(),
}
}
}

Loading…
Cancel
Save