From b369eb812689efb759a2b915ee783ff4cbc68474 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 18 Nov 2019 13:58:23 +0100 Subject: [PATCH] Migrates to RocksDB 0.13 --- lib/Cargo.toml | 2 +- lib/src/sparql/eval.rs | 53 ++------------------- lib/src/sparql/plan.rs | 15 ++---- lib/src/store/memory.rs | 2 - lib/src/store/numeric_encoder.rs | 9 +--- lib/src/store/rocksdb.rs | 79 ++++++++++---------------------- 6 files changed, 36 insertions(+), 124 deletions(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index e0d2a7af..297923c9 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -14,7 +14,7 @@ edition = "2018" [dependencies] lazy_static = "1" -rocksdb = { version = "0.12", optional = true } +rocksdb = { version = "0.13", optional = true } byteorder = { version = "1", features = ["i128"] } quick-xml = "0.17" ordered-float = "1" diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 395ac207..6ca2e14f 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -29,7 +29,6 @@ use std::fmt::Write; use std::hash::Hash; use std::iter::Iterator; use std::iter::{empty, once}; -use std::ops::Deref; use std::str; use std::sync::Mutex; @@ -1451,10 +1450,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { } } - fn to_simple_string( - &self, - term: EncodedTerm, - ) -> Option< as StrLookup>::StrType> { + fn to_simple_string(&self, term: EncodedTerm) -> Option { if let EncodedTerm::StringLiteral { value_id } = term { self.dataset.get_str(value_id).ok()? } else { @@ -1470,7 +1466,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { } } - fn to_string(&self, term: EncodedTerm) -> Option< as StrLookup>::StrType> { + fn to_string(&self, term: EncodedTerm) -> Option { match term { EncodedTerm::StringLiteral { value_id } | EncodedTerm::LangStringLiteral { value_id, .. } => { @@ -1480,10 +1476,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { } } - fn to_string_and_language( - &self, - term: EncodedTerm, - ) -> Option<( as StrLookup>::StrType, Option)> { + fn to_string_and_language(&self, term: EncodedTerm) -> Option<(String, Option)> { match term { EncodedTerm::StringLiteral { value_id } => { Some((self.dataset.get_str(value_id).ok()??, None)) @@ -1533,11 +1526,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { &self, arg1: EncodedTerm, arg2: EncodedTerm, - ) -> Option<( - as StrLookup>::StrType, - as StrLookup>::StrType, - Option, - )> { + ) -> Option<(String, String, Option)> { let (value1, language1) = self.to_string_and_language(arg1)?; let (value2, language2) = self.to_string_and_language(arg2)?; if language2.is_none() || language1 == language2 { @@ -1893,40 +1882,6 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { } } -pub enum StringOrStoreString + ToString + Into> { - String(String), - Store(S), -} - -impl + ToString + Into> Deref for StringOrStoreString { - type Target = str; - - fn deref(&self) -> &str { - match self { - StringOrStoreString::String(s) => &*s, - StringOrStoreString::Store(s) => &*s, - } - } -} - -impl + ToString + Into> ToString for StringOrStoreString { - fn to_string(&self) -> String { - match self { - StringOrStoreString::String(s) => s.to_string(), - StringOrStoreString::Store(s) => s.to_string(), - } - } -} - -impl + ToString + Into> From> for String { - fn from(string: StringOrStoreString) -> Self { - match string { - StringOrStoreString::String(s) => s, - StringOrStoreString::Store(s) => s.into(), - } - } -} - enum NumericBinaryOperands { Float(f32, f32), Double(f64, f64), diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index 7b8a98a3..b07afbe3 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -1,4 +1,3 @@ -use crate::sparql::eval::StringOrStoreString; use crate::sparql::model::Variable; use crate::sparql::GraphPattern; use crate::store::numeric_encoder::{ @@ -493,16 +492,12 @@ impl DatasetView { } impl StrLookup for DatasetView { - type StrType = StringOrStoreString; - - fn get_str(&self, id: u128) -> Result>> { - Ok(if let Some(value) = self.extra.borrow().get_str(id)? { - Some(StringOrStoreString::String(value)) - } else if let Some(value) = self.store.get_str(id)? { - Some(StringOrStoreString::Store(value)) + fn get_str(&self, id: u128) -> Result> { + if let Some(value) = self.extra.borrow().get_str(id)? { + Ok(Some(value)) } else { - None - }) + self.store.get_str(id) + } } } diff --git a/lib/src/store/memory.rs b/lib/src/store/memory.rs index 95bf9d47..a950672c 100644 --- a/lib/src/store/memory.rs +++ b/lib/src/store/memory.rs @@ -77,8 +77,6 @@ impl<'a> Store for &'a MemoryStore { } impl<'a> StrLookup for &'a MemoryStore { - type StrType = String; - fn get_str(&self, id: u128) -> Result> { self.indexes()?.str_store.get_str(id) } diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index db8c5b95..0f574a75 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -16,7 +16,6 @@ use std::collections::HashMap; use std::io::Read; use std::io::Write; use std::mem::size_of; -use std::ops::Deref; use std::str; const EMPTY_STRING_ID: u128 = 0x7e42_f8ec_9809_80e9_04b2_008f_d98c_1dd4; @@ -693,9 +692,7 @@ impl TermWriter for W { } pub trait StrLookup { - type StrType: Deref + ToString + Into; - - fn get_str(&self, id: u128) -> Result>; + fn get_str(&self, id: u128) -> Result>; } pub trait StrContainer { @@ -733,8 +730,6 @@ impl Default for MemoryStrStore { } impl StrLookup for MemoryStrStore { - type StrType = String; - fn get_str(&self, id: u128) -> Result> { //TODO: avoid copy by adding a lifetime limit to get_str Ok(self.id2str.get(&id).cloned()) @@ -1121,7 +1116,7 @@ impl Decoder for S { } } -fn get_required_str(lookup: &S, id: u128) -> Result { +fn get_required_str(lookup: &impl StrLookup, id: u128) -> Result { lookup.get_str(id)?.ok_or_else(|| { format_err!( "Not able to find the string with id {} in the string store", diff --git a/lib/src/store/rocksdb.rs b/lib/src/store/rocksdb.rs index c0c22960..615c43b5 100644 --- a/lib/src/store/rocksdb.rs +++ b/lib/src/store/rocksdb.rs @@ -2,17 +2,10 @@ use crate::store::numeric_encoder::*; use crate::store::{Store, StoreConnection, StoreRepositoryConnection, StoreTransaction}; use crate::{Repository, Result}; use failure::format_err; -use rocksdb::ColumnFamily; -use rocksdb::DBCompactionStyle; -use rocksdb::DBRawIterator; -use rocksdb::DBVector; -use rocksdb::Options; -use rocksdb::WriteBatch; -use rocksdb::DB; +use rocksdb::*; use std::io::Cursor; use std::iter::{empty, once}; use std::mem::swap; -use std::ops::Deref; use std::path::Path; use std::str; @@ -77,13 +70,13 @@ struct RocksDbStore { #[derive(Clone)] pub struct RocksDbStoreConnection<'a> { store: &'a RocksDbStore, - id2str_cf: ColumnFamily<'a>, - spog_cf: ColumnFamily<'a>, - posg_cf: ColumnFamily<'a>, - ospg_cf: ColumnFamily<'a>, - gspo_cf: ColumnFamily<'a>, - gpos_cf: ColumnFamily<'a>, - gosp_cf: ColumnFamily<'a>, + id2str_cf: &'a ColumnFamily, + spog_cf: &'a ColumnFamily, + posg_cf: &'a ColumnFamily, + ospg_cf: &'a ColumnFamily, + gspo_cf: &'a ColumnFamily, + gpos_cf: &'a ColumnFamily, + gosp_cf: &'a ColumnFamily, } impl RocksDbRepository { @@ -139,14 +132,12 @@ impl<'a> Store for &'a RocksDbStore { } impl StrLookup for RocksDbStoreConnection<'_> { - type StrType = RocksString; - - fn get_str(&self, id: u128) -> Result> { + fn get_str(&self, id: u128) -> Result> { Ok(self .store .db .get_cf(self.id2str_cf, &id.to_le_bytes())? - .map(|v| RocksString { vec: v })) + .map(|v| unsafe { String::from_utf8_unchecked(v) })) } } @@ -164,7 +155,11 @@ impl<'a> StoreConnection for RocksDbStoreConnection<'a> { fn contains(&self, quad: &EncodedQuad) -> Result { let mut buffer = Vec::with_capacity(4 * WRITTEN_TERM_MAX_SIZE); buffer.write_spog_quad(quad)?; - Ok(self.store.db.get_cf(self.spog_cf, &buffer)?.is_some()) + Ok(self + .store + .db + .get_pinned_cf(self.spog_cf, &buffer)? + .is_some()) } fn quads_for_pattern<'b>( @@ -417,7 +412,7 @@ impl<'a> RocksDbStoreConnection<'a> { fn inner_quads( &self, - cf: ColumnFamily, + cf: &ColumnFamily, prefix: Vec, decode: impl Fn(&[u8]) -> Result + 'a, ) -> Result> + 'a> { @@ -532,7 +527,7 @@ impl<'a> StoreTransaction for RocksDbStoreTransaction<'a> { } } -fn get_cf<'a>(db: &'a DB, name: &str) -> Result> { +fn get_cf<'a>(db: &'a DB, name: &str) -> Result<&'a ColumnFamily> { db.cf_handle(name) .ok_or_else(|| format_err!("column family {} not found", name)) } @@ -578,15 +573,13 @@ impl<'a, F: Fn(&[u8]) -> Result> Iterator for DecodingIndexIterator fn next(&mut self) -> Option> { if self.iter.valid() { - let result = unsafe { - self.iter.key_inner().and_then(|key| { - if key.starts_with(&self.prefix) { - Some((self.decode)(key)) - } else { - None - } - }) - }; + let result = self.iter.key().and_then(|key| { + if key.starts_with(&self.prefix) { + Some((self.decode)(key)) + } else { + None + } + }); self.iter.next(); result } else { @@ -595,30 +588,6 @@ impl<'a, F: Fn(&[u8]) -> Result> Iterator for DecodingIndexIterator } } -pub struct RocksString { - vec: DBVector, -} - -impl Deref for RocksString { - type Target = str; - - fn deref(&self) -> &str { - unsafe { str::from_utf8_unchecked(&self.vec) } - } -} - -impl ToString for RocksString { - fn to_string(&self) -> String { - self.deref().to_owned() - } -} - -impl From for String { - fn from(val: RocksString) -> String { - val.deref().to_owned() - } -} - #[test] fn repository() -> Result<()> { use crate::model::*;