parent
28cb7b276c
commit
49eda33d0a
@ -0,0 +1,8 @@ |
|||||||
|
error_chain! { |
||||||
|
foreign_links { |
||||||
|
Url(::url::ParseError); |
||||||
|
Uuid(::uuid::ParseError); |
||||||
|
RocksDB(::rocksdb::Error); |
||||||
|
Utf8(::std::str::Utf8Error); |
||||||
|
} |
||||||
|
} |
@ -1,2 +1,4 @@ |
|||||||
pub mod isomorphism; |
pub mod isomorphism; |
||||||
pub mod memory; |
pub mod memory; |
||||||
|
mod numeric_encoder; |
||||||
|
pub mod rocksdb; |
||||||
|
@ -0,0 +1,386 @@ |
|||||||
|
use errors::*; |
||||||
|
use model::*; |
||||||
|
use std::ops::Deref; |
||||||
|
use std::str; |
||||||
|
use std::str::FromStr; |
||||||
|
use url::Url; |
||||||
|
use uuid::Uuid; |
||||||
|
|
||||||
|
/// Size in bytes of the identifiers a `BytesStore` assigns to the values it stores.
pub const STRING_KEY_SIZE: usize = 8;
||||||
|
|
||||||
|
pub trait BytesStore { |
||||||
|
type BytesOutput: Deref<Target = [u8]>; |
||||||
|
|
||||||
|
fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()>; |
||||||
|
fn get(&self, id: &[u8]) -> Result<Option<Self::BytesOutput>>; |
||||||
|
} |
||||||
|
|
||||||
|
const TYPE_KEY_SIZE: usize = 1; |
||||||
|
const TYPE_NAMED_NODE_ID: u8 = 1; |
||||||
|
const TYPE_BLANK_NODE_ID: u8 = 2; |
||||||
|
const TYPE_LANG_STRING_LITERAL_ID: u8 = 3; |
||||||
|
const TYPE_TYPED_LITERAL_ID: u8 = 4; |
||||||
|
pub const TERM_ENCODING_SIZE: usize = TYPE_KEY_SIZE + 2 * STRING_KEY_SIZE; |
||||||
|
const EMPTY_TERM: [u8; TERM_ENCODING_SIZE] = [0 as u8; TERM_ENCODING_SIZE]; |
||||||
|
|
||||||
|
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||||
|
pub struct EncodedTerm([u8; TERM_ENCODING_SIZE]); |
||||||
|
|
||||||
|
impl EncodedTerm { |
||||||
|
pub fn new_from_buffer(buffer: &[u8]) -> Result<Self> { |
||||||
|
if buffer.len() != TERM_ENCODING_SIZE { |
||||||
|
return Err("the term buffer has not the correct length".into()); |
||||||
|
} |
||||||
|
let mut buf = [0 as u8; TERM_ENCODING_SIZE]; |
||||||
|
buf.copy_from_slice(buffer); |
||||||
|
return Ok(EncodedTerm(buf)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl AsRef<[u8]> for EncodedTerm { |
||||||
|
fn as_ref(&self) -> &[u8] { |
||||||
|
&self.0[..] |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] |
||||||
|
pub struct EncodedQuad { |
||||||
|
pub subject: EncodedTerm, |
||||||
|
pub predicate: EncodedTerm, |
||||||
|
pub object: EncodedTerm, |
||||||
|
pub graph_name: EncodedTerm, |
||||||
|
} |
||||||
|
|
||||||
|
impl EncodedQuad { |
||||||
|
pub fn new_from_spog_buffer(buffer: &[u8]) -> Result<Self> { |
||||||
|
if buffer.len() != 4 * TERM_ENCODING_SIZE { |
||||||
|
return Err("the spog buffer has not the correct length".into()); |
||||||
|
} |
||||||
|
Ok(Self { |
||||||
|
subject: EncodedTerm::new_from_buffer(&buffer[0..TERM_ENCODING_SIZE])?, |
||||||
|
predicate: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
object: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
graph_name: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn new_from_posg_buffer(buffer: &[u8]) -> Result<Self> { |
||||||
|
if buffer.len() != 4 * TERM_ENCODING_SIZE { |
||||||
|
return Err("the posg buffer has not the correct length".into()); |
||||||
|
} |
||||||
|
Ok(Self { |
||||||
|
subject: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
predicate: EncodedTerm::new_from_buffer(&buffer[0..TERM_ENCODING_SIZE])?, |
||||||
|
object: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
graph_name: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn new_from_ospg_buffer(buffer: &[u8]) -> Result<Self> { |
||||||
|
if buffer.len() != 4 * TERM_ENCODING_SIZE { |
||||||
|
return Err("the ospg buffer has not the correct length".into()); |
||||||
|
} |
||||||
|
Ok(Self { |
||||||
|
subject: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
predicate: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
object: EncodedTerm::new_from_buffer(&buffer[0..TERM_ENCODING_SIZE])?, |
||||||
|
graph_name: EncodedTerm::new_from_buffer( |
||||||
|
&buffer[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE], |
||||||
|
)?, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn spog(&self) -> [u8; 4 * TERM_ENCODING_SIZE] { |
||||||
|
let mut spog = [0 as u8; 4 * TERM_ENCODING_SIZE]; |
||||||
|
spog[0..TERM_ENCODING_SIZE].copy_from_slice(self.subject.as_ref()); |
||||||
|
spog[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(self.predicate.as_ref()); |
||||||
|
spog[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(self.object.as_ref()); |
||||||
|
spog[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE] |
||||||
|
.copy_from_slice(self.graph_name.as_ref()); |
||||||
|
spog |
||||||
|
} |
||||||
|
|
||||||
|
pub fn posg(&self) -> [u8; 4 * TERM_ENCODING_SIZE] { |
||||||
|
let mut posg = [0 as u8; 4 * TERM_ENCODING_SIZE]; |
||||||
|
posg[0..TERM_ENCODING_SIZE].copy_from_slice(self.predicate.as_ref()); |
||||||
|
posg[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(self.object.as_ref()); |
||||||
|
posg[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(self.subject.as_ref()); |
||||||
|
posg[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE] |
||||||
|
.copy_from_slice(self.graph_name.as_ref()); |
||||||
|
posg |
||||||
|
} |
||||||
|
|
||||||
|
pub fn ospg(&self) -> [u8; 4 * TERM_ENCODING_SIZE] { |
||||||
|
let mut ospg = [0 as u8; 4 * TERM_ENCODING_SIZE]; |
||||||
|
ospg[0..TERM_ENCODING_SIZE].copy_from_slice(self.object.as_ref()); |
||||||
|
ospg[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(self.subject.as_ref()); |
||||||
|
ospg[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE] |
||||||
|
.copy_from_slice(self.predicate.as_ref()); |
||||||
|
ospg[3 * TERM_ENCODING_SIZE..4 * TERM_ENCODING_SIZE] |
||||||
|
.copy_from_slice(self.graph_name.as_ref()); |
||||||
|
ospg |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub struct Encoder<S: BytesStore> { |
||||||
|
string_store: S, |
||||||
|
} |
||||||
|
|
||||||
|
impl<S: BytesStore> Encoder<S> { |
||||||
|
pub fn new(string_store: S) -> Self { |
||||||
|
Self { string_store } |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_named_node(&self, named_node: &NamedNode) -> Result<EncodedTerm> { |
||||||
|
let mut bytes = [0 as u8; TERM_ENCODING_SIZE]; |
||||||
|
bytes[0] = TYPE_NAMED_NODE_ID; |
||||||
|
self.encode_str_value_to_lower_bytes(named_node.as_str(), &mut bytes)?; |
||||||
|
Ok(EncodedTerm(bytes)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_blank_node(&self, blank_node: &BlankNode) -> Result<EncodedTerm> { |
||||||
|
let mut bytes = [0 as u8; TERM_ENCODING_SIZE]; |
||||||
|
bytes[0] = TYPE_BLANK_NODE_ID; |
||||||
|
bytes[TYPE_KEY_SIZE..TERM_ENCODING_SIZE].copy_from_slice(blank_node.as_bytes()); |
||||||
|
Ok(EncodedTerm(bytes)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_literal(&self, literal: &Literal) -> Result<EncodedTerm> { |
||||||
|
let mut bytes = [0 as u8; TERM_ENCODING_SIZE]; |
||||||
|
if let Some(language) = literal.language() { |
||||||
|
bytes[0] = TYPE_LANG_STRING_LITERAL_ID; |
||||||
|
self.encode_str_value_to_upper_bytes(language, &mut bytes)?; |
||||||
|
} else { |
||||||
|
bytes[0] = TYPE_TYPED_LITERAL_ID; |
||||||
|
self.encode_str_value_to_upper_bytes(literal.datatype().as_str(), &mut bytes)?; |
||||||
|
} |
||||||
|
self.encode_str_value_to_lower_bytes(literal.value().as_str(), &mut bytes)?; |
||||||
|
Ok(EncodedTerm(bytes)) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_named_or_blank_node(&self, term: &NamedOrBlankNode) -> Result<EncodedTerm> { |
||||||
|
match term { |
||||||
|
NamedOrBlankNode::NamedNode(named_node) => self.encode_named_node(named_node), |
||||||
|
NamedOrBlankNode::BlankNode(blank_node) => self.encode_blank_node(blank_node), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_optional_named_or_blank_node( |
||||||
|
&self, |
||||||
|
term: &Option<NamedOrBlankNode>, |
||||||
|
) -> Result<EncodedTerm> { |
||||||
|
match term { |
||||||
|
Some(node) => self.encode_named_or_blank_node(node), |
||||||
|
None => Ok(EncodedTerm(EMPTY_TERM)), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_term(&self, term: &Term) -> Result<EncodedTerm> { |
||||||
|
match term { |
||||||
|
Term::NamedNode(named_node) => self.encode_named_node(named_node), |
||||||
|
Term::BlankNode(blank_node) => self.encode_blank_node(blank_node), |
||||||
|
Term::Literal(literal) => self.encode_literal(literal), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn encode_quad(&self, quad: &Quad) -> Result<EncodedQuad> { |
||||||
|
Ok(EncodedQuad { |
||||||
|
subject: self.encode_named_or_blank_node(quad.subject())?, |
||||||
|
predicate: self.encode_named_node(quad.predicate())?, |
||||||
|
object: self.encode_term(quad.object())?, |
||||||
|
graph_name: self.encode_optional_named_or_blank_node(quad.graph_name())?, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn decode_term(&self, encoded: impl AsRef<[u8]>) -> Result<Term> { |
||||||
|
let encoding = encoded.as_ref(); |
||||||
|
match encoding[0] { |
||||||
|
TYPE_NAMED_NODE_ID => { |
||||||
|
let iri = self.decode_url_value_from_lower_bytes(encoding)?; |
||||||
|
Ok(NamedNode::from(iri).into()) |
||||||
|
} |
||||||
|
TYPE_BLANK_NODE_ID => Ok(BlankNode::from(Uuid::from_bytes(&encoding[1..])?).into()), |
||||||
|
TYPE_LANG_STRING_LITERAL_ID => { |
||||||
|
let value = self.decode_str_value_from_lower_bytes(encoding)?; |
||||||
|
let language = self.decode_str_value_from_upper_bytes(encoding)?; |
||||||
|
Ok(Literal::new_language_tagged_literal(value, language).into()) |
||||||
|
} |
||||||
|
TYPE_TYPED_LITERAL_ID => { |
||||||
|
let value = self.decode_str_value_from_lower_bytes(encoding)?; |
||||||
|
let datatype = NamedNode::from(self.decode_url_value_from_upper_bytes(encoding)?); |
||||||
|
Ok(Literal::new_typed_literal(value, datatype).into()) |
||||||
|
} |
||||||
|
_ => Err("invalid term type encoding".into()), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn decode_named_or_blank_node( |
||||||
|
&self, |
||||||
|
encoded: impl AsRef<[u8]>, |
||||||
|
) -> Result<NamedOrBlankNode> { |
||||||
|
let encoding = encoded.as_ref(); |
||||||
|
match self.decode_term(encoding)? { |
||||||
|
Term::NamedNode(named_node) => Ok(named_node.into()), |
||||||
|
Term::BlankNode(blank_node) => Ok(blank_node.into()), |
||||||
|
Term::Literal(_) => Err("A literal has ben found instead of a named node".into()), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn decode_optional_named_or_blank_node( |
||||||
|
&self, |
||||||
|
encoded: impl AsRef<[u8]>, |
||||||
|
) -> Result<Option<NamedOrBlankNode>> { |
||||||
|
let encoding = encoded.as_ref(); |
||||||
|
if encoding == EMPTY_TERM { |
||||||
|
Ok(None) |
||||||
|
} else { |
||||||
|
Ok(Some(self.decode_named_or_blank_node(encoding)?)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn decode_named_node(&self, encoded: impl AsRef<[u8]>) -> Result<NamedNode> { |
||||||
|
let encoding = encoded.as_ref(); |
||||||
|
match self.decode_term(encoding)? { |
||||||
|
Term::NamedNode(named_node) => Ok(named_node), |
||||||
|
Term::BlankNode(_) => Err("A blank node has been found instead of a named node".into()), |
||||||
|
Term::Literal(_) => Err("A literal has ben found instead of a named node".into()), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn decode_quad(&self, encoded: EncodedQuad) -> Result<Quad> { |
||||||
|
Ok(Quad::new( |
||||||
|
self.decode_named_or_blank_node(encoded.subject)?, |
||||||
|
self.decode_named_node(encoded.predicate)?, |
||||||
|
self.decode_term(encoded.object)?, |
||||||
|
self.decode_optional_named_or_blank_node(encoded.graph_name)?, |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
fn encode_str_value_to_upper_bytes(&self, text: &str, bytes: &mut [u8]) -> Result<()> { |
||||||
|
self.string_store.put( |
||||||
|
text.as_bytes(), |
||||||
|
&mut bytes[TYPE_KEY_SIZE..TYPE_KEY_SIZE + STRING_KEY_SIZE], |
||||||
|
) |
||||||
|
} |
||||||
|
fn encode_str_value_to_lower_bytes(&self, text: &str, bytes: &mut [u8]) -> Result<()> { |
||||||
|
self.string_store.put( |
||||||
|
text.as_bytes(), |
||||||
|
&mut bytes[TYPE_KEY_SIZE + STRING_KEY_SIZE..TYPE_KEY_SIZE + 2 * STRING_KEY_SIZE], |
||||||
|
) |
||||||
|
} |
||||||
|
|
||||||
|
fn decode_str_value_from_upper_bytes(&self, encoding: &[u8]) -> Result<String> { |
||||||
|
let bytes = self.decode_value_from_upper_bytes(encoding)?; |
||||||
|
Ok(str::from_utf8(&bytes)?.to_string()) |
||||||
|
} |
||||||
|
|
||||||
|
fn decode_url_value_from_upper_bytes(&self, encoding: &[u8]) -> Result<Url> { |
||||||
|
let bytes = self.decode_value_from_upper_bytes(encoding)?; |
||||||
|
Ok(Url::from_str(str::from_utf8(&bytes)?)?) |
||||||
|
} |
||||||
|
|
||||||
|
fn decode_value_from_upper_bytes(&self, encoding: &[u8]) -> Result<S::BytesOutput> { |
||||||
|
self.string_store |
||||||
|
.get(&encoding[TYPE_KEY_SIZE..TYPE_KEY_SIZE + STRING_KEY_SIZE])? |
||||||
|
.ok_or(Error::from("value not found in the dictionary")) |
||||||
|
} |
||||||
|
|
||||||
|
fn decode_str_value_from_lower_bytes(&self, encoding: &[u8]) -> Result<String> { |
||||||
|
let bytes = self.decode_value_from_lower_bytes(encoding)?; |
||||||
|
Ok(str::from_utf8(&bytes)?.to_string()) |
||||||
|
} |
||||||
|
|
||||||
|
fn decode_url_value_from_lower_bytes(&self, encoding: &[u8]) -> Result<Url> { |
||||||
|
let bytes = self.decode_value_from_lower_bytes(encoding)?; |
||||||
|
Ok(Url::from_str(str::from_utf8(&bytes)?)?) |
||||||
|
} |
||||||
|
|
||||||
|
fn decode_value_from_lower_bytes(&self, encoding: &[u8]) -> Result<S::BytesOutput> { |
||||||
|
self.string_store |
||||||
|
.get(&encoding[TYPE_KEY_SIZE + STRING_KEY_SIZE..TYPE_KEY_SIZE + 2 * STRING_KEY_SIZE])? |
||||||
|
.ok_or(Error::from("value not found in the dictionary")) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl<S: BytesStore + Default> Default for Encoder<S> { |
||||||
|
fn default() -> Self { |
||||||
|
Self { |
||||||
|
string_store: S::default(), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Only compiled for `cargo test`: the in-memory store below is a test fixture.
#[cfg(test)]
mod test {
    use errors::*;
    use model::*;
    use std::cell::RefCell;
    use std::collections::BTreeMap;
    use std::str::FromStr;
    use store::numeric_encoder::BytesStore;
    use store::numeric_encoder::Encoder;
    use store::numeric_encoder::STRING_KEY_SIZE;
    use store::numeric_encoder::TERM_ENCODING_SIZE;
    use utils::to_bytes;

    /// In-memory `BytesStore` backed by two maps, one per lookup direction.
    #[derive(Default)]
    struct MemoryBytesStore {
        id2str: RefCell<BTreeMap<[u8; STRING_KEY_SIZE], Vec<u8>>>,
        str2id: RefCell<BTreeMap<Vec<u8>, [u8; STRING_KEY_SIZE]>>,
    }

    impl BytesStore for MemoryBytesStore {
        type BytesOutput = Vec<u8>;

        fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()> {
            let mut str2id = self.str2id.borrow_mut();
            let mut id2str = self.id2str.borrow_mut();
            // A value seen for the first time gets the current number of entries as its id.
            let id = str2id.entry(value.to_vec()).or_insert_with(|| {
                let id = to_bytes(id2str.len());
                id2str.insert(id, value.to_vec());
                id
            });
            id_buffer.copy_from_slice(id);
            Ok(())
        }

        fn get(&self, id: &[u8]) -> Result<Option<Vec<u8>>> {
            Ok(self.id2str.borrow().get(id).map(|s| s.to_owned()))
        }
    }

    /// Every kind of term must survive an encode/decode round trip.
    #[test]
    fn test_encoding() {
        let encoder: Encoder<MemoryBytesStore> = Encoder::default();
        let terms: Vec<Term> = vec![
            NamedNode::from_str("http://foo.com").unwrap().into(),
            NamedNode::from_str("http://bar.com").unwrap().into(),
            NamedNode::from_str("http://foo.com").unwrap().into(),
            BlankNode::default().into(),
            Literal::from(true).into(),
            Literal::from(1.2).into(),
            Literal::from("foo").into(),
            Literal::new_language_tagged_literal("foo", "fr").into(),
        ];
        for term in terms {
            let encoded = encoder.encode_term(&term).unwrap();
            assert_eq!(term, encoder.decode_term(encoded).unwrap())
        }
    }
}
@ -0,0 +1,446 @@ |
|||||||
|
use errors::*; |
||||||
|
use model::*; |
||||||
|
use rocksdb::ColumnFamily; |
||||||
|
use rocksdb::DBRawIterator; |
||||||
|
use rocksdb::DBVector; |
||||||
|
use rocksdb::IteratorMode; |
||||||
|
use rocksdb::Options; |
||||||
|
use rocksdb::WriteBatch; |
||||||
|
use rocksdb::DB; |
||||||
|
use std::ops::Deref; |
||||||
|
use std::path::Path; |
||||||
|
use std::slice; |
||||||
|
use std::str; |
||||||
|
use store::numeric_encoder::BytesStore; |
||||||
|
use store::numeric_encoder::EncodedQuad; |
||||||
|
use store::numeric_encoder::EncodedTerm; |
||||||
|
use store::numeric_encoder::Encoder; |
||||||
|
use store::numeric_encoder::STRING_KEY_SIZE; |
||||||
|
use store::numeric_encoder::TERM_ENCODING_SIZE; |
||||||
|
use utils::to_bytes; |
||||||
|
|
||||||
|
pub struct RocksDbDataset { |
||||||
|
store: RocksDbStore, |
||||||
|
} |
||||||
|
|
||||||
|
impl RocksDbDataset { |
||||||
|
pub fn open(path: impl AsRef<Path>) -> Result<Self> { |
||||||
|
Ok(Self { |
||||||
|
store: RocksDbStore::open(path)?, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn graph(&self, name: &NamedOrBlankNode) -> RocksDbGraph { |
||||||
|
RocksDbGraph { |
||||||
|
store: &self.store, |
||||||
|
name: name.clone(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn default_graph(&self) -> RocksDbDefaultGraph { |
||||||
|
RocksDbDefaultGraph { store: &self.store } |
||||||
|
} |
||||||
|
|
||||||
|
fn union_graph(&self) -> RocksDbUnionGraph { |
||||||
|
RocksDbUnionGraph { store: &self.store } |
||||||
|
} |
||||||
|
|
||||||
|
fn iter(&self) -> Result<QuadsIterator<SPOGIndexIterator>> { |
||||||
|
Ok(QuadsIterator { |
||||||
|
iter: self.store.quads()?, |
||||||
|
encoder: self.store.encoder(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_subject( |
||||||
|
&self, |
||||||
|
subject: &NamedOrBlankNode, |
||||||
|
) -> Result<QuadsIterator<FilteringEncodedQuadsIterator<SPOGIndexIterator>>> { |
||||||
|
Ok(QuadsIterator { |
||||||
|
iter: self.store |
||||||
|
.quads_for_subject(self.store.encoder().encode_named_or_blank_node(subject)?)?, |
||||||
|
encoder: self.store.encoder(), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn contains(&self, quad: &Quad) -> Result<bool> { |
||||||
|
self.store |
||||||
|
.contains(&self.store.encoder().encode_quad(quad)?) |
||||||
|
} |
||||||
|
|
||||||
|
fn insert(&self, quad: &Quad) -> Result<()> { |
||||||
|
self.store.insert(&self.store.encoder().encode_quad(quad)?) |
||||||
|
} |
||||||
|
|
||||||
|
fn remove(&self, quad: &Quad) -> Result<()> { |
||||||
|
self.store.remove(&self.store.encoder().encode_quad(quad)?) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
struct RocksDbGraph<'a> { |
||||||
|
store: &'a RocksDbStore, |
||||||
|
name: NamedOrBlankNode, //TODO: better storage
|
||||||
|
} |
||||||
|
|
||||||
|
struct RocksDbDefaultGraph<'a> { |
||||||
|
store: &'a RocksDbStore, |
||||||
|
} |
||||||
|
|
||||||
|
struct RocksDbUnionGraph<'a> { |
||||||
|
store: &'a RocksDbStore, |
||||||
|
} |
||||||
|
|
||||||
|
// Names of the RocksDB column families.
// Dictionary from string identifier to string value.
const ID2STR_CF: &'static str = "id2str";
// Dictionary from string value to string identifier. It needs its own column
// family: sharing the name of `ID2STR_CF` would mix both directions in one keyspace.
const STR2ID_CF: &'static str = "str2id";
// Quad indexes, named after the order in which the terms are laid out in the keys.
const SPOG_CF: &'static str = "spog";
const POSG_CF: &'static str = "posg";
const OSPG_CF: &'static str = "ospg";
||||||
|
|
||||||
|
// Empty value written for the index entries, whose whole content is in the key.
const EMPTY_BUF: [u8; 0] = [];
||||||
|
|
||||||
|
//TODO: indexes for the default graph and indexes for the named graphs (no more Optional and space saving)
|
||||||
|
|
||||||
|
const COLUMN_FAMILIES: [&'static str; 5] = [ID2STR_CF, STR2ID_CF, SPOG_CF, POSG_CF, OSPG_CF]; |
||||||
|
|
||||||
|
struct RocksDbStore { |
||||||
|
db: DB, |
||||||
|
id2str_cf: ColumnFamily, |
||||||
|
str2id_cf: ColumnFamily, |
||||||
|
spog_cf: ColumnFamily, |
||||||
|
posg_cf: ColumnFamily, |
||||||
|
ospg_cf: ColumnFamily, |
||||||
|
} |
||||||
|
|
||||||
|
impl RocksDbStore { |
||||||
|
fn open(path: impl AsRef<Path>) -> Result<Self> { |
||||||
|
let options = Options::default(); |
||||||
|
|
||||||
|
let db = DB::open_cf(&options, path, &COLUMN_FAMILIES)?; |
||||||
|
let id2str_cf = get_cf(&db, STR2ID_CF)?; |
||||||
|
let str2id_cf = get_cf(&db, ID2STR_CF)?; |
||||||
|
let spog_cf = get_cf(&db, SPOG_CF)?; |
||||||
|
let posg_cf = get_cf(&db, POSG_CF)?; |
||||||
|
let ospg_cf = get_cf(&db, OSPG_CF)?; |
||||||
|
|
||||||
|
Ok(Self { |
||||||
|
db, |
||||||
|
id2str_cf, |
||||||
|
str2id_cf, |
||||||
|
spog_cf, |
||||||
|
posg_cf, |
||||||
|
ospg_cf, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn encoder(&self) -> Encoder<RocksDbBytesStore> { |
||||||
|
Encoder::new(RocksDbBytesStore(&self)) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads(&self) -> Result<SPOGIndexIterator> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; |
||||||
|
iter.seek_to_first(); |
||||||
|
Ok(SPOGIndexIterator { iter }) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_subject( |
||||||
|
&self, |
||||||
|
subject: EncodedTerm, |
||||||
|
) -> Result<FilteringEncodedQuadsIterator<SPOGIndexIterator>> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; |
||||||
|
iter.seek(subject.as_ref()); |
||||||
|
Ok(FilteringEncodedQuadsIterator { |
||||||
|
iter: SPOGIndexIterator { iter }, |
||||||
|
filter: EncodedQuadPattern::new(Some(subject), None, None, None), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_subject_predicate( |
||||||
|
&self, |
||||||
|
subject: EncodedTerm, |
||||||
|
predicate: EncodedTerm, |
||||||
|
) -> Result<FilteringEncodedQuadsIterator<SPOGIndexIterator>> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; |
||||||
|
iter.seek(&encode_term_pair(&subject, &predicate)); |
||||||
|
Ok(FilteringEncodedQuadsIterator { |
||||||
|
iter: SPOGIndexIterator { iter }, |
||||||
|
filter: EncodedQuadPattern::new(Some(subject), Some(predicate), None, None), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_subject_predicate_object( |
||||||
|
&self, |
||||||
|
subject: EncodedTerm, |
||||||
|
predicate: EncodedTerm, |
||||||
|
object: EncodedTerm, |
||||||
|
) -> Result<FilteringEncodedQuadsIterator<SPOGIndexIterator>> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; |
||||||
|
iter.seek(&encode_term_triple(&subject, &predicate, &object)); |
||||||
|
Ok(FilteringEncodedQuadsIterator { |
||||||
|
iter: SPOGIndexIterator { iter }, |
||||||
|
filter: EncodedQuadPattern::new(Some(subject), Some(predicate), Some(object), None), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_predicate( |
||||||
|
&self, |
||||||
|
predicate: EncodedTerm, |
||||||
|
) -> Result<FilteringEncodedQuadsIterator<POSGIndexIterator>> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.posg_cf)?; |
||||||
|
iter.seek(predicate.as_ref()); |
||||||
|
Ok(FilteringEncodedQuadsIterator { |
||||||
|
iter: POSGIndexIterator { iter }, |
||||||
|
filter: EncodedQuadPattern::new(None, Some(predicate), None, None), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_predicate_object( |
||||||
|
&self, |
||||||
|
predicate: EncodedTerm, |
||||||
|
object: EncodedTerm, |
||||||
|
) -> Result<FilteringEncodedQuadsIterator<POSGIndexIterator>> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?; |
||||||
|
iter.seek(&encode_term_pair(&predicate, &object)); |
||||||
|
Ok(FilteringEncodedQuadsIterator { |
||||||
|
iter: POSGIndexIterator { iter }, |
||||||
|
filter: EncodedQuadPattern::new(None, Some(predicate), Some(object), None), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn quads_for_object( |
||||||
|
&self, |
||||||
|
object: EncodedTerm, |
||||||
|
) -> Result<FilteringEncodedQuadsIterator<OSPGIndexIterator>> { |
||||||
|
let mut iter = self.db.raw_iterator_cf(self.ospg_cf)?; |
||||||
|
iter.seek(object.as_ref()); |
||||||
|
Ok(FilteringEncodedQuadsIterator { |
||||||
|
iter: OSPGIndexIterator { iter }, |
||||||
|
filter: EncodedQuadPattern::new(None, None, Some(object), None), |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
fn contains(&self, quad: &EncodedQuad) -> Result<bool> { |
||||||
|
Ok(self.db.get_cf(self.spog_cf, &quad.spog())?.is_some()) |
||||||
|
} |
||||||
|
|
||||||
|
fn insert(&self, quad: &EncodedQuad) -> Result<()> { |
||||||
|
let mut batch = WriteBatch::default(); |
||||||
|
batch.put_cf(self.spog_cf, &quad.spog(), &EMPTY_BUF)?; |
||||||
|
batch.put_cf(self.posg_cf, &quad.posg(), &EMPTY_BUF)?; |
||||||
|
batch.put_cf(self.ospg_cf, &quad.ospg(), &EMPTY_BUF)?; |
||||||
|
Ok(self.db.write(batch)?) //TODO: check what's going on if the key already exists
|
||||||
|
} |
||||||
|
|
||||||
|
fn remove(&self, quad: &EncodedQuad) -> Result<()> { |
||||||
|
let mut batch = WriteBatch::default(); |
||||||
|
batch.delete_cf(self.spog_cf, &quad.spog())?; |
||||||
|
batch.delete_cf(self.posg_cf, &quad.posg())?; |
||||||
|
batch.delete_cf(self.ospg_cf, &quad.ospg())?; |
||||||
|
Ok(self.db.write(batch)?) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn get_cf(db: &DB, name: &str) -> Result<ColumnFamily> { |
||||||
|
db.cf_handle(name) |
||||||
|
.ok_or_else(|| Error::from("column family not found")) |
||||||
|
} |
||||||
|
|
||||||
|
struct RocksDbBytesStore<'a>(&'a RocksDbStore); |
||||||
|
|
||||||
|
impl<'a> BytesStore for RocksDbBytesStore<'a> { |
||||||
|
type BytesOutput = DBVector; |
||||||
|
|
||||||
|
fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()> { |
||||||
|
match self.0.db.get_cf(self.0.str2id_cf, value)? { |
||||||
|
Some(id) => id_buffer.copy_from_slice(&id), |
||||||
|
None => { |
||||||
|
let mut batch = WriteBatch::default(); |
||||||
|
// TODO: id allocation
|
||||||
|
let id = [0 as u8; STRING_KEY_SIZE]; |
||||||
|
batch.put_cf(self.0.id2str_cf, &id, value)?; |
||||||
|
batch.put_cf(self.0.str2id_cf, value, &id)?; |
||||||
|
self.0.db.write(batch)?; |
||||||
|
id_buffer.copy_from_slice(&id) |
||||||
|
} |
||||||
|
} |
||||||
|
Ok(()) |
||||||
|
} |
||||||
|
|
||||||
|
fn get(&self, id: &[u8]) -> Result<Option<DBVector>> { |
||||||
|
Ok(self.0.db.get_cf(self.0.id2str_cf, id)?) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
struct EncodedQuadPattern { |
||||||
|
subject: Option<EncodedTerm>, |
||||||
|
predicate: Option<EncodedTerm>, |
||||||
|
object: Option<EncodedTerm>, |
||||||
|
graph_name: Option<EncodedTerm>, |
||||||
|
} |
||||||
|
|
||||||
|
impl EncodedQuadPattern { |
||||||
|
fn new( |
||||||
|
subject: Option<EncodedTerm>, |
||||||
|
predicate: Option<EncodedTerm>, |
||||||
|
object: Option<EncodedTerm>, |
||||||
|
graph_name: Option<EncodedTerm>, |
||||||
|
) -> Self { |
||||||
|
Self { |
||||||
|
subject, |
||||||
|
predicate, |
||||||
|
object, |
||||||
|
graph_name, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn filter(&self, quad: &EncodedQuad) -> bool { |
||||||
|
if let Some(ref subject) = self.subject { |
||||||
|
if &quad.subject != subject { |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
if let Some(ref predicate) = self.predicate { |
||||||
|
if &quad.predicate != predicate { |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
if let Some(ref object) = self.object { |
||||||
|
if &quad.object != object { |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
if let Some(ref graph_name) = self.graph_name { |
||||||
|
if &quad.graph_name != graph_name { |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
true |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fn encode_term_pair(t1: &EncodedTerm, t2: &EncodedTerm) -> [u8; 2 * TERM_ENCODING_SIZE] { |
||||||
|
let mut bytes = [0 as u8; 2 * TERM_ENCODING_SIZE]; |
||||||
|
bytes[0..TERM_ENCODING_SIZE].copy_from_slice(t1.as_ref()); |
||||||
|
bytes[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(t2.as_ref()); |
||||||
|
bytes |
||||||
|
} |
||||||
|
|
||||||
|
fn encode_term_triple( |
||||||
|
t1: &EncodedTerm, |
||||||
|
t2: &EncodedTerm, |
||||||
|
t3: &EncodedTerm, |
||||||
|
) -> [u8; 3 * TERM_ENCODING_SIZE] { |
||||||
|
let mut bytes = [0 as u8; 3 * TERM_ENCODING_SIZE]; |
||||||
|
bytes[0..TERM_ENCODING_SIZE].copy_from_slice(t1.as_ref()); |
||||||
|
bytes[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(t2.as_ref()); |
||||||
|
bytes[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(t2.as_ref()); |
||||||
|
bytes |
||||||
|
} |
||||||
|
|
||||||
|
struct SPOGIndexIterator { |
||||||
|
iter: DBRawIterator, |
||||||
|
} |
||||||
|
|
||||||
|
impl Iterator for SPOGIndexIterator { |
||||||
|
type Item = Result<EncodedQuad>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<EncodedQuad>> { |
||||||
|
self.iter.next(); |
||||||
|
self.iter |
||||||
|
.key() |
||||||
|
.map(|buffer| EncodedQuad::new_from_spog_buffer(&buffer)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
struct POSGIndexIterator { |
||||||
|
iter: DBRawIterator, |
||||||
|
} |
||||||
|
|
||||||
|
impl Iterator for POSGIndexIterator { |
||||||
|
type Item = Result<EncodedQuad>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<EncodedQuad>> { |
||||||
|
self.iter.next(); |
||||||
|
self.iter |
||||||
|
.key() |
||||||
|
.map(|buffer| EncodedQuad::new_from_posg_buffer(&buffer)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
struct OSPGIndexIterator { |
||||||
|
iter: DBRawIterator, |
||||||
|
} |
||||||
|
|
||||||
|
impl Iterator for OSPGIndexIterator { |
||||||
|
type Item = Result<EncodedQuad>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<EncodedQuad>> { |
||||||
|
self.iter.next(); |
||||||
|
self.iter |
||||||
|
.key() |
||||||
|
.map(|buffer| EncodedQuad::new_from_ospg_buffer(&buffer)) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
struct FilteringEncodedQuadsIterator<I: Iterator<Item = Result<EncodedQuad>>> { |
||||||
|
iter: I, |
||||||
|
filter: EncodedQuadPattern, |
||||||
|
} |
||||||
|
|
||||||
|
impl<I: Iterator<Item = Result<EncodedQuad>>> Iterator for FilteringEncodedQuadsIterator<I> { |
||||||
|
type Item = Result<EncodedQuad>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<EncodedQuad>> { |
||||||
|
self.iter.next().filter(|quad| match quad { |
||||||
|
Ok(quad) => self.filter.filter(quad), |
||||||
|
Err(e) => true, |
||||||
|
}) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
struct QuadsIterator<'a, I: Iterator<Item = Result<EncodedQuad>>> { |
||||||
|
iter: I, |
||||||
|
encoder: Encoder<RocksDbBytesStore<'a>>, |
||||||
|
} |
||||||
|
|
||||||
|
impl<'a, I: Iterator<Item = Result<EncodedQuad>>> Iterator for QuadsIterator<'a, I> { |
||||||
|
type Item = Result<Quad>; |
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Result<Quad>> { |
||||||
|
self.iter |
||||||
|
.next() |
||||||
|
.map(|k| k.and_then(|quad| self.encoder.decode_quad(quad))) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/*fn encode_sp(
|
||||||
|
encoder: &Encoder<RocksDbBytesStore>, |
||||||
|
subject: &NamedOrBlankNode, |
||||||
|
predicate: &NamedNode, |
||||||
|
) -> Result<[u8; 2 * TERM_ENCODING_SIZE]> { |
||||||
|
let mut sp = [0 as u8; 2 * TERM_ENCODING_SIZE]; |
||||||
|
encoder.encode_named_or_blank_node(subject, &mut sp)?; |
||||||
|
encoder.encode_named_node(predicate, &mut sp)?; |
||||||
|
Ok(sp) |
||||||
|
} |
||||||
|
|
||||||
|
fn encode_po( |
||||||
|
encoder: &Encoder<RocksDbBytesStore>, |
||||||
|
predicate: &NamedNode, |
||||||
|
object: &Term, |
||||||
|
) -> Result<[u8; 2 * TERM_ENCODING_SIZE]> { |
||||||
|
let mut po = [0 as u8; 2 * TERM_ENCODING_SIZE]; |
||||||
|
encoder.encode_named_node(predicate, &mut po)?; |
||||||
|
encoder.encode_term(object, &mut po)?; |
||||||
|
Ok(po) |
||||||
|
} |
||||||
|
|
||||||
|
fn encode_os( |
||||||
|
encoder: &Encoder<RocksDbBytesStore>, |
||||||
|
object: &Term, |
||||||
|
subject: &NamedOrBlankNode, |
||||||
|
) -> Result<[u8; 2 * TERM_ENCODING_SIZE]> { |
||||||
|
let mut po = [0 as u8; 2 * TERM_ENCODING_SIZE]; |
||||||
|
encoder.encode_term(object, &mut po)?; |
||||||
|
encoder.encode_named_or_blank_node(subject, &mut po)?; |
||||||
|
Ok(po) |
||||||
|
}*/ |
Loading…
Reference in new issue