Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
342 lines
10 KiB
342 lines
10 KiB
use errors::*;
|
|
use rocksdb::ColumnFamily;
|
|
use rocksdb::DBRawIterator;
|
|
use rocksdb::DBVector;
|
|
use rocksdb::Options;
|
|
use rocksdb::WriteBatch;
|
|
use rocksdb::DB;
|
|
use std::path::Path;
|
|
use std::str;
|
|
use std::sync::Mutex;
|
|
use store::numeric_encoder::*;
|
|
use utils::from_bytes;
|
|
use utils::to_bytes;
|
|
|
|
/// Name of the column family mapping a string id to the string bytes.
const ID2STR_CF: &str = "id2str";
/// Name of the column family mapping string bytes to their id.
///
/// This used to be a copy-paste of `"id2str"`, which made both mappings share a single
/// column family and listed the same name twice in `COLUMN_FAMILIES`.
/// NOTE(review): databases written with the old name keep their string -> id entries in
/// `"id2str"`; they need to be rebuilt or migrated.
const STR2ID_CF: &str = "str2id";
/// Name of the column family whose keys are produced by `EncodedQuad::spog()`.
const SPOG_CF: &str = "spog";
/// Name of the column family whose keys are produced by `EncodedQuad::posg()`.
const POSG_CF: &str = "posg";
/// Name of the column family whose keys are produced by `EncodedQuad::ospg()`.
const OSPG_CF: &str = "ospg";

/// Empty value stored alongside every quad index key: the key alone carries the data.
const EMPTY_BUF: [u8; 0] = [];

//TODO: indexes for the default graph and indexes for the named graphs (no more Optional and space saving)

/// Every column family that is opened (and created when missing) with the database.
const COLUMN_FAMILIES: [&str; 5] = [ID2STR_CF, STR2ID_CF, SPOG_CF, POSG_CF, OSPG_CF];
|
|
|
|
pub struct RocksDbStore {
|
|
db: DB,
|
|
counter_mutex: Mutex<()>,
|
|
id2str_cf: ColumnFamily,
|
|
str2id_cf: ColumnFamily,
|
|
spog_cf: ColumnFamily,
|
|
posg_cf: ColumnFamily,
|
|
ospg_cf: ColumnFamily,
|
|
}
|
|
|
|
impl RocksDbStore {
|
|
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
|
|
let mut options = Options::default();
|
|
options.create_if_missing(true);
|
|
options.create_missing_column_families(true);
|
|
|
|
let db = DB::open_cf(&options, path, &COLUMN_FAMILIES)?;
|
|
let id2str_cf = get_cf(&db, STR2ID_CF)?;
|
|
let str2id_cf = get_cf(&db, ID2STR_CF)?;
|
|
let spog_cf = get_cf(&db, SPOG_CF)?;
|
|
let posg_cf = get_cf(&db, POSG_CF)?;
|
|
let ospg_cf = get_cf(&db, OSPG_CF)?;
|
|
|
|
Ok(Self {
|
|
db,
|
|
counter_mutex: Mutex::new(()),
|
|
id2str_cf,
|
|
str2id_cf,
|
|
spog_cf,
|
|
posg_cf,
|
|
ospg_cf,
|
|
})
|
|
}
|
|
|
|
pub fn encoder(&self) -> Encoder<RocksDbBytesStore> {
|
|
Encoder::new(RocksDbBytesStore(&self))
|
|
}
|
|
|
|
pub fn quads(&self) -> Result<SPOGIndexIterator> {
|
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?;
|
|
iter.seek_to_first();
|
|
Ok(SPOGIndexIterator { iter })
|
|
}
|
|
|
|
pub fn quads_for_subject(
|
|
&self,
|
|
subject: EncodedTerm,
|
|
) -> Result<FilteringEncodedQuadsIterator<SPOGIndexIterator>> {
|
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?;
|
|
iter.seek(subject.as_ref());
|
|
Ok(FilteringEncodedQuadsIterator {
|
|
iter: SPOGIndexIterator { iter },
|
|
filter: EncodedQuadPattern::new(Some(subject), None, None, None),
|
|
})
|
|
}
|
|
|
|
pub fn quads_for_subject_predicate(
|
|
&self,
|
|
subject: EncodedTerm,
|
|
predicate: EncodedTerm,
|
|
) -> Result<FilteringEncodedQuadsIterator<SPOGIndexIterator>> {
|
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?;
|
|
iter.seek(&encode_term_pair(&subject, &predicate));
|
|
Ok(FilteringEncodedQuadsIterator {
|
|
iter: SPOGIndexIterator { iter },
|
|
filter: EncodedQuadPattern::new(Some(subject), Some(predicate), None, None),
|
|
})
|
|
}
|
|
|
|
pub fn quads_for_subject_predicate_object(
|
|
&self,
|
|
subject: EncodedTerm,
|
|
predicate: EncodedTerm,
|
|
object: EncodedTerm,
|
|
) -> Result<FilteringEncodedQuadsIterator<SPOGIndexIterator>> {
|
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?;
|
|
iter.seek(&encode_term_triple(&subject, &predicate, &object));
|
|
Ok(FilteringEncodedQuadsIterator {
|
|
iter: SPOGIndexIterator { iter },
|
|
filter: EncodedQuadPattern::new(Some(subject), Some(predicate), Some(object), None),
|
|
})
|
|
}
|
|
|
|
pub fn quads_for_predicate(
|
|
&self,
|
|
predicate: EncodedTerm,
|
|
) -> Result<FilteringEncodedQuadsIterator<POSGIndexIterator>> {
|
|
let mut iter = self.db.raw_iterator_cf(self.posg_cf)?;
|
|
iter.seek(predicate.as_ref());
|
|
Ok(FilteringEncodedQuadsIterator {
|
|
iter: POSGIndexIterator { iter },
|
|
filter: EncodedQuadPattern::new(None, Some(predicate), None, None),
|
|
})
|
|
}
|
|
|
|
pub fn quads_for_predicate_object(
|
|
&self,
|
|
predicate: EncodedTerm,
|
|
object: EncodedTerm,
|
|
) -> Result<FilteringEncodedQuadsIterator<POSGIndexIterator>> {
|
|
let mut iter = self.db.raw_iterator_cf(self.spog_cf)?;
|
|
iter.seek(&encode_term_pair(&predicate, &object));
|
|
Ok(FilteringEncodedQuadsIterator {
|
|
iter: POSGIndexIterator { iter },
|
|
filter: EncodedQuadPattern::new(None, Some(predicate), Some(object), None),
|
|
})
|
|
}
|
|
|
|
pub fn quads_for_object(
|
|
&self,
|
|
object: EncodedTerm,
|
|
) -> Result<FilteringEncodedQuadsIterator<OSPGIndexIterator>> {
|
|
let mut iter = self.db.raw_iterator_cf(self.ospg_cf)?;
|
|
iter.seek(object.as_ref());
|
|
Ok(FilteringEncodedQuadsIterator {
|
|
iter: OSPGIndexIterator { iter },
|
|
filter: EncodedQuadPattern::new(None, None, Some(object), None),
|
|
})
|
|
}
|
|
|
|
pub fn contains(&self, quad: &EncodedQuad) -> Result<bool> {
|
|
Ok(self.db.get_cf(self.spog_cf, &quad.spog())?.is_some())
|
|
}
|
|
|
|
pub fn insert(&self, quad: &EncodedQuad) -> Result<()> {
|
|
let mut batch = WriteBatch::default();
|
|
batch.put_cf(self.spog_cf, &quad.spog(), &EMPTY_BUF)?;
|
|
batch.put_cf(self.posg_cf, &quad.posg(), &EMPTY_BUF)?;
|
|
batch.put_cf(self.ospg_cf, &quad.ospg(), &EMPTY_BUF)?;
|
|
Ok(self.db.write(batch)?) //TODO: check what's going on if the key already exists
|
|
}
|
|
|
|
pub fn remove(&self, quad: &EncodedQuad) -> Result<()> {
|
|
let mut batch = WriteBatch::default();
|
|
batch.delete_cf(self.spog_cf, &quad.spog())?;
|
|
batch.delete_cf(self.posg_cf, &quad.posg())?;
|
|
batch.delete_cf(self.ospg_cf, &quad.ospg())?;
|
|
Ok(self.db.write(batch)?)
|
|
}
|
|
}
|
|
|
|
pub fn get_cf(db: &DB, name: &str) -> Result<ColumnFamily> {
|
|
db.cf_handle(name)
|
|
.ok_or_else(|| Error::from("column family not found"))
|
|
}
|
|
|
|
pub struct RocksDbBytesStore<'a>(&'a RocksDbStore);
|
|
|
|
impl<'a> RocksDbBytesStore<'a> {
|
|
fn new_id(&self) -> Result<[u8; STRING_KEY_SIZE]> {
|
|
let _ = self.0.counter_mutex.lock();
|
|
let id = self.0
|
|
.db
|
|
.get(b"bsc")?
|
|
.map(|v| {
|
|
let mut id = [0 as u8; STRING_KEY_SIZE];
|
|
id.copy_from_slice(&v);
|
|
id
|
|
})
|
|
.unwrap_or_else(|| [0 as u8; STRING_KEY_SIZE]);
|
|
self.0
|
|
.db
|
|
.put(b"bsc", &to_bytes(from_bytes(id.clone()) + 1))?;
|
|
Ok(id)
|
|
}
|
|
}
|
|
|
|
impl<'a> BytesStore for RocksDbBytesStore<'a> {
|
|
type BytesOutput = DBVector;
|
|
|
|
fn put(&self, value: &[u8], id_buffer: &mut [u8]) -> Result<()> {
|
|
match self.0.db.get_cf(self.0.str2id_cf, value)? {
|
|
Some(id) => id_buffer.copy_from_slice(&id),
|
|
None => {
|
|
let id = self.new_id()?;
|
|
let mut batch = WriteBatch::default();
|
|
batch.put_cf(self.0.id2str_cf, &id, value)?;
|
|
batch.put_cf(self.0.str2id_cf, value, &id)?;
|
|
self.0.db.write(batch)?;
|
|
id_buffer.copy_from_slice(&id)
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn get(&self, id: &[u8]) -> Result<Option<DBVector>> {
|
|
Ok(self.0.db.get_cf(self.0.id2str_cf, id)?)
|
|
}
|
|
}
|
|
|
|
struct EncodedQuadPattern {
|
|
subject: Option<EncodedTerm>,
|
|
predicate: Option<EncodedTerm>,
|
|
object: Option<EncodedTerm>,
|
|
graph_name: Option<EncodedTerm>,
|
|
}
|
|
|
|
impl EncodedQuadPattern {
|
|
fn new(
|
|
subject: Option<EncodedTerm>,
|
|
predicate: Option<EncodedTerm>,
|
|
object: Option<EncodedTerm>,
|
|
graph_name: Option<EncodedTerm>,
|
|
) -> Self {
|
|
Self {
|
|
subject,
|
|
predicate,
|
|
object,
|
|
graph_name,
|
|
}
|
|
}
|
|
|
|
fn filter(&self, quad: &EncodedQuad) -> bool {
|
|
if let Some(ref subject) = self.subject {
|
|
if &quad.subject != subject {
|
|
return false;
|
|
}
|
|
}
|
|
if let Some(ref predicate) = self.predicate {
|
|
if &quad.predicate != predicate {
|
|
return false;
|
|
}
|
|
}
|
|
if let Some(ref object) = self.object {
|
|
if &quad.object != object {
|
|
return false;
|
|
}
|
|
}
|
|
if let Some(ref graph_name) = self.graph_name {
|
|
if &quad.graph_name != graph_name {
|
|
return false;
|
|
}
|
|
}
|
|
true
|
|
}
|
|
}
|
|
|
|
/// Concatenates the byte encodings of `t1` and `t2` into a single buffer.
///
/// Panics if an encoding is not exactly `TERM_ENCODING_SIZE` bytes long
/// (`copy_from_slice` requires equal lengths).
fn encode_term_pair(t1: &EncodedTerm, t2: &EncodedTerm) -> [u8; 2 * TERM_ENCODING_SIZE] {
    let mut bytes = [0u8; 2 * TERM_ENCODING_SIZE];
    {
        let (first, second) = bytes.split_at_mut(TERM_ENCODING_SIZE);
        first.copy_from_slice(t1.as_ref());
        second.copy_from_slice(t2.as_ref());
    }
    bytes
}
|
|
|
|
fn encode_term_triple(
|
|
t1: &EncodedTerm,
|
|
t2: &EncodedTerm,
|
|
t3: &EncodedTerm,
|
|
) -> [u8; 3 * TERM_ENCODING_SIZE] {
|
|
let mut bytes = [0 as u8; 3 * TERM_ENCODING_SIZE];
|
|
bytes[0..TERM_ENCODING_SIZE].copy_from_slice(t1.as_ref());
|
|
bytes[TERM_ENCODING_SIZE..2 * TERM_ENCODING_SIZE].copy_from_slice(t2.as_ref());
|
|
bytes[2 * TERM_ENCODING_SIZE..3 * TERM_ENCODING_SIZE].copy_from_slice(t3.as_ref());
|
|
bytes
|
|
}
|
|
|
|
pub struct SPOGIndexIterator {
|
|
iter: DBRawIterator,
|
|
}
|
|
|
|
impl Iterator for SPOGIndexIterator {
|
|
type Item = Result<EncodedQuad>;
|
|
|
|
fn next(&mut self) -> Option<Result<EncodedQuad>> {
|
|
self.iter.next();
|
|
self.iter
|
|
.key()
|
|
.map(|buffer| EncodedQuad::new_from_spog_buffer(&buffer))
|
|
}
|
|
}
|
|
|
|
pub struct POSGIndexIterator {
|
|
iter: DBRawIterator,
|
|
}
|
|
|
|
impl Iterator for POSGIndexIterator {
|
|
type Item = Result<EncodedQuad>;
|
|
|
|
fn next(&mut self) -> Option<Result<EncodedQuad>> {
|
|
self.iter.next();
|
|
self.iter
|
|
.key()
|
|
.map(|buffer| EncodedQuad::new_from_posg_buffer(&buffer))
|
|
}
|
|
}
|
|
|
|
pub struct OSPGIndexIterator {
|
|
iter: DBRawIterator,
|
|
}
|
|
|
|
impl Iterator for OSPGIndexIterator {
|
|
type Item = Result<EncodedQuad>;
|
|
|
|
fn next(&mut self) -> Option<Result<EncodedQuad>> {
|
|
self.iter.next();
|
|
self.iter
|
|
.key()
|
|
.map(|buffer| EncodedQuad::new_from_ospg_buffer(&buffer))
|
|
}
|
|
}
|
|
|
|
pub struct FilteringEncodedQuadsIterator<I: Iterator<Item = Result<EncodedQuad>>> {
|
|
iter: I,
|
|
filter: EncodedQuadPattern,
|
|
}
|
|
|
|
impl<I: Iterator<Item = Result<EncodedQuad>>> Iterator for FilteringEncodedQuadsIterator<I> {
|
|
type Item = Result<EncodedQuad>;
|
|
|
|
fn next(&mut self) -> Option<Result<EncodedQuad>> {
|
|
self.iter.next().filter(|quad| match quad {
|
|
Ok(quad) => self.filter.filter(quad),
|
|
Err(e) => true,
|
|
})
|
|
}
|
|
}
|
|
|