Finish nodes code

pull/825/head
Peter Heringer 1 year ago
parent d8c4669af6
commit 6bd2576884
  1. 304
      lib/src/storage/mod.rs
  2. 67
      lib/src/storage/vg_vocab.rs

@ -1,7 +1,8 @@
#![allow(clippy::same_name_method)]
use crate::model::vocab::rdf;
#[cfg(not(target_family = "wasm"))]
use crate::model::Quad;
use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, Term, TermRef};
use crate::model::{GraphNameRef, NamedNodeRef, NamedOrBlankNodeRef, QuadRef, Term, TermRef};
use crate::storage::backend::{Reader, Transaction};
#[cfg(not(target_family = "wasm"))]
use crate::storage::binary_encoder::LATEST_STORAGE_VERSION;
@ -15,10 +16,17 @@ pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, S
#[cfg(not(target_family = "wasm"))]
use crate::storage::numeric_encoder::Decoder;
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::storage::vg_vocab::{faldo, vg};
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter};
use gfa::gfa::Orientation;
use gfa::parser::GFAParser;
use handlegraph::{conversion::from_gfa, packedgraph::PackedGraph};
use oxrdf::NamedNode;
use handlegraph::handle::{Direction, Handle};
use handlegraph::{
conversion::from_gfa, handlegraph::IntoHandles, handlegraph::IntoNeighbors,
handlegraph::IntoSequences, packedgraph::PackedGraph,
};
use oxrdf::{Literal, NamedNode};
use std::str;
#[cfg(not(target_family = "wasm"))]
use std::collections::VecDeque;
@ -43,6 +51,7 @@ mod binary_encoder;
mod error;
pub mod numeric_encoder;
pub mod small_string;
mod vg_vocab;
const ID2STR_CF: &str = "id2str";
const SPOG_CF: &str = "spog";
@ -64,12 +73,14 @@ const DEFAULT_BULK_LOAD_BATCH_SIZE: usize = 1_000_000;
#[derive(Clone)]
pub struct Storage {
graph: PackedGraph,
base: String,
}
impl Storage {
pub fn new() -> Result<Self, StorageError> {
Ok(Self {
graph: PackedGraph::new(),
base: "https://example.org".to_owned(),
})
}
@ -80,7 +91,10 @@ impl Storage {
.parse_file(path)
.map_err(|err| StorageError::Other(Box::new(err)))?;
let graph = from_gfa::<PackedGraph, ()>(&gfa);
Ok(Self { graph })
Ok(Self {
graph,
base: "https://example.org".to_owned(),
})
}
#[cfg(not(target_family = "wasm"))]
@ -90,7 +104,10 @@ impl Storage {
.parse_file(primary_path)
.map_err(|err| StorageError::Other(Box::new(err)))?;
let graph = from_gfa::<PackedGraph, ()>(&gfa);
Ok(Self { graph })
Ok(Self {
graph,
base: "https://example.org".to_owned(),
})
}
#[cfg(not(target_family = "wasm"))]
@ -103,7 +120,10 @@ impl Storage {
.parse_file(primary_path)
.map_err(|err| StorageError::Other(Box::new(err)))?;
let graph = from_gfa::<PackedGraph, ()>(&gfa);
Ok(Self { graph })
Ok(Self {
graph,
base: "https://example.org".to_owned(),
})
}
#[cfg(not(target_family = "wasm"))]
@ -113,7 +133,10 @@ impl Storage {
.parse_file(path)
.map_err(|err| StorageError::Other(Box::new(err)))?;
let graph = from_gfa::<PackedGraph, ()>(&gfa);
Ok(Self { graph })
Ok(Self {
graph,
base: "https://example.org".to_owned(),
})
}
pub fn snapshot(&self) -> StorageReader {
@ -191,10 +214,11 @@ impl StorageReader {
object: Option<&EncodedTerm>,
graph_name: Option<&EncodedTerm>,
) -> ChainedDecodingQuadIterator {
let sub = subject.map(|s| self.decode_term(s).ok()).flatten();
let pre = predicate.map(|s| self.decode_term(s).ok()).flatten();
let obj = object.map(|s| self.decode_term(s).ok()).flatten();
self.nodes(&sub, &pre, &obj);
// let sub = subject.map(|s| self.decode_term(s).ok()).flatten();
// let pre = predicate.map(|s| self.decode_term(s).ok()).flatten();
let graph_name = graph_name.expect("Graph name is given");
// let obj = object.map(|s| self.decode_term(s).ok()).flatten();
self.nodes(subject, predicate, object, graph_name);
return ChainedDecodingQuadIterator {
first: DecodingQuadIterator {
terms: Vec::new(),
@ -204,19 +228,265 @@ impl StorageReader {
};
}
fn nodes(&self, subject: &Option<Term>, predicate: &Option<Term>, object: &Option<Term>) {
fn nodes(
&self,
subject: Option<&EncodedTerm>,
predicate: Option<&EncodedTerm>,
object: Option<&EncodedTerm>,
graph_name: &EncodedTerm,
) -> Vec<EncodedQuad> {
let mut results = Vec::new();
match subject {
Some(sub) => {
let is_node_iri = self.is_node_iri_in_graph(sub);
if self.is_vocab(predicate, rdf::TYPE)
&& self.is_vocab(object, vg::NODE)
&& is_node_iri
{
results.push(EncodedQuad::new(
sub.to_owned(),
rdf::TYPE.into(),
vg::NODE.into(),
graph_name.to_owned(),
));
} else if predicate.is_none() && self.is_vocab(object, vg::NODE) && is_node_iri {
results.push(EncodedQuad::new(
sub.to_owned(),
rdf::TYPE.into(),
vg::NODE.into(),
graph_name.to_owned(),
));
} else if predicate.is_none() && is_node_iri {
results.push(EncodedQuad::new(
sub.to_owned(),
rdf::TYPE.into(),
vg::NODE.into(),
graph_name.to_owned(),
));
}
if is_node_iri {
let mut triples = self.handle_to_triples(sub, predicate, object, graph_name);
let mut edge_triples =
self.handle_to_edge_triples(sub, predicate, object, graph_name);
results.append(&mut triples);
results.append(&mut edge_triples);
}
}
None => {
for handle in self.storage.graph.handles() {
let term = self
.handle_to_namednode(handle)
.expect("Can turn handle to namednode");
let mut recursion_results =
self.nodes(Some(&term), predicate, object, graph_name);
results.append(&mut recursion_results);
}
}
None => {}
}
results
}
fn is_node_iri_in_graph(&self, term: &Term) -> bool {
let named_node: NamedNodeRef = term.into();
// term.is_named_node() && Ok(term.into::<NamedNode>())
true
fn handle_to_triples(
&self,
subject: &EncodedTerm,
predicate: Option<&EncodedTerm>,
object: Option<&EncodedTerm>,
graph_name: &EncodedTerm,
) -> Vec<EncodedQuad> {
let mut results = Vec::new();
if self.is_vocab(predicate, rdf::VALUE) || predicate.is_none() {
let handle = Handle::new(
self.get_node_id(subject).expect("Subject is node"),
Orientation::Forward,
);
let seq_bytes = self.storage.graph.sequence_vec(handle);
let seq = str::from_utf8(&seq_bytes).expect("Node contains sequence");
let seq_value = Literal::new_simple_literal(seq);
if object.is_none()
|| self.decode_term(object.unwrap()).unwrap() == Term::Literal(seq_value.clone())
{
results.push(EncodedQuad::new(
subject.to_owned(),
rdf::VALUE.into(),
seq_value.as_ref().into(),
graph_name.to_owned(),
));
}
} else if (self.is_vocab(predicate, rdf::TYPE) || predicate.is_none())
&& (object.is_none() || self.is_vocab(object, vg::NODE))
{
results.push(EncodedQuad::new(
subject.to_owned(),
rdf::TYPE.into(),
vg::NODE.into(),
graph_name.to_owned(),
));
}
results
}
fn handle_to_edge_triples(
&self,
subject: &EncodedTerm,
predicate: Option<&EncodedTerm>,
object: Option<&EncodedTerm>,
graph_name: &EncodedTerm,
) -> Vec<EncodedQuad> {
let mut results = Vec::new();
if predicate.is_none() || self.is_node_related(predicate) {
let handle = Handle::new(
self.get_node_id(subject).expect("Subject has node id"),
Orientation::Forward,
);
let neighbors = self.storage.graph.neighbors(handle, Direction::Right);
for neighbor in neighbors {
if object.is_none()
|| self
.get_node_id(object.unwrap())
.expect("Object has node id")
== neighbor.unpack_number()
{
let mut edge_triples =
self.generate_edge_triples(handle, neighbor, predicate, graph_name);
results.append(&mut edge_triples);
}
}
}
results
}
fn generate_edge_triples(
&self,
subject: Handle,
object: Handle,
predicate: Option<&EncodedTerm>,
graph_name: &EncodedTerm,
) -> Vec<EncodedQuad> {
let mut results = Vec::new();
let node_is_reverse = subject.is_reverse();
let other_is_reverse = object.is_reverse();
if (predicate.is_none() || self.is_vocab(predicate, vg::LINKS_FORWARD_TO_FORWARD))
&& !node_is_reverse
&& !other_is_reverse
{
results.push(EncodedQuad::new(
self.handle_to_namednode(subject).expect("Subject is fine"),
vg::LINKS_FORWARD_TO_FORWARD.into(),
self.handle_to_namednode(object).expect("Object is fine"),
graph_name.to_owned(),
));
}
if (predicate.is_none() || self.is_vocab(predicate, vg::LINKS_FORWARD_TO_REVERSE))
&& !node_is_reverse
&& other_is_reverse
{
results.push(EncodedQuad::new(
self.handle_to_namednode(subject).expect("Subject is fine"),
vg::LINKS_FORWARD_TO_REVERSE.into(),
self.handle_to_namednode(object).expect("Object is fine"),
graph_name.to_owned(),
));
}
if (predicate.is_none() || self.is_vocab(predicate, vg::LINKS_REVERSE_TO_FORWARD))
&& node_is_reverse
&& !other_is_reverse
{
results.push(EncodedQuad::new(
self.handle_to_namednode(subject).expect("Subject is fine"),
vg::LINKS_REVERSE_TO_FORWARD.into(),
self.handle_to_namednode(object).expect("Object is fine"),
graph_name.to_owned(),
));
}
if (predicate.is_none() || self.is_vocab(predicate, vg::LINKS_REVERSE_TO_REVERSE))
&& node_is_reverse
&& other_is_reverse
{
results.push(EncodedQuad::new(
self.handle_to_namednode(subject).expect("Subject is fine"),
vg::LINKS_REVERSE_TO_REVERSE.into(),
self.handle_to_namednode(object).expect("Object is fine"),
graph_name.to_owned(),
));
}
if predicate.is_none() || self.is_vocab(predicate, vg::LINKS) {
results.push(EncodedQuad::new(
self.handle_to_namednode(subject).expect("Subject is fine"),
vg::LINKS.into(),
self.handle_to_namednode(object).expect("Object is fine"),
graph_name.to_owned(),
));
}
results
}
fn handle_to_namednode(&self, handle: Handle) -> Option<EncodedTerm> {
let id = handle.unpack_number();
let text = format!("<{}/node/{}>", self.storage.base, id);
let named_node = NamedNode::new(text).ok()?;
Some(named_node.as_ref().into())
}
fn is_node_related(&self, predicate: Option<&EncodedTerm>) -> bool {
let predicates = [
vg::LINKS,
vg::LINKS_FORWARD_TO_FORWARD,
vg::LINKS_FORWARD_TO_REVERSE,
vg::LINKS_REVERSE_TO_FORWARD,
vg::LINKS_REVERSE_TO_REVERSE,
];
if predicate.is_none() {
return false;
}
predicates
.into_iter()
.map(|x| self.is_vocab(predicate, x))
.reduce(|acc, x| acc || x)
.unwrap()
}
fn is_vocab(&self, term: Option<&EncodedTerm>, vocab: NamedNodeRef) -> bool {
if term.is_none() {
return false;
}
let term = term.unwrap();
if !term.is_named_node() {
return false;
}
let named_node = self.decode_named_node(term).expect("Is named node");
named_node == vocab
}
fn is_node_iri_in_graph(&self, term: &EncodedTerm) -> bool {
match self.get_node_id(term) {
Some(id) => self.storage.graph.has_node(id),
None => false,
}
}
fn get_node_id(&self, term: &EncodedTerm) -> Option<u64> {
match term.is_named_node() {
true => {
let named_node = self.decode_named_node(term).expect("Is named node");
let mut text = named_node.to_string();
// Remove trailing '>'
text.pop();
let mut parts_iter = text.rsplit("/");
let last = parts_iter.next();
let pre_last = parts_iter.next();
match last.is_some()
&& pre_last.is_some()
&& pre_last.expect("Option is some") == "node"
{
true => last.expect("Option is some").parse::<u64>().ok(),
false => None,
}
}
false => None,
}
}
pub fn quads(&self) -> ChainedDecodingQuadIterator {

@ -0,0 +1,67 @@
pub mod vg {
use oxrdf::NamedNodeRef;
pub const NODE: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#Node");
pub const PATH: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#Path");
pub const STEP: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#Step");
pub const RANK: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#rank");
pub const POSITION: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#position");
pub const PATH_PRED: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#path");
pub const LINKS_FORWARD_TO_FORWARD: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#linksForwardToForward");
pub const LINKS_FORWARD_TO_REVERSE: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#linksForwardToReverse");
pub const LINKS_REVERSE_TO_FORWARD: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#linksReverseToForward");
pub const LINKS_REVERSE_TO_REVERSE: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#linksReverseToReverse");
pub const LINKS: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#links");
pub const REVERSE_OF_NODE: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#reverseOfNode");
pub const NODE_PRED: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/vg#node");
}
pub mod faldo {
use oxrdf::NamedNodeRef;
pub const REGION: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#Region");
pub const EXACT_POSITION: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#ExactPosition");
pub const POSITION: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#Position");
pub const BEGIN: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#begin");
pub const END: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#end");
pub const REFERENCE: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#reference");
pub const POSITION_PRED: NamedNodeRef<'_> =
NamedNodeRef::new_unchecked("http://biohackathon.org/resource/faldo#position");
}
Loading…
Cancel
Save