Change EncodedTerm to contain unencoded

pull/825/head
Peter Heringer 1 year ago
parent e2b51a6046
commit 1735930e7a
  1. 9
      lib/src/sparql/eval.rs
  2. 3
      lib/src/storage/binary_encoder.rs
  3. 34
      lib/src/storage/mod.rs
  4. 30
      lib/src/storage/numeric_encoder.rs

@ -1790,7 +1790,7 @@ impl SimpleEvaluator {
let dataset = Rc::clone(&self.dataset); let dataset = Rc::clone(&self.dataset);
Rc::new(move |tuple| { Rc::new(move |tuple| {
let value = to_simple_string(&dataset, &lexical_form(tuple)?)?; let value = to_simple_string(&dataset, &lexical_form(tuple)?)?;
let datatype = if let EncodedTerm::NamedNode { iri_id } = datatype(tuple)? { let datatype = if let EncodedTerm::NamedNode { iri_id, .. } = datatype(tuple)? {
dataset.get_str(&iri_id).ok()? dataset.get_str(&iri_id).ok()?
} else { } else {
None None
@ -2134,7 +2134,7 @@ fn to_bool(term: &EncodedTerm) -> Option<bool> {
fn to_string_id(dataset: &DatasetView, term: &EncodedTerm) -> Option<SmallStringOrId> { fn to_string_id(dataset: &DatasetView, term: &EncodedTerm) -> Option<SmallStringOrId> {
match term { match term {
EncodedTerm::NamedNode { iri_id } => Some( EncodedTerm::NamedNode { iri_id, .. } => Some(
if let Ok(value) = SmallString::try_from(dataset.get_str(iri_id).ok()??.as_str()) { if let Ok(value) = SmallString::try_from(dataset.get_str(iri_id).ok()??.as_str()) {
value.into() value.into()
} else { } else {
@ -2594,8 +2594,8 @@ fn cmp_terms(dataset: &DatasetView, a: Option<&EncodedTerm>, b: Option<&EncodedT
_ => Ordering::Less, _ => Ordering::Less,
} }
} }
EncodedTerm::NamedNode { iri_id: a } => match b { EncodedTerm::NamedNode { iri_id: a, .. } => match b {
EncodedTerm::NamedNode { iri_id: b } => { EncodedTerm::NamedNode { iri_id: b, .. } => {
compare_str_ids(dataset, a, b).unwrap_or(Ordering::Equal) compare_str_ids(dataset, a, b).unwrap_or(Ordering::Equal)
} }
_ if b.is_blank_node() => Ordering::Greater, _ if b.is_blank_node() => Ordering::Greater,
@ -2874,6 +2874,7 @@ fn datatype(dataset: &DatasetView, value: &EncodedTerm) -> Option<EncodedTerm> {
EncodedTerm::SmallTypedLiteral { datatype_id, .. } EncodedTerm::SmallTypedLiteral { datatype_id, .. }
| EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { | EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode {
iri_id: *datatype_id, iri_id: *datatype_id,
value: "Created by DATATYPE id (lib/src/sparql/eval.rs:2877)".to_string(),
}), }),
EncodedTerm::BooleanLiteral(..) => Some(encode_named_node(dataset, xsd::BOOLEAN)), EncodedTerm::BooleanLiteral(..) => Some(encode_named_node(dataset, xsd::BOOLEAN)),
EncodedTerm::FloatLiteral(..) => Some(encode_named_node(dataset, xsd::FLOAT)), EncodedTerm::FloatLiteral(..) => Some(encode_named_node(dataset, xsd::FLOAT)),

@ -210,6 +210,7 @@ impl<R: Read> TermReader for R {
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedNode { Ok(EncodedTerm::NamedNode {
iri_id: StrHash::from_be_bytes(buffer), iri_id: StrHash::from_be_bytes(buffer),
value: "READ USING BE BYTES".to_string(),
}) })
} }
TYPE_NUMERICAL_BLANK_NODE_ID => { TYPE_NUMERICAL_BLANK_NODE_ID => {
@ -498,7 +499,7 @@ pub fn encode_term_quad(
pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) { pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
match term { match term {
EncodedTerm::DefaultGraph => (), EncodedTerm::DefaultGraph => (),
EncodedTerm::NamedNode { iri_id } => { EncodedTerm::NamedNode { iri_id, .. } => {
sink.push(TYPE_NAMED_NODE_ID); sink.push(TYPE_NAMED_NODE_ID);
sink.extend_from_slice(&iri_id.to_be_bytes()); sink.extend_from_slice(&iri_id.to_be_bytes());
} }

@ -270,11 +270,13 @@ impl StorageReader {
let mut results = Vec::new(); let mut results = Vec::new();
match subject { match subject {
Some(sub) => { Some(sub) => {
println!("Real subject: {}", sub.get_named_node_value().unwrap());
let is_node_iri = self.is_node_iri_in_graph(sub); let is_node_iri = self.is_node_iri_in_graph(sub);
if self.is_vocab(predicate, rdf::TYPE) if self.is_vocab(predicate, rdf::TYPE)
&& self.is_vocab(object, vg::NODE) && self.is_vocab(object, vg::NODE)
&& is_node_iri && is_node_iri
{ {
println!("First");
results.push(EncodedQuad::new( results.push(EncodedQuad::new(
sub.to_owned(), sub.to_owned(),
rdf::TYPE.into(), rdf::TYPE.into(),
@ -282,6 +284,7 @@ impl StorageReader {
graph_name.to_owned(), graph_name.to_owned(),
)); ));
} else if predicate.is_none() && self.is_vocab(object, vg::NODE) && is_node_iri { } else if predicate.is_none() && self.is_vocab(object, vg::NODE) && is_node_iri {
println!("Second");
results.push(EncodedQuad::new( results.push(EncodedQuad::new(
sub.to_owned(), sub.to_owned(),
rdf::TYPE.into(), rdf::TYPE.into(),
@ -289,6 +292,7 @@ impl StorageReader {
graph_name.to_owned(), graph_name.to_owned(),
)); ));
} else if predicate.is_none() && is_node_iri { } else if predicate.is_none() && is_node_iri {
println!("Third");
results.push(EncodedQuad::new( results.push(EncodedQuad::new(
sub.to_owned(), sub.to_owned(),
rdf::TYPE.into(), rdf::TYPE.into(),
@ -298,24 +302,33 @@ impl StorageReader {
} }
if is_node_iri { if is_node_iri {
println!("Fourth");
let mut triples = self.handle_to_triples(sub, predicate, object, graph_name); let mut triples = self.handle_to_triples(sub, predicate, object, graph_name);
let mut edge_triples = let mut edge_triples =
self.handle_to_edge_triples(sub, predicate, object, graph_name); self.handle_to_edge_triples(sub, predicate, object, graph_name);
println!("Normal: {:?}", triples);
println!("Edge: {:?}", edge_triples);
results.append(&mut triples); results.append(&mut triples);
results.append(&mut edge_triples); results.append(&mut edge_triples);
} }
} }
None => { None => {
println!("None subject");
for handle in self.storage.graph.handles() { for handle in self.storage.graph.handles() {
println!("{:?}", handle);
let term = self let term = self
.handle_to_namednode(handle) .handle_to_namednode(handle)
.expect("Can turn handle to namednode"); .expect("Can turn handle to namednode");
let mut recursion_results = let mut recursion_results =
self.nodes(Some(&term), predicate, object, graph_name); self.nodes(Some(&term), predicate, object, graph_name);
println!("{:?}", recursion_results);
println!("---------------------------");
results.append(&mut recursion_results); results.append(&mut recursion_results);
} }
// println!("{:?}", results);
} }
} }
println!("Nodes successfully done!");
results results
} }
@ -335,6 +348,7 @@ impl StorageReader {
let seq_bytes = self.storage.graph.sequence_vec(handle); let seq_bytes = self.storage.graph.sequence_vec(handle);
let seq = str::from_utf8(&seq_bytes).expect("Node contains sequence"); let seq = str::from_utf8(&seq_bytes).expect("Node contains sequence");
let seq_value = Literal::new_simple_literal(seq); let seq_value = Literal::new_simple_literal(seq);
println!("Decoding 338");
if object.is_none() if object.is_none()
|| self.decode_term(object.unwrap()).unwrap() == Term::Literal(seq_value.clone()) || self.decode_term(object.unwrap()).unwrap() == Term::Literal(seq_value.clone())
{ {
@ -345,6 +359,7 @@ impl StorageReader {
graph_name.to_owned(), graph_name.to_owned(),
)); ));
} }
println!("Done decoding 338");
} else if (self.is_vocab(predicate, rdf::TYPE) || predicate.is_none()) } else if (self.is_vocab(predicate, rdf::TYPE) || predicate.is_none())
&& (object.is_none() || self.is_vocab(object, vg::NODE)) && (object.is_none() || self.is_vocab(object, vg::NODE))
{ {
@ -366,11 +381,13 @@ impl StorageReader {
graph_name: &EncodedTerm, graph_name: &EncodedTerm,
) -> Vec<EncodedQuad> { ) -> Vec<EncodedQuad> {
let mut results = Vec::new(); let mut results = Vec::new();
print!("Subject: {:?}, ", subject);
if predicate.is_none() || self.is_node_related(predicate) { if predicate.is_none() || self.is_node_related(predicate) {
let handle = Handle::new( let handle = Handle::new(
self.get_node_id(subject).expect("Subject has node id"), self.get_node_id(subject).expect("Subject has node id"),
Orientation::Forward, Orientation::Forward,
); );
println!("Handle: {:?}", handle);
let neighbors = self.storage.graph.neighbors(handle, Direction::Right); let neighbors = self.storage.graph.neighbors(handle, Direction::Right);
for neighbor in neighbors { for neighbor in neighbors {
if object.is_none() if object.is_none()
@ -455,8 +472,8 @@ impl StorageReader {
fn handle_to_namednode(&self, handle: Handle) -> Option<EncodedTerm> { fn handle_to_namednode(&self, handle: Handle) -> Option<EncodedTerm> {
let id = handle.unpack_number(); let id = handle.unpack_number();
let text = format!("<{}/node/{}>", self.storage.base, id); let text = format!("{}/node/{}", self.storage.base, id);
let named_node = NamedNode::new(text).ok()?; let named_node = NamedNode::new(text).unwrap();
Some(named_node.as_ref().into()) Some(named_node.as_ref().into())
} }
@ -486,8 +503,8 @@ impl StorageReader {
if !term.is_named_node() { if !term.is_named_node() {
return false; return false;
} }
let named_node = self.decode_named_node(term).expect("Is named node"); let named_node = term.get_named_node_value().expect("Is named node");
named_node == vocab named_node == vocab.as_str()
} }
fn is_node_iri_in_graph(&self, term: &EncodedTerm) -> bool { fn is_node_iri_in_graph(&self, term: &EncodedTerm) -> bool {
@ -500,11 +517,14 @@ impl StorageReader {
fn get_node_id(&self, term: &EncodedTerm) -> Option<u64> { fn get_node_id(&self, term: &EncodedTerm) -> Option<u64> {
match term.is_named_node() { match term.is_named_node() {
true => { true => {
let named_node = self.decode_named_node(term).expect("Is named node"); let mut text = term
let mut text = named_node.to_string(); .get_named_node_value()
.expect("Encoded NamedNode has to have value")
.to_owned();
// Remove trailing '>' // Remove trailing '>'
text.pop(); println!("Text: {}", text);
// text.pop();
let mut parts_iter = text.rsplit("/"); let mut parts_iter = text.rsplit("/");
let last = parts_iter.next(); let last = parts_iter.next();

@ -44,6 +44,7 @@ pub enum EncodedTerm {
DefaultGraph, DefaultGraph,
NamedNode { NamedNode {
iri_id: StrHash, iri_id: StrHash,
value: String,
}, },
NumericalBlankNode { NumericalBlankNode {
id: u128, id: u128,
@ -103,9 +104,16 @@ impl PartialEq for EncodedTerm {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
match (self, other) { match (self, other) {
(Self::DefaultGraph, Self::DefaultGraph) => true, (Self::DefaultGraph, Self::DefaultGraph) => true,
(Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => { (
iri_id_a == iri_id_b Self::NamedNode {
} iri_id: iri_id_a,
value: value_a,
},
Self::NamedNode {
iri_id: iri_id_b,
value: value_b,
},
) => iri_id_a == iri_id_b,
(Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => { (Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => {
id_a == id_b id_a == id_b
} }
@ -213,7 +221,7 @@ impl Eq for EncodedTerm {}
impl Hash for EncodedTerm { impl Hash for EncodedTerm {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: Hasher>(&self, state: &mut H) {
match self { match self {
Self::NamedNode { iri_id } => iri_id.hash(state), Self::NamedNode { iri_id, value } => iri_id.hash(state),
Self::NumericalBlankNode { id } => id.hash(state), Self::NumericalBlankNode { id } => id.hash(state),
Self::SmallBlankNode(id) => id.hash(state), Self::SmallBlankNode(id) => id.hash(state),
Self::BigBlankNode { id_id } => id_id.hash(state), Self::BigBlankNode { id_id } => id_id.hash(state),
@ -329,6 +337,13 @@ impl EncodedTerm {
pub fn is_triple(&self) -> bool { pub fn is_triple(&self) -> bool {
matches!(self, Self::Triple { .. }) matches!(self, Self::Triple { .. })
} }
/// Returns the string stored in the `value` field when this term is a
/// `NamedNode`; yields `None` for every other variant.
pub fn get_named_node_value(&self) -> Option<&str> {
    if let Self::NamedNode { value, .. } = self {
        Some(value.as_str())
    } else {
        None
    }
}
} }
impl From<bool> for EncodedTerm { impl From<bool> for EncodedTerm {
@ -479,6 +494,7 @@ impl From<NamedNodeRef<'_>> for EncodedTerm {
fn from(named_node: NamedNodeRef<'_>) -> Self { fn from(named_node: NamedNodeRef<'_>) -> Self {
Self::NamedNode { Self::NamedNode {
iri_id: StrHash::new(named_node.as_str()), iri_id: StrHash::new(named_node.as_str()),
value: named_node.as_str().to_owned(),
} }
} }
} }
@ -713,7 +729,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
) -> Result<(), StorageError> { ) -> Result<(), StorageError> {
match term { match term {
TermRef::NamedNode(node) => { TermRef::NamedNode(node) => {
if let EncodedTerm::NamedNode { iri_id } = encoded { if let EncodedTerm::NamedNode { iri_id, value } = encoded {
insert_str(iri_id, node.as_str()) insert_str(iri_id, node.as_str())
} else { } else {
unreachable!("Invalid term encoding {:?} for {}", encoded, term) unreachable!("Invalid term encoding {:?} for {}", encoded, term)
@ -950,9 +966,7 @@ impl<S: StrLookup> Decoder for S {
EncodedTerm::DefaultGraph => { EncodedTerm::DefaultGraph => {
Err(CorruptionError::msg("The default graph tag is not a valid term").into()) Err(CorruptionError::msg("The default graph tag is not a valid term").into())
} }
EncodedTerm::NamedNode { iri_id } => { EncodedTerm::NamedNode { value, .. } => Ok(NamedNode::new_unchecked(value).into()),
Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into())
}
EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()), EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()),
EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()), EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
EncodedTerm::BigBlankNode { id_id } => { EncodedTerm::BigBlankNode { id_id } => {

Loading…
Cancel
Save