From 1735930e7a22df85f3a3cf332138ee3bf4c22e7d Mon Sep 17 00:00:00 2001 From: Peter Heringer Date: Mon, 18 Dec 2023 09:03:51 +0100 Subject: [PATCH] Change EncodedTerm to contain unencoded --- lib/src/sparql/eval.rs | 9 ++++---- lib/src/storage/binary_encoder.rs | 3 ++- lib/src/storage/mod.rs | 34 ++++++++++++++++++++++++------ lib/src/storage/numeric_encoder.rs | 30 +++++++++++++++++++------- 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 8a49643d..0645f532 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -1790,7 +1790,7 @@ impl SimpleEvaluator { let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let value = to_simple_string(&dataset, &lexical_form(tuple)?)?; - let datatype = if let EncodedTerm::NamedNode { iri_id } = datatype(tuple)? { + let datatype = if let EncodedTerm::NamedNode { iri_id, .. } = datatype(tuple)? { dataset.get_str(&iri_id).ok()? } else { None @@ -2134,7 +2134,7 @@ fn to_bool(term: &EncodedTerm) -> Option { fn to_string_id(dataset: &DatasetView, term: &EncodedTerm) -> Option { match term { - EncodedTerm::NamedNode { iri_id } => Some( + EncodedTerm::NamedNode { iri_id, .. } => Some( if let Ok(value) = SmallString::try_from(dataset.get_str(iri_id).ok()??.as_str()) { value.into() } else { @@ -2594,8 +2594,8 @@ fn cmp_terms(dataset: &DatasetView, a: Option<&EncodedTerm>, b: Option<&EncodedT _ => Ordering::Less, } } - EncodedTerm::NamedNode { iri_id: a } => match b { - EncodedTerm::NamedNode { iri_id: b } => { + EncodedTerm::NamedNode { iri_id: a, .. } => match b { + EncodedTerm::NamedNode { iri_id: b, .. } => { compare_str_ids(dataset, a, b).unwrap_or(Ordering::Equal) } _ if b.is_blank_node() => Ordering::Greater, @@ -2874,6 +2874,7 @@ fn datatype(dataset: &DatasetView, value: &EncodedTerm) -> Option { EncodedTerm::SmallTypedLiteral { datatype_id, .. } | EncodedTerm::BigTypedLiteral { datatype_id, .. } => Some(EncodedTerm::NamedNode { iri_id: *datatype_id, + value: "Created by DATATYPE id (lib/src/sparql/eval.rs:2877)".to_string(), }), EncodedTerm::BooleanLiteral(..) => Some(encode_named_node(dataset, xsd::BOOLEAN)), EncodedTerm::FloatLiteral(..) => Some(encode_named_node(dataset, xsd::FLOAT)), diff --git a/lib/src/storage/binary_encoder.rs b/lib/src/storage/binary_encoder.rs index 4e888c2f..3bd72896 100644 --- a/lib/src/storage/binary_encoder.rs +++ b/lib/src/storage/binary_encoder.rs @@ -210,6 +210,7 @@ impl TermReader for R { self.read_exact(&mut buffer)?; Ok(EncodedTerm::NamedNode { iri_id: StrHash::from_be_bytes(buffer), + value: "READ USING BE BYTES".to_string(), }) } TYPE_NUMERICAL_BLANK_NODE_ID => { @@ -498,7 +499,7 @@ pub fn encode_term_quad( pub fn write_term(sink: &mut Vec, term: &EncodedTerm) { match term { EncodedTerm::DefaultGraph => (), - EncodedTerm::NamedNode { iri_id } => { + EncodedTerm::NamedNode { iri_id, .. } => { sink.push(TYPE_NAMED_NODE_ID); sink.extend_from_slice(&iri_id.to_be_bytes()); } diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index 544f5db5..4ad3ef5b 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -270,11 +270,13 @@ impl StorageReader { let mut results = Vec::new(); match subject { Some(sub) => { + println!("Real subject: {}", sub.get_named_node_value().unwrap()); let is_node_iri = self.is_node_iri_in_graph(sub); if self.is_vocab(predicate, rdf::TYPE) && self.is_vocab(object, vg::NODE) && is_node_iri { + println!("First"); results.push(EncodedQuad::new( sub.to_owned(), rdf::TYPE.into(), @@ -282,6 +284,7 @@ impl StorageReader { graph_name.to_owned(), )); } else if predicate.is_none() && self.is_vocab(object, vg::NODE) && is_node_iri { + println!("Second"); results.push(EncodedQuad::new( sub.to_owned(), rdf::TYPE.into(), @@ -289,6 +292,7 @@ impl StorageReader { graph_name.to_owned(), )); } else if predicate.is_none() && is_node_iri { + println!("Third"); results.push(EncodedQuad::new( sub.to_owned(), rdf::TYPE.into(), @@ -298,24 +302,33 @@ impl StorageReader { } if is_node_iri { + println!("Fourth"); let mut triples = self.handle_to_triples(sub, predicate, object, graph_name); let mut edge_triples = self.handle_to_edge_triples(sub, predicate, object, graph_name); + println!("Normal: {:?}", triples); + println!("Edge: {:?}", edge_triples); results.append(&mut triples); results.append(&mut edge_triples); } } None => { + println!("None subject"); for handle in self.storage.graph.handles() { + println!("{:?}", handle); let term = self .handle_to_namednode(handle) .expect("Can turn handle to namednode"); let mut recursion_results = self.nodes(Some(&term), predicate, object, graph_name); + println!("{:?}", recursion_results); + println!("---------------------------"); results.append(&mut recursion_results); } + // println!("{:?}", results); } } + println!("Nodes successfully done!"); results } @@ -335,6 +348,7 @@ impl StorageReader { let seq_bytes = self.storage.graph.sequence_vec(handle); let seq = str::from_utf8(&seq_bytes).expect("Node contains sequence"); let seq_value = Literal::new_simple_literal(seq); + println!("Decoding 338"); if object.is_none() || self.decode_term(object.unwrap()).unwrap() == Term::Literal(seq_value.clone()) { @@ -345,6 +359,7 @@ impl StorageReader { graph_name.to_owned(), )); } + println!("Done decoding 338"); } else if (self.is_vocab(predicate, rdf::TYPE) || predicate.is_none()) && (object.is_none() || self.is_vocab(object, vg::NODE)) { @@ -366,11 +381,13 @@ impl StorageReader { graph_name: &EncodedTerm, ) -> Vec { let mut results = Vec::new(); + print!("Subject: {:?}, ", subject); if predicate.is_none() || self.is_node_related(predicate) { let handle = Handle::new( self.get_node_id(subject).expect("Subject has node id"), Orientation::Forward, ); + println!("Handle: {:?}", handle); let neighbors = self.storage.graph.neighbors(handle, Direction::Right); for neighbor in neighbors { if object.is_none() @@ -455,8 +472,8 @@ impl StorageReader { fn handle_to_namednode(&self, handle: Handle) -> Option { let id = handle.unpack_number(); - let text = format!("<{}/node/{}>", self.storage.base, id); - let named_node = NamedNode::new(text).ok()?; + let text = format!("{}/node/{}", self.storage.base, id); + let named_node = NamedNode::new(text).unwrap(); Some(named_node.as_ref().into()) } @@ -486,8 +503,8 @@ impl StorageReader { if !term.is_named_node() { return false; } - let named_node = self.decode_named_node(term).expect("Is named node"); - named_node == vocab + let named_node = term.get_named_node_value().expect("Is named node"); + named_node == vocab.as_str() } fn is_node_iri_in_graph(&self, term: &EncodedTerm) -> bool { @@ -500,11 +517,14 @@ impl StorageReader { fn get_node_id(&self, term: &EncodedTerm) -> Option { match term.is_named_node() { true => { - let named_node = self.decode_named_node(term).expect("Is named node"); - let mut text = named_node.to_string(); + let mut text = term + .get_named_node_value() + .expect("Encoded NamedNode has to have value") + .to_owned(); // Remove trailing '>' - text.pop(); + println!("Text: {}", text); + // text.pop(); let mut parts_iter = text.rsplit("/"); let last = parts_iter.next(); diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index 9a084ff7..d7ce6848 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -44,6 +44,7 @@ pub enum EncodedTerm { DefaultGraph, NamedNode { iri_id: StrHash, + value: String, }, NumericalBlankNode { id: u128, @@ -103,9 +104,16 @@ impl PartialEq for EncodedTerm { fn eq(&self, other: &Self) -> bool { match (self, other) { (Self::DefaultGraph, Self::DefaultGraph) => true, - (Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => { - iri_id_a == iri_id_b - } + ( + Self::NamedNode { + iri_id: iri_id_a, + value: value_a, + }, + Self::NamedNode { + iri_id: iri_id_b, + value: value_b, + }, + ) => iri_id_a == iri_id_b, (Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => { id_a == id_b } @@ -213,7 +221,7 @@ impl Eq for EncodedTerm {} impl Hash for EncodedTerm { fn hash(&self, state: &mut H) { match self { - Self::NamedNode { iri_id } => iri_id.hash(state), + Self::NamedNode { iri_id, value } => iri_id.hash(state), Self::NumericalBlankNode { id } => id.hash(state), Self::SmallBlankNode(id) => id.hash(state), Self::BigBlankNode { id_id } => id_id.hash(state), @@ -329,6 +337,13 @@ impl EncodedTerm { pub fn is_triple(&self) -> bool { matches!(self, Self::Triple { .. }) } + + pub fn get_named_node_value(&self) -> Option<&str> { + match self { + Self::NamedNode { value, .. } => Some(value), + _ => None, + } + } } impl From for EncodedTerm { @@ -479,6 +494,7 @@ impl From> for EncodedTerm { fn from(named_node: NamedNodeRef<'_>) -> Self { Self::NamedNode { iri_id: StrHash::new(named_node.as_str()), + value: named_node.as_str().to_owned(), } } } @@ -713,7 +729,7 @@ pub fn insert_term Result<(), StorageError>>( ) -> Result<(), StorageError> { match term { TermRef::NamedNode(node) => { - if let EncodedTerm::NamedNode { iri_id } = encoded { + if let EncodedTerm::NamedNode { iri_id, value } = encoded { insert_str(iri_id, node.as_str()) } else { unreachable!("Invalid term encoding {:?} for {}", encoded, term) @@ -950,9 +966,7 @@ impl Decoder for S { EncodedTerm::DefaultGraph => { Err(CorruptionError::msg("The default graph tag is not a valid term").into()) } - EncodedTerm::NamedNode { iri_id } => { - Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into()) - } + EncodedTerm::NamedNode { value, .. } => Ok(NamedNode::new_unchecked(value).into()), EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()), EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()), EncodedTerm::BigBlankNode { id_id } => {