From a7794bbbbe0119fc1d4d415260a700b6e5ac5517 Mon Sep 17 00:00:00 2001 From: Peter Heringer Date: Mon, 15 Jan 2024 09:19:07 +0100 Subject: [PATCH] Fix some issues --- lib/src/sparql/eval.rs | 66 ++++++++++++++-------------- lib/src/storage/binary_encoder.rs | 3 +- lib/src/storage/numeric_encoder.rs | 13 ++++-- lib/src/storage/storage_generator.rs | 50 +++++++++++++++++++-- 4 files changed, 91 insertions(+), 41 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 0645f532..ffb01af0 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -1932,8 +1932,8 @@ impl SimpleEvaluator { EncodedTerm::DecimalLiteral(value) => Some(Double::from(value).into()), EncodedTerm::BooleanLiteral(value) => Some(Double::from(value).into()), EncodedTerm::SmallStringLiteral(value) => parse_double_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_double_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_double_str(&value) } _ => None, }) @@ -1948,8 +1948,8 @@ impl SimpleEvaluator { EncodedTerm::DecimalLiteral(value) => Some(Float::from(value).into()), EncodedTerm::BooleanLiteral(value) => Some(Float::from(value).into()), EncodedTerm::SmallStringLiteral(value) => parse_float_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_float_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_float_str(&value) } _ => None, }) @@ -1968,8 +1968,8 @@ impl SimpleEvaluator { } EncodedTerm::BooleanLiteral(value) => Some(Integer::from(value).into()), EncodedTerm::SmallStringLiteral(value) => parse_integer_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_integer_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_integer_str(&value) } _ => None, }) @@ -1988,8 +1988,8 @@ impl SimpleEvaluator { EncodedTerm::DecimalLiteral(value) => Some(value.into()), EncodedTerm::BooleanLiteral(value) => Some(Decimal::from(value).into()), EncodedTerm::SmallStringLiteral(value) => parse_decimal_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_decimal_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_decimal_str(&value) } _ => None, }) @@ -2001,8 +2001,8 @@ impl SimpleEvaluator { EncodedTerm::DateLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(Date::try_from(value).ok()?.into()), EncodedTerm::SmallStringLiteral(value) => parse_date_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_date_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_date_str(&value) } _ => None, }) @@ -2014,8 +2014,8 @@ impl SimpleEvaluator { EncodedTerm::TimeLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(Time::try_from(value).ok()?.into()), EncodedTerm::SmallStringLiteral(value) => parse_time_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_time_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_time_str(&value) } _ => None, }) @@ -2027,8 +2027,8 @@ impl SimpleEvaluator { EncodedTerm::DateTimeLiteral(value) => Some(value.into()), EncodedTerm::DateLiteral(value) => Some(DateTime::try_from(value).ok()?.into()), EncodedTerm::SmallStringLiteral(value) => parse_date_time_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_date_time_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_date_time_str(&value) } _ => None, }) @@ -2045,8 +2045,8 @@ impl SimpleEvaluator { Some(Duration::from(value).into()) } EncodedTerm::SmallStringLiteral(value) => parse_duration_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_duration_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_duration_str(&value) } _ => None, }) @@ -2060,8 +2060,8 @@ impl SimpleEvaluator { } EncodedTerm::YearMonthDurationLiteral(value) => Some(value.into()), EncodedTerm::SmallStringLiteral(value) => parse_year_month_duration_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_year_month_duration_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_year_month_duration_str(&value) } _ => None, }) @@ -2075,8 +2075,8 @@ impl SimpleEvaluator { } EncodedTerm::DayTimeDurationLiteral(value) => Some(value.into()), EncodedTerm::SmallStringLiteral(value) => parse_day_time_duration_str(&value), - EncodedTerm::BigStringLiteral { value_id } => { - parse_day_time_duration_str(&dataset.get_str(&value_id).ok()??) + EncodedTerm::BigStringLiteral { value_id, value } => { + parse_day_time_duration_str(&value) } _ => None, }) @@ -2150,7 +2150,7 @@ fn to_string_id(dataset: &DatasetView, term: &EncodedTerm) -> Option Some((*value).into()), - EncodedTerm::BigStringLiteral { value_id } + EncodedTerm::BigStringLiteral { value_id, .. } | EncodedTerm::BigSmallLangStringLiteral { value_id, .. } | EncodedTerm::BigBigLangStringLiteral { value_id, .. } | EncodedTerm::BigTypedLiteral { value_id, .. } => Some((*value_id).into()), @@ -2183,7 +2183,7 @@ fn to_string_id(dataset: &DatasetView, term: &EncodedTerm) -> Option Option { match term { EncodedTerm::SmallStringLiteral(value) => Some((*value).into()), - EncodedTerm::BigStringLiteral { value_id } => dataset.get_str(value_id).ok()?, + EncodedTerm::BigStringLiteral { value_id, value } => Some(value.to_owned()), _ => None, } } @@ -2191,7 +2191,7 @@ fn to_simple_string(dataset: &DatasetView, term: &EncodedTerm) -> Option fn to_simple_string_id(term: &EncodedTerm) -> Option { match term { EncodedTerm::SmallStringLiteral(value) => Some((*value).into()), - EncodedTerm::BigStringLiteral { value_id } => Some((*value_id).into()), + EncodedTerm::BigStringLiteral { value_id, .. } => Some((*value_id).into()), _ => None, } } @@ -2201,8 +2201,8 @@ fn to_string(dataset: &DatasetView, term: &EncodedTerm) -> Option { EncodedTerm::SmallStringLiteral(value) | EncodedTerm::SmallSmallLangStringLiteral { value, .. } | EncodedTerm::SmallBigLangStringLiteral { value, .. } => Some((*value).into()), - EncodedTerm::BigStringLiteral { value_id } - | EncodedTerm::BigSmallLangStringLiteral { value_id, .. } + EncodedTerm::BigStringLiteral { value_id, value } => Some(value.to_owned()), + EncodedTerm::BigSmallLangStringLiteral { value_id, .. } | EncodedTerm::BigBigLangStringLiteral { value_id, .. } => { dataset.get_str(value_id).ok()? } @@ -2216,8 +2216,8 @@ fn to_string_and_language( ) -> Option<(String, Option)> { match term { EncodedTerm::SmallStringLiteral(value) => Some(((*value).into(), None)), - EncodedTerm::BigStringLiteral { value_id } => { - Some((dataset.get_str(value_id).ok()??, None)) + EncodedTerm::BigStringLiteral { value_id, value } => { + Some((value.to_owned(), None)) } EncodedTerm::SmallSmallLangStringLiteral { value, language } => { Some(((*value).into(), Some((*language).into()))) @@ -2254,7 +2254,7 @@ fn build_string_literal(dataset: &DatasetView, value: &str) -> EncodedTerm { fn build_string_literal_from_id(id: SmallStringOrId) -> EncodedTerm { match id { SmallStringOrId::Small(value) => EncodedTerm::SmallStringLiteral(value), - SmallStringOrId::Big(value_id) => EncodedTerm::BigStringLiteral { value_id }, + SmallStringOrId::Big(value_id) => EncodedTerm::BigStringLiteral { value_id, value: "Why are we here?".to_owned() }, } } @@ -2412,8 +2412,8 @@ fn equals(a: &EncodedTerm, b: &EncodedTerm) -> Option { EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None, _ => Some(false), }, - EncodedTerm::BigStringLiteral { value_id: a } => match b { - EncodedTerm::BigStringLiteral { value_id: b } => Some(a == b), + EncodedTerm::BigStringLiteral { value_id: a, .. } => match b { + EncodedTerm::BigStringLiteral { value_id: b, .. } => Some(a == b), EncodedTerm::SmallTypedLiteral { .. } | EncodedTerm::BigTypedLiteral { .. } => None, _ => Some(false), }, @@ -2671,12 +2671,12 @@ fn partial_cmp_literals( match a { EncodedTerm::SmallStringLiteral(a) => match b { EncodedTerm::SmallStringLiteral(b) => a.partial_cmp(b), - EncodedTerm::BigStringLiteral { value_id: b } => compare_str_str_id(dataset, a, b), + EncodedTerm::BigStringLiteral { value_id: b, .. } => compare_str_str_id(dataset, a, b), _ => None, }, - EncodedTerm::BigStringLiteral { value_id: a } => match b { + EncodedTerm::BigStringLiteral { value_id: a, .. } => match b { EncodedTerm::SmallStringLiteral(b) => compare_str_id_str(dataset, a, b), - EncodedTerm::BigStringLiteral { value_id: b } => compare_str_ids(dataset, a, b), + EncodedTerm::BigStringLiteral { value_id: b, .. } => compare_str_ids(dataset, a, b), _ => None, }, EncodedTerm::SmallSmallLangStringLiteral { diff --git a/lib/src/storage/binary_encoder.rs b/lib/src/storage/binary_encoder.rs index 3bd72896..10bbdad4 100644 --- a/lib/src/storage/binary_encoder.rs +++ b/lib/src/storage/binary_encoder.rs @@ -311,6 +311,7 @@ impl TermReader for R { self.read_exact(&mut buffer)?; Ok(EncodedTerm::BigStringLiteral { value_id: StrHash::from_be_bytes(buffer), + value: std::str::from_utf8(&buffer).expect("Should be fine to convert").to_owned(), }) } TYPE_BOOLEAN_LITERAL_TRUE => Ok(true.into()), @@ -519,7 +520,7 @@ pub fn write_term(sink: &mut Vec, term: &EncodedTerm) { sink.push(TYPE_SMALL_STRING_LITERAL); sink.extend_from_slice(&value.to_be_bytes()) } - EncodedTerm::BigStringLiteral { value_id } => { + EncodedTerm::BigStringLiteral { value_id, .. } => { sink.push(TYPE_BIG_STRING_LITERAL); sink.extend_from_slice(&value_id.to_be_bytes()); } diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index d7ce6848..7cfbe033 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -56,6 +56,7 @@ pub enum EncodedTerm { SmallStringLiteral(SmallString), BigStringLiteral { value_id: StrHash, + value: String, }, SmallSmallLangStringLiteral { value: SmallString, @@ -125,9 +126,11 @@ impl PartialEq for EncodedTerm { ( Self::BigStringLiteral { value_id: value_id_a, + value: value_a, }, Self::BigStringLiteral { value_id: value_id_b, + value: value_b, }, ) => value_id_a == value_id_b, ( @@ -227,7 +230,7 @@ impl Hash for EncodedTerm { Self::BigBlankNode { id_id } => id_id.hash(state), Self::DefaultGraph => (), Self::SmallStringLiteral(value) => value.hash(state), - Self::BigStringLiteral { value_id } => value_id.hash(state), + Self::BigStringLiteral { value_id, value } => value_id.hash(state), Self::SmallSmallLangStringLiteral { value, language } => { value.hash(state); language.hash(state); @@ -552,6 +555,7 @@ impl From> for EncodedTerm { } else { Self::BigStringLiteral { value_id: StrHash::new(value), + value: value.to_owned(), } }) } @@ -741,7 +745,7 @@ pub fn insert_term Result<(), StorageError>>( _ => unreachable!("Invalid term encoding {:?} for {}", encoded, term), }, TermRef::Literal(literal) => match encoded { - EncodedTerm::BigStringLiteral { value_id } + EncodedTerm::BigStringLiteral { value_id, .. } | EncodedTerm::BigSmallLangStringLiteral { value_id, .. } => { insert_str(value_id, literal.value()) } @@ -962,6 +966,7 @@ pub trait Decoder: StrLookup { impl Decoder for S { fn decode_term(&self, encoded: &EncodedTerm) -> Result { + println!("DECODING: {:?}", encoded); match encoded { EncodedTerm::DefaultGraph => { Err(CorruptionError::msg("The default graph tag is not a valid term").into()) @@ -975,8 +980,8 @@ impl Decoder for S { EncodedTerm::SmallStringLiteral(value) => { Ok(Literal::new_simple_literal(*value).into()) } - EncodedTerm::BigStringLiteral { value_id } => { - Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into()) + EncodedTerm::BigStringLiteral { value_id, value } => { + Ok(Literal::new_simple_literal(value).into()) } EncodedTerm::SmallSmallLangStringLiteral { value, language } => { Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into()) diff --git a/lib/src/storage/storage_generator.rs b/lib/src/storage/storage_generator.rs index 452f47f2..480ddc21 100644 --- a/lib/src/storage/storage_generator.rs +++ b/lib/src/storage/storage_generator.rs @@ -31,6 +31,24 @@ impl StorageGenerator { Self { storage } } + fn print_quad(&self, quad: &EncodedQuad) { + let sub = match &quad.subject { + EncodedTerm::NamedNode { iri_id: _, value } => value.to_owned(), + _ => "NOT NAMED".to_owned(), + }; + let pre = match &quad.predicate { + EncodedTerm::NamedNode { iri_id: _, value } => value.to_owned(), + _ => "NOT NAMED".to_owned(), + }; + let obj = match &quad.object { + EncodedTerm::NamedNode { iri_id: _, value } => value.to_owned(), + EncodedTerm::SmallStringLiteral(value) => format!("\"{}\"", value).to_string(), + EncodedTerm::IntegerLiteral(value) => value.to_string(), + _ => "NOT NAMED".to_owned(), + }; + println!("\t- {}\t{}\t{} .", sub, pre, obj); + } + pub fn quads_for_pattern( &self, subject: Option<&EncodedTerm>, @@ -54,6 +72,9 @@ impl StorageGenerator { if self.is_vocab(predicate, rdf::TYPE) && object.is_some() { println!("OF: rdf::type"); let terms = self.type_triples(subject, predicate, object, graph_name); + for triple in &terms { + self.print_quad(triple); + } return ChainedDecodingQuadIterator { first: DecodingQuadIterator { terms, @@ -64,6 +85,9 @@ impl StorageGenerator { } else if self.is_node_related(predicate) { println!("OF: nodes"); let terms = self.nodes(subject, predicate, object, graph_name); + for triple in &terms { + self.print_quad(triple); + } return ChainedDecodingQuadIterator { first: DecodingQuadIterator { terms, @@ -74,6 +98,9 @@ impl StorageGenerator { } else if self.is_step_associated(predicate) { println!("OF: steps"); let terms = self.steps(subject, predicate, object, graph_name); + for triple in &terms { + self.print_quad(triple); + } return ChainedDecodingQuadIterator { first: DecodingQuadIterator { terms, @@ -84,6 +111,9 @@ impl StorageGenerator { } else if self.is_vocab(predicate, rdfs::LABEL) { println!("OF: rdfs::label"); let terms = self.paths(subject, predicate, object, graph_name); + for triple in &terms { + self.print_quad(triple); + } return ChainedDecodingQuadIterator { first: DecodingQuadIterator { terms, @@ -99,6 +129,9 @@ impl StorageGenerator { let terms_steps = self.steps(subject, predicate, object, graph_name); terms.extend(terms_paths); terms.extend(terms_steps); + for triple in &terms { + self.print_quad(triple); + } return ChainedDecodingQuadIterator { first: DecodingQuadIterator { terms, @@ -128,6 +161,9 @@ impl StorageGenerator { Vec::new() } }; + for triple in &terms { + self.print_quad(triple); + } return ChainedDecodingQuadIterator { first: DecodingQuadIterator { terms, @@ -338,20 +374,26 @@ impl StorageGenerator { println!("SF: some subject"); match step_type { StepType::Rank(path_name, target_rank) => { + println!("RANK: {}, {}", path_name, target_rank); if let Some(id) = self.storage.graph.get_path_id(path_name.as_bytes()) { let path_ref = self.storage.graph.get_path_ref(id).unwrap(); let step_handle = path_ref.step_at(path_ref.first_step()); - let step_handle = step_handle.unwrap(); + let mut step_handle = step_handle.unwrap(); let mut node_handle = step_handle.handle(); let mut rank = 1; let mut position = 1; - while path_ref.next_step(step_handle.0).is_some() && rank < target_rank { - let step_handle = path_ref.next_step(step_handle.0).unwrap(); + let steps = self.storage.graph.path_steps(id).expect("Path has steps"); + for _ in steps.skip(1) { + if rank >= target_rank { + break; + } + step_handle = path_ref.next_step(step_handle.0).unwrap(); position += self.storage.graph.node_len(node_handle); node_handle = step_handle.handle(); rank += 1; } + println!("Now handling: {}, {}, {}", rank, position, node_handle.0); let mut triples = self.step_handle_to_triples( &path_name, subject, @@ -366,6 +408,7 @@ impl StorageGenerator { } } StepType::Position(path_name, position) => { + println!("POSITION: {}, {}", path_name, position); if let Some(id) = self.storage.graph.get_path_id(path_name.as_bytes()) { if let Some(step) = self.storage.graph.path_step_at_base(id, position) { let node_handle = @@ -646,6 +689,7 @@ impl StorageGenerator { let seq_bytes = self.storage.graph.sequence_vec(handle); let seq = str::from_utf8(&seq_bytes).expect("Node contains sequence"); let seq_value = Literal::new_simple_literal(seq); + println!("Decoding1"); if object.is_none() || self.decode_term(object.unwrap()).unwrap() == Term::Literal(seq_value.clone()) {