From 9dc8d348c4c5c0cdae2f4781d79aafe877069da3 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 12 Jan 2022 20:02:09 +0100 Subject: [PATCH] sparesults: avoid stack overflow errors on too many nested triples --- lib/oxrdf/src/parser.rs | 17 +++++++++++++---- lib/sparesults/src/csv.rs | 13 +++++++------ lib/sparesults/src/json.rs | 23 ++++++++++++++++++----- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/lib/oxrdf/src/parser.rs b/lib/oxrdf/src/parser.rs index af74b151..43f759dc 100644 --- a/lib/oxrdf/src/parser.rs +++ b/lib/oxrdf/src/parser.rs @@ -8,6 +8,10 @@ use std::error::Error; use std::fmt; use std::str::{Chars, FromStr}; +/// This limit is set in order to avoid stack overflow errors when parsing nested triples due to too many recursive calls. +/// The actual limit value is a rule-of-thumb compromise between not failing to parse valid files and avoiding stack overflow errors. +const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 512; + impl FromStr for NamedNode { type Err = TermParseError; @@ -95,7 +99,7 @@ impl FromStr for Term { /// ).into()); /// ``` fn from_str(s: &str) -> Result { - let (term, left) = read_term(s)?; + let (term, left) = read_term(s, 0)?; if !left.is_empty() { return Err(TermParseError::msg("Invalid term serialization")); } @@ -293,12 +297,17 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> { } } -fn read_term(s: &str) -> Result<(Term, &str), TermParseError> { +fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> { let s = s.trim(); if let Some(remain) = s.strip_prefix("<<") { - let (subject, remain) = read_term(remain)?; + if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES { + return Err(TermParseError::msg( + "Too many nested triples. 
The parser fails here to avoid a stack overflow.", + )); + } + let (subject, remain) = read_term(remain, number_of_recursive_calls + 1)?; let (predicate, remain) = read_named_node(remain)?; - let (object, remain) = read_term(remain)?; + let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?; let remain = remain.trim_start(); if let Some(remain) = remain.strip_prefix(">>") { #[cfg(feature = "rdf-star")] diff --git a/lib/sparesults/src/csv.rs b/lib/sparesults/src/csv.rs index 9db6e1f2..83407faa 100644 --- a/lib/sparesults/src/csv.rs +++ b/lib/sparesults/src/csv.rs @@ -326,17 +326,18 @@ mod tests { } #[test] - fn test_bad_tsv() -> io::Result<()> { - let bad_tsvs = vec![ + fn test_bad_tsv() { + let mut bad_tsvs = vec![ "?", "?p", "?p?o", "?p\n<", "?p\n_", "?p\n_:", "?p\n\"", "?p\n<<", ]; + let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000)); + bad_tsvs.push(&a_lot_of_strings); for bad_tsv in bad_tsvs { - if let TsvQueryResultsReader::Solutions { mut solutions, .. } = - TsvQueryResultsReader::read(Cursor::new(bad_tsv))? + if let Ok(TsvQueryResultsReader::Solutions { mut solutions, .. }) = + TsvQueryResultsReader::read(Cursor::new(bad_tsv)) { - while solutions.read_next()?.is_some() {} + while let Ok(Some(_)) = solutions.read_next() {} } } - Ok(()) } } diff --git a/lib/sparesults/src/json.rs b/lib/sparesults/src/json.rs index a80f35b7..28769d0a 100644 --- a/lib/sparesults/src/json.rs +++ b/lib/sparesults/src/json.rs @@ -8,6 +8,10 @@ use oxrdf::*; use std::collections::BTreeMap; use std::io::{self, BufRead, Write}; +/// This limit is set in order to avoid stack overflow errors when parsing nested triples due to too many recursive calls. +/// The actual limit value is a rule-of-thumb compromise between not failing to parse valid files and avoiding stack overflow errors. 
+const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 256; + pub fn write_boolean_json_result(sink: W, value: bool) -> io::Result { let mut writer = JsonWriter::from_writer(sink); writer.write_event(JsonEvent::StartObject)?; @@ -239,14 +243,21 @@ impl JsonSolutionsReader { key )) })?; - new_bindings[k] = Some(self.read_value()?) + new_bindings[k] = Some(self.read_value(0)?) } _ => return Err(SyntaxError::msg("Invalid result serialization").into()), } } } - fn read_value(&mut self) -> Result { + fn read_value(&mut self, number_of_recursive_calls: usize) -> Result { + if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES { + return Err(SyntaxError::msg(format!( + "Too many nested triples ({}). The parser fails here to avoid a stack overflow.", + MAX_NUMBER_OF_NESTED_TRIPLES + )) + .into()); + } enum Type { Uri, BNode, @@ -279,9 +290,11 @@ impl JsonSolutionsReader { "value" => state = Some(State::Value), "xml:lang" => state = Some(State::Lang), "datatype" => state = Some(State::Datatype), - "subject" => subject = Some(self.read_value()?), - "predicate" => predicate = Some(self.read_value()?), - "object" => object = Some(self.read_value()?), + "subject" => subject = Some(self.read_value(number_of_recursive_calls + 1)?), + "predicate" => { + predicate = Some(self.read_value(number_of_recursive_calls + 1)?) + } + "object" => object = Some(self.read_value(number_of_recursive_calls + 1)?), _ => { return Err(SyntaxError::msg(format!( "Unexpected key in term serialization: '{}'",