sparesults: avoid stack overflow errors on too many nested triples

pull/190/head
Tpt 3 years ago
parent 0105a4a54a
commit 9dc8d348c4
  1. 17
      lib/oxrdf/src/parser.rs
  2. 13
      lib/sparesults/src/csv.rs
  3. 23
      lib/sparesults/src/json.rs

@ -8,6 +8,10 @@ use std::error::Error;
use std::fmt; use std::fmt;
use std::str::{Chars, FromStr}; use std::str::{Chars, FromStr};
/// This limit is set in order to avoid stack overflow errors caused by too many recursive calls when parsing nested triples.
/// The actual limit value is a rough, rule-of-thumb compromise between not failing to parse valid files and not triggering stack overflow errors.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 512;
impl FromStr for NamedNode { impl FromStr for NamedNode {
type Err = TermParseError; type Err = TermParseError;
@ -95,7 +99,7 @@ impl FromStr for Term {
/// ).into()); /// ).into());
/// ``` /// ```
fn from_str(s: &str) -> Result<Self, TermParseError> { fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_term(s)?; let (term, left) = read_term(s, 0)?;
if !left.is_empty() { if !left.is_empty() {
return Err(TermParseError::msg("Invalid term serialization")); return Err(TermParseError::msg("Invalid term serialization"));
} }
@ -293,12 +297,17 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
} }
} }
fn read_term(s: &str) -> Result<(Term, &str), TermParseError> { fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> {
let s = s.trim(); let s = s.trim();
if let Some(remain) = s.strip_prefix("<<") { if let Some(remain) = s.strip_prefix("<<") {
let (subject, remain) = read_term(remain)?; if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
return Err(TermParseError::msg(
"Too many nested triples. The parser fails here to avoid a stack overflow.",
));
}
let (subject, remain) = read_term(remain, number_of_recursive_calls + 1)?;
let (predicate, remain) = read_named_node(remain)?; let (predicate, remain) = read_named_node(remain)?;
let (object, remain) = read_term(remain)?; let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?;
let remain = remain.trim_start(); let remain = remain.trim_start();
if let Some(remain) = remain.strip_prefix(">>") { if let Some(remain) = remain.strip_prefix(">>") {
#[cfg(feature = "rdf-star")] #[cfg(feature = "rdf-star")]

@ -326,17 +326,18 @@ mod tests {
} }
#[test] #[test]
fn test_bad_tsv() -> io::Result<()> { fn test_bad_tsv() {
let bad_tsvs = vec![ let mut bad_tsvs = vec![
"?", "?p", "?p?o", "?p\n<", "?p\n_", "?p\n_:", "?p\n\"", "?p\n<<", "?", "?p", "?p?o", "?p\n<", "?p\n_", "?p\n_:", "?p\n\"", "?p\n<<",
]; ];
let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000));
bad_tsvs.push(&a_lot_of_strings);
for bad_tsv in bad_tsvs { for bad_tsv in bad_tsvs {
if let TsvQueryResultsReader::Solutions { mut solutions, .. } = if let Ok(TsvQueryResultsReader::Solutions { mut solutions, .. }) =
TsvQueryResultsReader::read(Cursor::new(bad_tsv))? TsvQueryResultsReader::read(Cursor::new(bad_tsv))
{ {
while solutions.read_next()?.is_some() {} while let Ok(Some(_)) = solutions.read_next() {}
} }
} }
Ok(())
} }
} }

@ -8,6 +8,10 @@ use oxrdf::*;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::io::{self, BufRead, Write}; use std::io::{self, BufRead, Write};
/// This limit is set in order to avoid stack overflow errors caused by too many recursive calls when parsing nested triples.
/// The actual limit value is a rough, rule-of-thumb compromise between not failing to parse valid files and not triggering stack overflow errors.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 256;
pub fn write_boolean_json_result<W: Write>(sink: W, value: bool) -> io::Result<W> { pub fn write_boolean_json_result<W: Write>(sink: W, value: bool) -> io::Result<W> {
let mut writer = JsonWriter::from_writer(sink); let mut writer = JsonWriter::from_writer(sink);
writer.write_event(JsonEvent::StartObject)?; writer.write_event(JsonEvent::StartObject)?;
@ -239,14 +243,21 @@ impl<R: BufRead> JsonSolutionsReader<R> {
key key
)) ))
})?; })?;
new_bindings[k] = Some(self.read_value()?) new_bindings[k] = Some(self.read_value(0)?)
} }
_ => return Err(SyntaxError::msg("Invalid result serialization").into()), _ => return Err(SyntaxError::msg("Invalid result serialization").into()),
} }
} }
} }
fn read_value(&mut self) -> Result<Term, ParseError> { fn read_value(&mut self, number_of_recursive_calls: usize) -> Result<Term, ParseError> {
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
return Err(SyntaxError::msg(format!(
"Too many nested triples ({}). The parser fails here to avoid a stack overflow.",
MAX_NUMBER_OF_NESTED_TRIPLES
))
.into());
}
enum Type { enum Type {
Uri, Uri,
BNode, BNode,
@ -279,9 +290,11 @@ impl<R: BufRead> JsonSolutionsReader<R> {
"value" => state = Some(State::Value), "value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang), "xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype), "datatype" => state = Some(State::Datatype),
"subject" => subject = Some(self.read_value()?), "subject" => subject = Some(self.read_value(number_of_recursive_calls + 1)?),
"predicate" => predicate = Some(self.read_value()?), "predicate" => {
"object" => object = Some(self.read_value()?), predicate = Some(self.read_value(number_of_recursive_calls + 1)?)
}
"object" => object = Some(self.read_value(number_of_recursive_calls + 1)?),
_ => { _ => {
return Err(SyntaxError::msg(format!( return Err(SyntaxError::msg(format!(
"Unexpected key in term serialization: '{}'", "Unexpected key in term serialization: '{}'",

Loading…
Cancel
Save