sparesults: avoid stack overflow errors on too many nested triples

pull/190/head
Tpt 3 years ago
parent 0105a4a54a
commit 9dc8d348c4
  1. 17
      lib/oxrdf/src/parser.rs
  2. 13
      lib/sparesults/src/csv.rs
  3. 23
      lib/sparesults/src/json.rs

@ -8,6 +8,10 @@ use std::error::Error;
use std::fmt;
use std::str::{Chars, FromStr};
/// Maximum nesting depth of quoted triples accepted by the term parser.
///
/// Parsing nested triples is recursive, so this limit exists to avoid a stack overflow caused by too many recursive calls.
/// The value is a rough estimate: a compromise between not rejecting valid input and not triggering a stack overflow.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 512;
impl FromStr for NamedNode {
type Err = TermParseError;
@ -95,7 +99,7 @@ impl FromStr for Term {
/// ).into());
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
let (term, left) = read_term(s)?;
let (term, left) = read_term(s, 0)?;
if !left.is_empty() {
return Err(TermParseError::msg("Invalid term serialization"));
}
@ -293,12 +297,17 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
}
}
fn read_term(s: &str) -> Result<(Term, &str), TermParseError> {
fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> {
let s = s.trim();
if let Some(remain) = s.strip_prefix("<<") {
let (subject, remain) = read_term(remain)?;
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
return Err(TermParseError::msg(
"Too many nested triples. The parser fails here to avoid a stack overflow.",
));
}
let (subject, remain) = read_term(remain, number_of_recursive_calls + 1)?;
let (predicate, remain) = read_named_node(remain)?;
let (object, remain) = read_term(remain)?;
let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?;
let remain = remain.trim_start();
if let Some(remain) = remain.strip_prefix(">>") {
#[cfg(feature = "rdf-star")]

@ -326,17 +326,18 @@ mod tests {
}
#[test]
fn test_bad_tsv() -> io::Result<()> {
let bad_tsvs = vec![
fn test_bad_tsv() {
let mut bad_tsvs = vec![
"?", "?p", "?p?o", "?p\n<", "?p\n_", "?p\n_:", "?p\n\"", "?p\n<<",
];
let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000));
bad_tsvs.push(&a_lot_of_strings);
for bad_tsv in bad_tsvs {
if let TsvQueryResultsReader::Solutions { mut solutions, .. } =
TsvQueryResultsReader::read(Cursor::new(bad_tsv))?
if let Ok(TsvQueryResultsReader::Solutions { mut solutions, .. }) =
TsvQueryResultsReader::read(Cursor::new(bad_tsv))
{
while solutions.read_next()?.is_some() {}
while let Ok(Some(_)) = solutions.read_next() {}
}
}
Ok(())
}
}

@ -8,6 +8,10 @@ use oxrdf::*;
use std::collections::BTreeMap;
use std::io::{self, BufRead, Write};
/// Maximum nesting depth of quoted triples accepted when reading a JSON value.
///
/// Reading nested triples is recursive, so this limit exists to avoid a stack overflow caused by too many recursive calls.
/// The value is a rough estimate: a compromise between not rejecting valid input and not triggering a stack overflow.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 256;
pub fn write_boolean_json_result<W: Write>(sink: W, value: bool) -> io::Result<W> {
let mut writer = JsonWriter::from_writer(sink);
writer.write_event(JsonEvent::StartObject)?;
@ -239,14 +243,21 @@ impl<R: BufRead> JsonSolutionsReader<R> {
key
))
})?;
new_bindings[k] = Some(self.read_value()?)
new_bindings[k] = Some(self.read_value(0)?)
}
_ => return Err(SyntaxError::msg("Invalid result serialization").into()),
}
}
}
fn read_value(&mut self) -> Result<Term, ParseError> {
fn read_value(&mut self, number_of_recursive_calls: usize) -> Result<Term, ParseError> {
if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
return Err(SyntaxError::msg(format!(
"Too many nested triples ({}). The parser fails here to avoid a stack overflow.",
MAX_NUMBER_OF_NESTED_TRIPLES
))
.into());
}
enum Type {
Uri,
BNode,
@ -279,9 +290,11 @@ impl<R: BufRead> JsonSolutionsReader<R> {
"value" => state = Some(State::Value),
"xml:lang" => state = Some(State::Lang),
"datatype" => state = Some(State::Datatype),
"subject" => subject = Some(self.read_value()?),
"predicate" => predicate = Some(self.read_value()?),
"object" => object = Some(self.read_value()?),
"subject" => subject = Some(self.read_value(number_of_recursive_calls + 1)?),
"predicate" => {
predicate = Some(self.read_value(number_of_recursive_calls + 1)?)
}
"object" => object = Some(self.read_value(number_of_recursive_calls + 1)?),
_ => {
return Err(SyntaxError::msg(format!(
"Unexpected key in term serialization: '{}'",

Loading…
Cancel
Save