From f586cc048f823d8faba31b5eba26ac635050e7fe Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 6 Jul 2023 17:56:05 +0200 Subject: [PATCH] Fuzzer: ensure that NQuad/TriG segmentation does not affect results --- fuzz/Cargo.toml | 2 +- fuzz/fuzz_targets/nquads.rs | 34 +++++++--- fuzz/fuzz_targets/sparql_query.rs | 2 +- fuzz/fuzz_targets/sparql_results_json.rs | 2 +- fuzz/fuzz_targets/sparql_results_tsv.rs | 2 +- fuzz/fuzz_targets/sparql_results_xml.rs | 2 +- fuzz/fuzz_targets/sparql_update.rs | 2 +- fuzz/fuzz_targets/trig.rs | 80 ++++++++++++++++++++---- 8 files changed, 101 insertions(+), 25 deletions(-) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 8bef5528..f1524903 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -1,7 +1,6 @@ [package] name = "oxigraph-fuzz" version = "0.0.0" -authors = ["Automatically generated"] publish = false edition = "2021" @@ -12,6 +11,7 @@ cargo-fuzz = true anyhow = "1" lazy_static = "1" libfuzzer-sys = "0.4" +oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] } oxttl = { path = "../lib/oxttl", features = ["rdf-star"] } oxrdfxml = { path = "../lib/oxrdfxml" } spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] } diff --git a/fuzz/fuzz_targets/nquads.rs b/fuzz/fuzz_targets/nquads.rs index b8b1ac6e..a7de4913 100644 --- a/fuzz/fuzz_targets/nquads.rs +++ b/fuzz/fuzz_targets/nquads.rs @@ -1,27 +1,45 @@ #![no_main] use libfuzzer_sys::fuzz_target; -use oxttl::{NQuadsParser, NQuadsSerializer}; +use oxrdf::Quad; +use oxttl::{NQuadsParser, NQuadsSerializer, SyntaxError}; -fuzz_target!(|data: &[u8]| { - // We parse +fn parse<'a>(chunks: impl IntoIterator) -> (Vec, Vec) { let mut quads = Vec::new(); + let mut errors = Vec::new(); let mut parser = NQuadsParser::new().with_quoted_triples().parse(); - for chunk in data.split(|c| *c == 0xFF) { + for chunk in chunks { parser.extend_from_slice(chunk); while let Some(result) = parser.read_next() { - if let Ok(quad) = result { - quads.push(quad); + match result { + Ok(quad) => quads.push(quad), + Err(error) => errors.push(error), } } } parser.end(); while let Some(result) = parser.read_next() { - if let Ok(quad) = result { - quads.push(quad); + match result { + Ok(quad) => quads.push(quad), + Err(error) => errors.push(error), } } assert!(parser.is_end()); + (quads, errors) +} + +fuzz_target!(|data: &[u8]| { + // We parse with splitting + let (quads, errors) = parse(data.split(|c| *c == 0xFF)); + // We parse without splitting + let (quads_without_split, errors_without_split) = parse([data + .iter() + .copied() + .filter(|c| *c != 0xFF) + .collect::>() + .as_slice()]); + assert_eq!(quads, quads_without_split); + assert_eq!(errors.len(), errors_without_split.len()); // We serialize let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); diff --git a/fuzz/fuzz_targets/sparql_query.rs b/fuzz/fuzz_targets/sparql_query.rs index 889d3e79..136a2c0f 100644 --- a/fuzz/fuzz_targets/sparql_query.rs +++ b/fuzz/fuzz_targets/sparql_query.rs @@ -3,5 +3,5 @@ use libfuzzer_sys::fuzz_target; use spargebra::Query; fuzz_target!(|data: &str| { - Query::parse(data, None); + let _ = Query::parse(data, None); }); diff --git a/fuzz/fuzz_targets/sparql_results_json.rs b/fuzz/fuzz_targets/sparql_results_json.rs index cd917481..f9588f8b 100644 --- a/fuzz/fuzz_targets/sparql_results_json.rs +++ b/fuzz/fuzz_targets/sparql_results_json.rs @@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target; use oxigraph_fuzz::result_format::fuzz_result_format; use sparesults::QueryResultsFormat; -fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Json, data) }); +fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data)); diff --git a/fuzz/fuzz_targets/sparql_results_tsv.rs b/fuzz/fuzz_targets/sparql_results_tsv.rs index 4cf3f4cf..1aa600b7 100644 --- a/fuzz/fuzz_targets/sparql_results_tsv.rs +++ b/fuzz/fuzz_targets/sparql_results_tsv.rs @@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target; use oxigraph_fuzz::result_format::fuzz_result_format; use sparesults::QueryResultsFormat; -fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Tsv, data) }); +fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data)); diff --git a/fuzz/fuzz_targets/sparql_results_xml.rs b/fuzz/fuzz_targets/sparql_results_xml.rs index 6c4747ec..451f528a 100644 --- a/fuzz/fuzz_targets/sparql_results_xml.rs +++ b/fuzz/fuzz_targets/sparql_results_xml.rs @@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target; use oxigraph_fuzz::result_format::fuzz_result_format; use sparesults::QueryResultsFormat; -fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Xml, data) }); +fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data)); diff --git a/fuzz/fuzz_targets/sparql_update.rs b/fuzz/fuzz_targets/sparql_update.rs index 15c0a995..56ffdae3 100644 --- a/fuzz/fuzz_targets/sparql_update.rs +++ b/fuzz/fuzz_targets/sparql_update.rs @@ -4,5 +4,5 @@ use spargebra::Update; use std::str; fuzz_target!(|data: &str| { - Update::parse(data, None); + let _ = Update::parse(data, None); }); diff --git a/fuzz/fuzz_targets/trig.rs b/fuzz/fuzz_targets/trig.rs index e6ed06c7..a96ca86c 100644 --- a/fuzz/fuzz_targets/trig.rs +++ b/fuzz/fuzz_targets/trig.rs @@ -1,38 +1,96 @@ #![no_main] use libfuzzer_sys::fuzz_target; -use oxttl::{TriGParser, TriGSerializer}; +use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple}; +use oxttl::{SyntaxError, TriGParser, TriGSerializer}; -fuzz_target!(|data: &[u8]| { - // We parse +fn parse<'a>(chunks: impl IntoIterator) -> (Vec, Vec) { let mut quads = Vec::new(); + let mut errors = Vec::new(); let mut parser = TriGParser::new() .with_quoted_triples() .with_base_iri("http://example.com/") .unwrap() .parse(); - for chunk in data.split(|c| *c == 0xFF) { + for chunk in chunks { parser.extend_from_slice(chunk); while let Some(result) = parser.read_next() { - if let Ok(quad) = result { - quads.push(quad); + match result { + Ok(quad) => quads.push(quad), + Err(error) => errors.push(error), } } } parser.end(); while let Some(result) = parser.read_next() { - if let Ok(quad) = result { - quads.push(quad); + match result { + Ok(quad) => quads.push(quad), + Err(error) => errors.push(error), } } assert!(parser.is_end()); + (quads, errors) +} - // We serialize +fn count_triple_blank_nodes(triple: &Triple) -> usize { + (match &triple.subject { + Subject::BlankNode(_) => 1, + Subject::Triple(t) => count_triple_blank_nodes(t), + _ => 0, + }) + (match &triple.object { + Term::BlankNode(_) => 1, + Term::Triple(t) => count_triple_blank_nodes(t), + _ => 0, + }) +} + +fn count_quad_blank_nodes(quad: &Quad) -> usize { + (match &quad.subject { + Subject::BlankNode(_) => 1, + Subject::Triple(t) => count_triple_blank_nodes(t), + _ => 0, + }) + (match &quad.object { + Term::BlankNode(_) => 1, + Term::Triple(t) => count_triple_blank_nodes(t), + _ => 0, + }) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_))) +} + +fn serialize_quads(quads: &[Quad]) -> Vec { let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); - for quad in &quads { + for quad in quads { writer.write_quad(quad).unwrap(); } - let new_serialization = writer.finish().unwrap(); + writer.finish().unwrap() +} + +fuzz_target!(|data: &[u8]| { + // We parse with splitting + let (quads, errors) = parse(data.split(|c| *c == 0xFF)); + // We parse without splitting + let (quads_without_split, errors_without_split) = parse([data + .iter() + .copied() + .filter(|c| *c != 0xFF) + .collect::>() + .as_slice()]); + if quads.iter().map(count_quad_blank_nodes).sum::() < 2 { + let mut dataset_with_split = quads.iter().collect::(); + let mut dataset_without_split = quads_without_split.iter().collect::(); + dataset_with_split.canonicalize(); + dataset_without_split.canonicalize(); + assert_eq!( + dataset_with_split, + dataset_without_split, + "With split:\n{}\nWithout split:\n{}", + String::from_utf8_lossy(&serialize_quads(&quads)), + String::from_utf8_lossy(&serialize_quads(&quads_without_split)) + ); + } + assert_eq!(errors.len(), errors_without_split.len()); + + // We serialize + let new_serialization = serialize_quads(&quads); // We parse the serialization let new_quads = TriGParser::new()