Fuzzer: ensure that N-Quads/TriG segmentation does not affect results

pull/600/head
Tpt 1 year ago committed by Thomas Tanon
parent 88e49f6c66
commit f586cc048f
  1. 2
      fuzz/Cargo.toml
  2. 34
      fuzz/fuzz_targets/nquads.rs
  3. 2
      fuzz/fuzz_targets/sparql_query.rs
  4. 2
      fuzz/fuzz_targets/sparql_results_json.rs
  5. 2
      fuzz/fuzz_targets/sparql_results_tsv.rs
  6. 2
      fuzz/fuzz_targets/sparql_results_xml.rs
  7. 2
      fuzz/fuzz_targets/sparql_update.rs
  8. 80
      fuzz/fuzz_targets/trig.rs

@ -1,7 +1,6 @@
[package] [package]
name = "oxigraph-fuzz" name = "oxigraph-fuzz"
version = "0.0.0" version = "0.0.0"
authors = ["Automatically generated"]
publish = false publish = false
edition = "2021" edition = "2021"
@ -12,6 +11,7 @@ cargo-fuzz = true
anyhow = "1" anyhow = "1"
lazy_static = "1" lazy_static = "1"
libfuzzer-sys = "0.4" libfuzzer-sys = "0.4"
oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] }
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] } oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" } oxrdfxml = { path = "../lib/oxrdfxml" }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] } spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }

@ -1,27 +1,45 @@
#![no_main] #![no_main]
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxttl::{NQuadsParser, NQuadsSerializer}; use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer, SyntaxError};
fuzz_target!(|data: &[u8]| { fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<SyntaxError>) {
// We parse
let mut quads = Vec::new(); let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples().parse(); let mut parser = NQuadsParser::new().with_quoted_triples().parse();
for chunk in data.split(|c| *c == 0xFF) { for chunk in chunks {
parser.extend_from_slice(chunk); parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
if let Ok(quad) = result { match result {
quads.push(quad); Ok(quad) => quads.push(quad),
Err(error) => errors.push(error),
} }
} }
} }
parser.end(); parser.end();
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
if let Ok(quad) = result { match result {
quads.push(quad); Ok(quad) => quads.push(quad),
Err(error) => errors.push(error),
} }
} }
assert!(parser.is_end()); assert!(parser.is_end());
(quads, errors)
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF));
// We parse without splitting
let (quads_without_split, errors_without_split) = parse([data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()]);
assert_eq!(quads, quads_without_split);
assert_eq!(errors.len(), errors_without_split.len());
// We serialize // We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new()); let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());

@ -3,5 +3,5 @@ use libfuzzer_sys::fuzz_target;
use spargebra::Query; use spargebra::Query;
fuzz_target!(|data: &str| { fuzz_target!(|data: &str| {
Query::parse(data, None); let _ = Query::parse(data, None);
}); });

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format; use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat; use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Json, data) }); fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data));

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format; use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat; use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Tsv, data) }); fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data));

@ -3,4 +3,4 @@ use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format; use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat; use sparesults::QueryResultsFormat;
fuzz_target!(|data: &[u8]| { fuzz_result_format(QueryResultsFormat::Xml, data) }); fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data));

@ -4,5 +4,5 @@ use spargebra::Update;
use std::str; use std::str;
fuzz_target!(|data: &str| { fuzz_target!(|data: &str| {
Update::parse(data, None); let _ = Update::parse(data, None);
}); });

@ -1,38 +1,96 @@
#![no_main] #![no_main]
use libfuzzer_sys::fuzz_target; use libfuzzer_sys::fuzz_target;
use oxttl::{TriGParser, TriGSerializer}; use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{SyntaxError, TriGParser, TriGSerializer};
fuzz_target!(|data: &[u8]| { fn parse<'a>(chunks: impl IntoIterator<Item = &'a [u8]>) -> (Vec<Quad>, Vec<SyntaxError>) {
// We parse
let mut quads = Vec::new(); let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = TriGParser::new() let mut parser = TriGParser::new()
.with_quoted_triples() .with_quoted_triples()
.with_base_iri("http://example.com/") .with_base_iri("http://example.com/")
.unwrap() .unwrap()
.parse(); .parse();
for chunk in data.split(|c| *c == 0xFF) { for chunk in chunks {
parser.extend_from_slice(chunk); parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
if let Ok(quad) = result { match result {
quads.push(quad); Ok(quad) => quads.push(quad),
Err(error) => errors.push(error),
} }
} }
} }
parser.end(); parser.end();
while let Some(result) = parser.read_next() { while let Some(result) = parser.read_next() {
if let Ok(quad) = result { match result {
quads.push(quad); Ok(quad) => quads.push(quad),
Err(error) => errors.push(error),
} }
} }
assert!(parser.is_end()); assert!(parser.is_end());
(quads, errors)
}
// We serialize fn count_triple_blank_nodes(triple: &Triple) -> usize {
(match &triple.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &triple.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
})
}
/// Counts the blank node occurrences found in the subject, object and graph name of `quad`.
///
/// A quoted triple in subject or object position contributes whatever
/// `count_triple_blank_nodes` reports for it; every other kind of term counts as zero.
fn count_quad_blank_nodes(quad: &Quad) -> usize {
    let in_subject = match &quad.subject {
        Subject::BlankNode(_) => 1,
        Subject::Triple(inner) => count_triple_blank_nodes(inner),
        _ => 0,
    };
    let in_object = match &quad.object {
        Term::BlankNode(_) => 1,
        Term::Triple(inner) => count_triple_blank_nodes(inner),
        _ => 0,
    };
    // The graph name cannot hold a quoted triple, so it adds at most one.
    let in_graph_name = usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)));
    in_subject + in_object + in_graph_name
}
fn serialize_quads(quads: &[Quad]) -> Vec<u8> {
let mut writer = TriGSerializer::new().serialize_to_write(Vec::new()); let mut writer = TriGSerializer::new().serialize_to_write(Vec::new());
for quad in &quads { for quad in quads {
writer.write_quad(quad).unwrap(); writer.write_quad(quad).unwrap();
} }
let new_serialization = writer.finish().unwrap(); writer.finish().unwrap()
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF));
// We parse without splitting
let (quads_without_split, errors_without_split) = parse([data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()]);
if quads.iter().map(count_quad_blank_nodes).sum::<usize>() < 2 {
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
dataset_with_split.canonicalize();
dataset_without_split.canonicalize();
assert_eq!(
dataset_with_split,
dataset_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads)),
String::from_utf8_lossy(&serialize_quads(&quads_without_split))
);
}
assert_eq!(errors.len(), errors_without_split.len());
// We serialize
let new_serialization = serialize_quads(&quads);
// We parse the serialization // We parse the serialization
let new_quads = TriGParser::new() let new_quads = TriGParser::new()

Loading…
Cancel
Save