oxigraph/fuzz/fuzz_targets/trig.rs

#![no_main]

use libfuzzer_sys::fuzz_target;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};

fn parse<'a>(
    chunks: impl IntoIterator<Item = &'a [u8]>,
    unchecked: bool,
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {
    let mut quads = Vec::new();
    let mut errors = Vec::new();
    let mut parser = TriGParser::new()
        .with_quoted_triples()
        .with_base_iri("http://example.com/")
        .unwrap();
    if unchecked {
        parser = parser.unchecked();
    }
    let mut reader = parser.parse();
    for chunk in chunks {
        reader.extend_from_slice(chunk);
        while let Some(result) = reader.read_next() {
            match result {
                Ok(quad) => quads.push(quad),
                Err(error) => errors.push(error.to_string()),
            }
        }
    }
    reader.end();
    while let Some(result) = reader.read_next() {
        match result {
            Ok(quad) => quads.push(quad),
            Err(error) => errors.push(error.to_string()),
        }
    }
    assert!(reader.is_end());
    (
        quads,
        errors,
        reader
            .prefixes()
            .map(|(k, v)| (k.to_owned(), v.to_owned()))
            .collect(),
    )
}

fn count_triple_blank_nodes(triple: &Triple) -> usize {
    (match &triple.subject {
        Subject::BlankNode(_) => 1,
        Subject::Triple(t) => count_triple_blank_nodes(t),
        _ => 0,
    }) + (match &triple.object {
        Term::BlankNode(_) => 1,
        Term::Triple(t) => count_triple_blank_nodes(t),
        _ => 0,
    })
}

fn count_quad_blank_nodes(quad: &Quad) -> usize {
    (match &quad.subject {
        Subject::BlankNode(_) => 1,
        Subject::Triple(t) => count_triple_blank_nodes(t),
        _ => 0,
    }) + (match &quad.object {
        Term::BlankNode(_) => 1,
        Term::Triple(t) => count_triple_blank_nodes(t),
        _ => 0,
    }) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)))
}

fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {
    let mut serializer = TriGSerializer::new();
    for (prefix_name, prefix_iri) in prefixes {
        serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap();
    }
    let mut writer = serializer.serialize_to_write(Vec::new());
    for quad in quads {
        writer.write_quad(quad).unwrap();
    }
    writer.finish().unwrap()
}

fuzz_target!(|data: &[u8]| {
    // We parse with splitting
    let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false);
    // We parse without splitting
    let (quads_without_split, errors_without_split, _) = parse(
        [data
            .iter()
            .copied()
            .filter(|c| *c != 0xFF)
            .collect::<Vec<_>>()
            .as_slice()],
        false,
    );
    let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true);
    if errors.is_empty() {
        assert!(errors_unchecked.is_empty());
    }

    let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
    if bnodes_count == 0 {
        assert_eq!(
            quads,
            quads_without_split,
            "With split:\n{}\nWithout split:\n{}",
            String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
            String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
        );
        if errors.is_empty() {
            assert_eq!(
                quads,
                quads_unchecked,
                "Validating:\n{}\nUnchecked:\n{}",
                String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
                String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
            );
        }
    } else if bnodes_count <= 4 {
        let mut dataset_with_split = quads.iter().collect::<Dataset>();
        let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
        dataset_with_split.canonicalize();
        dataset_without_split.canonicalize();
        assert_eq!(
            dataset_with_split,
            dataset_without_split,
            "With split:\n{}\nWithout split:\n{}",
            String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
            String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
        );
        if errors.is_empty() {
            if errors.is_empty() {
                let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
                dataset_unchecked.canonicalize();
                assert_eq!(
                    dataset_with_split,
                    dataset_unchecked,
                    "Validating:\n{}\nUnchecked:\n{}",
                    String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
                    String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
                );
            }
        }
    }
    assert_eq!(errors, errors_without_split);

    // We serialize
    let new_serialization = serialize_quads(&quads, prefixes);

    // We parse the serialization
    let new_quads = TriGParser::new()
        .with_quoted_triples()
        .parse_read(new_serialization.as_slice())
        .collect::<Result<Vec<_>, _>>()
        .map_err(|e| {
            format!(
                "Error on {:?} from {quads:?} based on {:?}: {e}",
                String::from_utf8_lossy(&new_serialization),
                String::from_utf8_lossy(data)
            )
        })
        .unwrap();

    // We check the roundtrip has not changed anything
    assert_eq!(new_quads, quads);
});
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`#![no_main]`

			`use libfuzzer_sys::fuzz_target;`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};`
OxTTL: return file position in errors 1 year ago			`use oxttl::{TriGParser, TriGSerializer};`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`fn parse<'a>(`
			`chunks: impl IntoIterator<Item = &'a [u8]>,`
			`unchecked: bool,`
Serialization: allows to set prefixes 10 months ago			`) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`let mut quads = Vec::new();`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`let mut errors = Vec::new();`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`let mut parser = TriGParser::new()`
			`.with_quoted_triples()`
			`.with_base_iri("http://example.com/")`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`.unwrap();`
			`if unchecked {`
			`parser = parser.unchecked();`
			`}`
			`let mut reader = parser.parse();`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`for chunk in chunks {`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`reader.extend_from_slice(chunk);`
			`while let Some(result) = reader.read_next() {`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`match result {`
			`Ok(quad) => quads.push(quad),`
OxTTL: return file position in errors 1 year ago			`Err(error) => errors.push(error.to_string()),`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`}`
			`}`
			`}`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`reader.end();`
			`while let Some(result) = reader.read_next() {`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`match result {`
			`Ok(quad) => quads.push(quad),`
OxTTL: return file position in errors 1 year ago			`Err(error) => errors.push(error.to_string()),`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`}`
			`}`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`assert!(reader.is_end());`
Serialization: allows to set prefixes 10 months ago			`(`
			`quads,`
			`errors,`
			`reader`
			`.prefixes()`
			`.map(\|(k, v)\| (k.to_owned(), v.to_owned()))`
			`.collect(),`
			`)`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`}`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`fn count_triple_blank_nodes(triple: &Triple) -> usize {`
			`(match &triple.subject {`
			`Subject::BlankNode(_) => 1,`
			`Subject::Triple(t) => count_triple_blank_nodes(t),`
			`_ => 0,`
			`}) + (match &triple.object {`
			`Term::BlankNode(_) => 1,`
			`Term::Triple(t) => count_triple_blank_nodes(t),`
			`_ => 0,`
			`})`
			`}`

			`fn count_quad_blank_nodes(quad: &Quad) -> usize {`
			`(match &quad.subject {`
			`Subject::BlankNode(_) => 1,`
			`Subject::Triple(t) => count_triple_blank_nodes(t),`
			`_ => 0,`
			`}) + (match &quad.object {`
			`Term::BlankNode(_) => 1,`
			`Term::Triple(t) => count_triple_blank_nodes(t),`
			`_ => 0,`
			`}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)))`
			`}`

Serialization: allows to set prefixes 10 months ago			`fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {`
			`let mut serializer = TriGSerializer::new();`
			`for (prefix_name, prefix_iri) in prefixes {`
			`serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap();`
			`}`
			`let mut writer = serializer.serialize_to_write(Vec::new());`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`for quad in quads {`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`writer.write_quad(quad).unwrap();`
			`}`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`writer.finish().unwrap()`
			`}`

			`fuzz_target!(\|data: &[u8]\| {`
			`// We parse with splitting`
Serialization: allows to set prefixes 10 months ago			`let (quads, errors, prefixes) = parse(data.split(\|c\| *c == 0xFF), false);`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`// We parse without splitting`
Serialization: allows to set prefixes 10 months ago			`let (quads_without_split, errors_without_split, _) = parse(`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`[data`
			`.iter()`
			`.copied()`
			`.filter(\|c\| *c != 0xFF)`
			`.collect::<Vec<_>>()`
			`.as_slice()],`
			`false,`
			`);`
Serialization: allows to set prefixes 10 months ago			`let (quads_unchecked, errors_unchecked, _) = parse(data.split(\|c\| *c == 0xFF), true);`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`if errors.is_empty() {`
			`assert!(errors_unchecked.is_empty());`
			`}`

Isomorphism: makes sure that new hashes depends on the old ones Allows to make the "distinguish" step work 1 year ago			`let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();`
			`if bnodes_count == 0 {`
			`assert_eq!(`
			`quads,`
			`quads_without_split,`
			`"With split:\n{}\nWithout split:\n{}",`
Serialization: allows to set prefixes 10 months ago			`String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),`
			`String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))`
Isomorphism: makes sure that new hashes depends on the old ones Allows to make the "distinguish" step work 1 year ago			`);`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`if errors.is_empty() {`
			`assert_eq!(`
			`quads,`
			`quads_unchecked,`
			`"Validating:\n{}\nUnchecked:\n{}",`
Serialization: allows to set prefixes 10 months ago			`String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),`
			`String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`);`
			`}`
Isomorphism: makes sure that new hashes depends on the old ones Allows to make the "distinguish" step work 1 year ago			`} else if bnodes_count <= 4 {`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`let mut dataset_with_split = quads.iter().collect::<Dataset>();`
			`let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();`
			`dataset_with_split.canonicalize();`
			`dataset_without_split.canonicalize();`
			`assert_eq!(`
			`dataset_with_split,`
			`dataset_without_split,`
			`"With split:\n{}\nWithout split:\n{}",`
Serialization: allows to set prefixes 10 months ago			`String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),`
			`String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`);`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`if errors.is_empty() {`
			`if errors.is_empty() {`
			`let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();`
			`dataset_unchecked.canonicalize();`
			`assert_eq!(`
			`dataset_with_split,`
			`dataset_unchecked,`
			`"Validating:\n{}\nUnchecked:\n{}",`
Serialization: allows to set prefixes 10 months ago			`String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),`
			`String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))`
Parsers: adds "unchecked" option for faster parsing Does not validate IRIs and language tags 10 months ago			`);`
			`}`
			`}`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago			`}`
OxTTL: return file position in errors 1 year ago			`assert_eq!(errors, errors_without_split);`
Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago
			`// We serialize`
Serialization: allows to set prefixes 10 months ago			`let new_serialization = serialize_quads(&quads, prefixes);`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago
			`// We parse the serialization`
			`let new_quads = TriGParser::new()`
			`.with_quoted_triples()`
Renames "parse_from_read" to "parse_read" 1 year ago			`.parse_read(new_serialization.as_slice())`
New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers - Compatible with async IO - Turtle/TriG parser recovery on simple errors 2 years ago			`.collect::<Result<Vec<_>, _>>()`
			`.map_err(\|e\| {`
			`format!(`
			`"Error on {:?} from {quads:?} based on {:?}: {e}",`
			`String::from_utf8_lossy(&new_serialization),`
			`String::from_utf8_lossy(data)`
			`)`
			`})`
			`.unwrap();`

			`// We check the roundtrip has not changed anything`
			`assert_eq!(new_quads, quads);`
			`});`