From d92e5466f5b10de378d87e1aa7163ff0959ba208 Mon Sep 17 00:00:00 2001
From: Tpt
Date: Sun, 27 May 2018 09:10:01 +0200
Subject: [PATCH] Reduces the number of allocations during NTriples parsing

---
 src/rio/ntriples/mod.rs                   | 42 +++++++++++++++++------
 src/rio/ntriples/ntriples_grammar.rustpeg |  4 +--
 2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/rio/ntriples/mod.rs b/src/rio/ntriples/mod.rs
index 68a7a273..7ca458bb 100644
--- a/src/rio/ntriples/mod.rs
+++ b/src/rio/ntriples/mod.rs
@@ -11,15 +11,37 @@ use std::io::BufRead;
 use std::io::BufReader;
 use std::io::Read;
 
+struct NTriplesIterator {
+    buffer: String,
+    reader: BufReader,
+    bnodes_map: BTreeMap,
+}
+
+impl Iterator for NTriplesIterator {
+    type Item = RioResult;
+
+    fn next(&mut self) -> Option> {
+        match self.reader.read_line(&mut self.buffer) {
+            Ok(line_count) => if line_count == 0 {
+                None
+            } else {
+                let result = grammar::triple(&self.buffer, &mut self.bnodes_map);
+                self.buffer.clear();
+                match result {
+                    Ok(Some(triple)) => Some(Ok(triple)),
+                    Ok(None) => self.next(),
+                    Err(error) => Some(Err(RioError::new(error))),
+                }
+            },
+            Err(error) => Some(Err(error.into())),
+        }
+    }
+}
+
 pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator> {
-    //TODO: use read_lines to avoid allocations
-    let lines = BufReader::new(source).lines();
-    let mut bnodes_map: BTreeMap = BTreeMap::default();
-    lines.flat_map(move |line| match line {
-        Ok(line) => match grammar::triple(line.as_str(), &mut bnodes_map) {
-            Ok(triple) => Some(Ok(triple?)),
-            Err(error) => Some(Err(RioError::new(error))),
-        },
-        Err(error) => Some(Err(error.into())),
-    })
+    NTriplesIterator {
+        buffer: String::default(),
+        reader: BufReader::new(source),
+        bnodes_map: BTreeMap::default(),
+    }
 }
diff --git a/src/rio/ntriples/ntriples_grammar.rustpeg b/src/rio/ntriples/ntriples_grammar.rustpeg
index 05c6596a..6ca4c2db 100644
--- a/src/rio/ntriples/ntriples_grammar.rustpeg
+++ b/src/rio/ntriples/ntriples_grammar.rustpeg
@@ -11,8 +11,8 @@ use std::collections::BTreeMap;
 //[2]
 #[pub]
 triple -> Option =
-    _ s:subject _ p:predicate _ o:object _ "." _ comment? { Some(Triple::new(s, p, o)) } /
-    _ comment? { None }
+    _ s:subject _ p:predicate _ o:object _ "." _ comment? EOL? { Some(Triple::new(s, p, o)) } /
+    _ comment? EOL? { None }
 
 //[3]
 subject -> NamedOrBlankNode =