From f78121f9d38f4e380c883015bfef4195e3a7a8cf Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 13 Aug 2019 17:27:23 +0200 Subject: [PATCH] Removes utils file --- lib/src/lib.rs | 1 - lib/src/model/literal.rs | 25 +++- lib/src/model/named_node.rs | 6 +- lib/src/rio/mod.rs | 1 - lib/src/rio/utils.rs | 156 ---------------------- lib/src/sparql/algebra.rs | 30 +++-- lib/src/sparql/parser.rs | 182 +++++++++++++++++++++++++- lib/src/sparql/sparql_grammar.rustpeg | 1 - lib/src/store/memory.rs | 1 - lib/src/store/numeric_encoder.rs | 18 ++- lib/src/store/rocksdb.rs | 1 - lib/src/utils.rs | 129 ------------------ 12 files changed, 240 insertions(+), 311 deletions(-) delete mode 100644 lib/src/rio/utils.rs delete mode 100644 lib/src/utils.rs diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 0a64213f..4c5fa47e 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -41,7 +41,6 @@ pub mod model; pub mod rio; pub mod sparql; pub mod store; -mod utils; pub use failure::Error; pub type Result = ::std::result::Result; diff --git a/lib/src/model/literal.rs b/lib/src/model/literal.rs index 657a04da..33127f8a 100644 --- a/lib/src/model/literal.rs +++ b/lib/src/model/literal.rs @@ -2,13 +2,13 @@ use crate::model::language_tag::LanguageTag; use crate::model::named_node::NamedNode; use crate::model::vocab::rdf; use crate::model::vocab::xsd; -use crate::utils::Escaper; use chrono::prelude::*; use num_traits::identities::Zero; use num_traits::FromPrimitive; use num_traits::One; use num_traits::ToPrimitive; use ordered_float::OrderedFloat; +use rio_api::model as rio; use rust_decimal::Decimal; use std::borrow::Cow; use std::fmt; @@ -393,10 +393,27 @@ impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.is_plain() { self.language() - .map(|lang| write!(f, "\"{}\"@{}", self.value().escape(), lang)) - .unwrap_or_else(|| write!(f, "\"{}\"", self.value().escape())) + .map(|lang| { + rio::Literal::LanguageTaggedString { + value: &self.value(), + language: lang.as_str(), + } + .fmt(f) + }) + .unwrap_or_else(|| { + rio::Literal::Simple { + value: &self.value(), + } + .fmt(f) + }) } else { - write!(f, "\"{}\"^^{}", self.value().escape(), self.datatype()) + rio::Literal::Typed { + value: &self.value(), + datatype: rio::NamedNode { + iri: self.datatype().as_str(), + }, + } + .fmt(f) } } } diff --git a/lib/src/model/named_node.rs b/lib/src/model/named_node.rs index 36472ee6..f5077e8d 100644 --- a/lib/src/model/named_node.rs +++ b/lib/src/model/named_node.rs @@ -1,5 +1,6 @@ use crate::Error; use crate::Result; +use rio_api::model as rio; use std::fmt; use std::str::FromStr; use std::sync::Arc; @@ -28,7 +29,10 @@ pub struct NamedNode { impl fmt::Display for NamedNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "<{}>", self.iri) + rio::NamedNode { + iri: self.iri.as_str(), + } + .fmt(f) } } diff --git a/lib/src/rio/mod.rs b/lib/src/rio/mod.rs index 192af82e..685a3f3b 100644 --- a/lib/src/rio/mod.rs +++ b/lib/src/rio/mod.rs @@ -3,5 +3,4 @@ pub mod ntriples; mod rio; pub mod turtle; -pub(crate) mod utils; pub mod xml; diff --git a/lib/src/rio/utils.rs b/lib/src/rio/utils.rs deleted file mode 100644 index 0cce2252..00000000 --- a/lib/src/rio/utils.rs +++ /dev/null @@ -1,156 +0,0 @@ -use crate::utils::StaticSliceMap; -use std::borrow::Cow; -use std::char; -use std::str::Chars; - -pub fn unescape_unicode_codepoints(input: &str) -> Cow<'_, str> { - if needs_unescape_unicode_codepoints(input) { - UnescapeUnicodeCharIterator::new(input).collect() - } else { - input.into() - } -} - -fn needs_unescape_unicode_codepoints(input: &str) -> bool { - let bytes = input.as_bytes(); - for i in 1..bytes.len() { - if (bytes[i] == b'u' || bytes[i] == b'U') && bytes[i - 1] == b'\\' { - return true; - } - } - false -} - -struct UnescapeUnicodeCharIterator<'a> { - iter: Chars<'a>, - buffer: String, -} - -impl<'a> UnescapeUnicodeCharIterator<'a> { - fn new(string: &'a str) -> Self { - Self { - iter: string.chars(), - buffer: String::with_capacity(9), - } - } -} - -impl<'a> Iterator for UnescapeUnicodeCharIterator<'a> { - type Item = char; - - fn next(&mut self) -> Option { - if !self.buffer.is_empty() { - return Some(self.buffer.remove(0)); - } - match self.iter.next()? { - '\\' => match self.iter.next() { - Some('u') => { - self.buffer.push('u'); - for _ in 0..4 { - if let Some(c) = self.iter.next() { - self.buffer.push(c); - } else { - return Some('\\'); - } - } - if let Some(c) = u32::from_str_radix(&self.buffer[1..5], 16) - .ok() - .and_then(char::from_u32) - { - self.buffer.clear(); - Some(c) - } else { - Some('\\') - } - } - Some('U') => { - self.buffer.push('U'); - for _ in 0..8 { - if let Some(c) = self.iter.next() { - self.buffer.push(c); - } else { - return Some('\\'); - } - } - if let Some(c) = u32::from_str_radix(&self.buffer[1..9], 16) - .ok() - .and_then(char::from_u32) - { - self.buffer.clear(); - Some(c) - } else { - Some('\\') - } - } - Some(c) => { - self.buffer.push(c); - Some('\\') - } - None => Some('\\'), - }, - c => Some(c), - } - } -} - -pub fn unescape_characters<'a>( - input: &'a str, - characters: &'static [u8], - replacement: &'static StaticSliceMap, -) -> Cow<'a, str> { - if needs_unescape_characters(input, characters) { - UnescapeCharsIterator::new(input, replacement).collect() - } else { - input.into() - } -} - -fn needs_unescape_characters(input: &str, characters: &[u8]) -> bool { - let bytes = input.as_bytes(); - for i in 1..bytes.len() { - if bytes[i - 1] == b'\\' && characters.contains(&bytes[i]) { - return true; - } - } - false -} - -struct UnescapeCharsIterator<'a> { - iter: Chars<'a>, - buffer: Option, - replacement: &'static StaticSliceMap, -} - -impl<'a> UnescapeCharsIterator<'a> { - fn new(string: &'a str, replacement: &'static StaticSliceMap) -> Self { - Self { - iter: string.chars(), - buffer: None, - replacement, - } - } -} - -impl<'a> Iterator for UnescapeCharsIterator<'a> { - type Item = char; - - fn next(&mut self) -> Option { - if let Some(ch) = self.buffer { - self.buffer = None; - return Some(ch); - } - match self.iter.next()? { - '\\' => match self.iter.next() { - Some(ch) => match self.replacement.get(ch) { - Some(replace) => Some(replace), - None => { - self.buffer = Some(ch); - Some('\\') - } - }, - None => Some('\\'), - }, - c => Some(c), - } - } -} diff --git a/lib/src/sparql/algebra.rs b/lib/src/sparql/algebra.rs index a362bcaa..baa9e8d2 100644 --- a/lib/src/sparql/algebra.rs +++ b/lib/src/sparql/algebra.rs @@ -1,10 +1,10 @@ //! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) AST use crate::model::*; -use crate::utils::Escaper; use crate::Result; use failure::format_err; use lazy_static::lazy_static; +use rio_api::model as rio; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::fmt; @@ -1362,11 +1362,11 @@ impl fmt::Display for Aggregation { sep.as_ref() .map(|s| { write!( - f, - "Aggregation(Distinct({}), GroupConcat, {{\"separator\" → \"{}\"}})", - e, - s.escape() - ) + f, + "Aggregation(Distinct({}), GroupConcat, {{\"separator\" → {}}})", + e, + fmt_str(s) + ) }) .unwrap_or_else(|| { write!(f, "Aggregation(Distinct({}), GroupConcat, {{}})", e) @@ -1376,9 +1376,9 @@ impl fmt::Display for Aggregation { .map(|s| { write!( f, - "Aggregation({}, GroupConcat, {{\"separator\" → \"{}\"}})", + "Aggregation({}, GroupConcat, {{\"separator\" → {}}})", e, - s.escape() + fmt_str(s) ) }) .unwrap_or_else(|| { @@ -1448,9 +1448,9 @@ impl<'a> fmt::Display for SparqlAggregation<'a> { if let Some(sep) = sep { write!( f, - "GROUP_CONCAT(DISTINCT {}; SEPARATOR = \"{}\")", + "GROUP_CONCAT(DISTINCT {}; SEPARATOR = {})", SparqlExpression(e), - sep.escape() + fmt_str(sep) ) } else { write!(f, "GROUP_CONCAT(DISTINCT {})", SparqlExpression(e)) @@ -1458,9 +1458,9 @@ impl<'a> fmt::Display for SparqlAggregation<'a> { } else if let Some(sep) = sep { write!( f, - "GROUP_CONCAT({}; SEPARATOR = \"{}\")", + "GROUP_CONCAT({}; SEPARATOR = {})", SparqlExpression(e), - sep.escape() + fmt_str(sep) ) } else { write!(f, "GROUP_CONCAT({})", SparqlExpression(e)) @@ -1470,6 +1470,12 @@ impl<'a> fmt::Display for SparqlAggregation<'a> { } } +fn fmt_str(value: &str) -> rio::Literal { + rio::Literal::Simple { + value: value.into(), + } +} + #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub enum OrderComparator { Asc(Expression), diff --git a/lib/src/sparql/parser.rs b/lib/src/sparql/parser.rs index e70c378d..f3eeff96 100644 --- a/lib/src/sparql/parser.rs +++ b/lib/src/sparql/parser.rs @@ -11,16 +11,15 @@ mod grammar { )] use crate::model::*; - use crate::rio::utils::unescape_characters; - use crate::rio::utils::unescape_unicode_codepoints; use crate::sparql::algebra::*; - use crate::utils::StaticSliceMap; use lazy_static::lazy_static; use std::borrow::Cow; + use std::char; use std::collections::BTreeMap; use std::collections::HashMap; use std::io::BufReader; use std::io::Read; + use std::str::Chars; use url::ParseOptions; use url::Url; @@ -315,6 +314,183 @@ mod grammar { } } + pub fn unescape_unicode_codepoints(input: &str) -> Cow<'_, str> { + if needs_unescape_unicode_codepoints(input) { + UnescapeUnicodeCharIterator::new(input).collect() + } else { + input.into() + } + } + + fn needs_unescape_unicode_codepoints(input: &str) -> bool { + let bytes = input.as_bytes(); + for i in 1..bytes.len() { + if (bytes[i] == b'u' || bytes[i] == b'U') && bytes[i - 1] == b'\\' { + return true; + } + } + false + } + + struct UnescapeUnicodeCharIterator<'a> { + iter: Chars<'a>, + buffer: String, + } + + impl<'a> UnescapeUnicodeCharIterator<'a> { + fn new(string: &'a str) -> Self { + Self { + iter: string.chars(), + buffer: String::with_capacity(9), + } + } + } + + impl<'a> Iterator for UnescapeUnicodeCharIterator<'a> { + type Item = char; + + fn next(&mut self) -> Option { + if !self.buffer.is_empty() { + return Some(self.buffer.remove(0)); + } + match self.iter.next()? { + '\\' => match self.iter.next() { + Some('u') => { + self.buffer.push('u'); + for _ in 0..4 { + if let Some(c) = self.iter.next() { + self.buffer.push(c); + } else { + return Some('\\'); + } + } + if let Some(c) = u32::from_str_radix(&self.buffer[1..5], 16) + .ok() + .and_then(char::from_u32) + { + self.buffer.clear(); + Some(c) + } else { + Some('\\') + } + } + Some('U') => { + self.buffer.push('U'); + for _ in 0..8 { + if let Some(c) = self.iter.next() { + self.buffer.push(c); + } else { + return Some('\\'); + } + } + if let Some(c) = u32::from_str_radix(&self.buffer[1..9], 16) + .ok() + .and_then(char::from_u32) + { + self.buffer.clear(); + Some(c) + } else { + Some('\\') + } + } + Some(c) => { + self.buffer.push(c); + Some('\\') + } + None => Some('\\'), + }, + c => Some(c), + } + } + } + + pub fn unescape_characters<'a>( + input: &'a str, + characters: &'static [u8], + replacement: &'static StaticSliceMap, + ) -> Cow<'a, str> { + if needs_unescape_characters(input, characters) { + UnescapeCharsIterator::new(input, replacement).collect() + } else { + input.into() + } + } + + fn needs_unescape_characters(input: &str, characters: &[u8]) -> bool { + let bytes = input.as_bytes(); + for i in 1..bytes.len() { + if bytes[i - 1] == b'\\' && characters.contains(&bytes[i]) { + return true; + } + } + false + } + + struct UnescapeCharsIterator<'a> { + iter: Chars<'a>, + buffer: Option, + replacement: &'static StaticSliceMap, + } + + impl<'a> UnescapeCharsIterator<'a> { + fn new(string: &'a str, replacement: &'static StaticSliceMap) -> Self { + Self { + iter: string.chars(), + buffer: None, + replacement, + } + } + } + + impl<'a> Iterator for UnescapeCharsIterator<'a> { + type Item = char; + + fn next(&mut self) -> Option { + if let Some(ch) = self.buffer { + self.buffer = None; + return Some(ch); + } + match self.iter.next()? { + '\\' => match self.iter.next() { + Some(ch) => match self.replacement.get(ch) { + Some(replace) => Some(replace), + None => { + self.buffer = Some(ch); + Some('\\') + } + }, + None => Some('\\'), + }, + c => Some(c), + } + } + } + + pub struct StaticSliceMap { + keys: &'static [K], + values: &'static [V], + } + + impl StaticSliceMap { + pub fn new(keys: &'static [K], values: &'static [V]) -> Self { + assert_eq!( + keys.len(), + values.len(), + "keys and values slices of StaticSliceMap should have the same size" + ); + Self { keys, values } + } + + pub fn get(&self, key: K) -> Option { + for i in 0..self.keys.len() { + if self.keys[i] == key { + return Some(self.values[i]); + } + } + None + } + } + const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\']; lazy_static! { static ref UNESCAPE_REPLACEMENT: StaticSliceMap = StaticSliceMap::new( diff --git a/lib/src/sparql/sparql_grammar.rustpeg b/lib/src/sparql/sparql_grammar.rustpeg index 62cc9f03..ed548e7f 100644 --- a/lib/src/sparql/sparql_grammar.rustpeg +++ b/lib/src/sparql/sparql_grammar.rustpeg @@ -1,6 +1,5 @@ //See https://www.w3.org/TR/turtle/#sec-grammar -use std::char; use crate::model::vocab::rdf; use crate::model::vocab::xsd; use std::str::FromStr; diff --git a/lib/src/store/memory.rs b/lib/src/store/memory.rs index d6487ef3..7be9da9a 100644 --- a/lib/src/store/memory.rs +++ b/lib/src/store/memory.rs @@ -1,7 +1,6 @@ use crate::model::LanguageTag; use crate::store::encoded::*; use crate::store::numeric_encoder::*; -use crate::utils::MutexPoisonError; use crate::Result; use std::collections::BTreeMap; use std::collections::BTreeSet; diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 3638af03..ad3d5c6f 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -1,11 +1,12 @@ use crate::model::vocab::rdf; use crate::model::vocab::xsd; use crate::model::*; -use crate::utils::MutexPoisonError; use crate::Result; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use chrono::prelude::*; use failure::format_err; +use failure::Backtrace; +use failure::Fail; use ordered_float::OrderedFloat; use rust_decimal::Decimal; use std::collections::BTreeMap; @@ -13,6 +14,7 @@ use std::io::Read; use std::io::Write; use std::ops::Deref; use std::str; +use std::sync::PoisonError; use std::sync::RwLock; use url::Url; use uuid::Uuid; @@ -792,6 +794,20 @@ impl Default for Encoder { } } +#[derive(Debug, Fail)] +#[fail(display = "Mutex Mutex was poisoned")] +pub struct MutexPoisonError { + backtrace: Backtrace, +} + +impl From> for MutexPoisonError { + fn from(_: PoisonError) -> Self { + Self { + backtrace: Backtrace::new(), + } + } +} + #[test] fn test_encoding() { use std::str::FromStr; diff --git a/lib/src/store/rocksdb.rs b/lib/src/store/rocksdb.rs index e0a1705e..a7ece6a6 100644 --- a/lib/src/store/rocksdb.rs +++ b/lib/src/store/rocksdb.rs @@ -2,7 +2,6 @@ use crate::model::LanguageTag; use crate::store::encoded::EncodedQuadsStore; use crate::store::encoded::StoreDataset; use crate::store::numeric_encoder::*; -use crate::utils::MutexPoisonError; use crate::Result; use byteorder::ByteOrder; use byteorder::LittleEndian; diff --git a/lib/src/utils.rs b/lib/src/utils.rs deleted file mode 100644 index 4f8ad39f..00000000 --- a/lib/src/utils.rs +++ /dev/null @@ -1,129 +0,0 @@ -use failure::Backtrace; -use failure::Fail; -use std::sync::PoisonError; - -pub trait Escaper { - fn escape(&self) -> String; -} - -impl Escaper for str { - fn escape(&self) -> String { - self.chars().flat_map(EscapeRDF::new).collect() - } -} - -/// Customized version of EscapeDefault of the Rust standard library -struct EscapeRDF { - state: EscapeRdfState, -} - -enum EscapeRdfState { - Done, - Char(char), - Backslash(char), -} - -impl EscapeRDF { - fn new(c: char) -> Self { - Self { - state: match c { - '\t' => EscapeRdfState::Backslash('t'), - '\u{08}' => EscapeRdfState::Backslash('b'), - '\n' => EscapeRdfState::Backslash('n'), - '\r' => EscapeRdfState::Backslash('r'), - '\u{0C}' => EscapeRdfState::Backslash('f'), - '\\' | '\'' | '"' => EscapeRdfState::Backslash(c), - c => EscapeRdfState::Char(c), - }, - } - } -} - -impl Iterator for EscapeRDF { - type Item = char; - - fn next(&mut self) -> Option { - match self.state { - EscapeRdfState::Backslash(c) => { - self.state = EscapeRdfState::Char(c); - Some('\\') - } - EscapeRdfState::Char(c) => { - self.state = EscapeRdfState::Done; - Some(c) - } - EscapeRdfState::Done => None, - } - } - - fn size_hint(&self) -> (usize, Option) { - let n = self.len(); - (n, Some(n)) - } - - fn count(self) -> usize { - self.len() - } -} - -impl ExactSizeIterator for EscapeRDF { - fn len(&self) -> usize { - match self.state { - EscapeRdfState::Done => 0, - EscapeRdfState::Char(_) => 1, - EscapeRdfState::Backslash(_) => 2, - } - } -} - -#[test] -fn test_escaper() { - assert_eq!("foo", "foo".escape()); - assert_eq!( - "John said: \\\"Hello World!\\\"", - "John said: \"Hello World!\"".escape() - ); - assert_eq!( - "John said: \\\"Hello World!\\\\\\\"", - "John said: \"Hello World!\\\"".escape() - ); -} - -pub struct StaticSliceMap { - keys: &'static [K], - values: &'static [V], -} - -impl StaticSliceMap { - pub fn new(keys: &'static [K], values: &'static [V]) -> Self { - assert_eq!( - keys.len(), - values.len(), - "keys and values slices of StaticSliceMap should have the same size" - ); - Self { keys, values } - } - - pub fn get(&self, key: K) -> Option { - for i in 0..self.keys.len() { - if self.keys[i] == key { - return Some(self.values[i]); - } - } - None - } -} - -#[derive(Debug, Fail)] -#[fail(display = "Mutex Mutex was poisoned")] -pub struct MutexPoisonError { - backtrace: Backtrace, -} - -impl From> for MutexPoisonError { - fn from(_: PoisonError) -> Self { - Self { - backtrace: Backtrace::new(), - } - } -}