Removes utils file

pull/10/head
Tpt 5 years ago
parent ce1c198552
commit f78121f9d3
  1. 1
      lib/src/lib.rs
  2. 25
      lib/src/model/literal.rs
  3. 6
      lib/src/model/named_node.rs
  4. 1
      lib/src/rio/mod.rs
  5. 156
      lib/src/rio/utils.rs
  6. 24
      lib/src/sparql/algebra.rs
  7. 182
      lib/src/sparql/parser.rs
  8. 1
      lib/src/sparql/sparql_grammar.rustpeg
  9. 1
      lib/src/store/memory.rs
  10. 18
      lib/src/store/numeric_encoder.rs
  11. 1
      lib/src/store/rocksdb.rs
  12. 129
      lib/src/utils.rs

@ -41,7 +41,6 @@ pub mod model;
pub mod rio; pub mod rio;
pub mod sparql; pub mod sparql;
pub mod store; pub mod store;
mod utils;
pub use failure::Error; pub use failure::Error;
pub type Result<T> = ::std::result::Result<T, failure::Error>; pub type Result<T> = ::std::result::Result<T, failure::Error>;

@ -2,13 +2,13 @@ use crate::model::language_tag::LanguageTag;
use crate::model::named_node::NamedNode; use crate::model::named_node::NamedNode;
use crate::model::vocab::rdf; use crate::model::vocab::rdf;
use crate::model::vocab::xsd; use crate::model::vocab::xsd;
use crate::utils::Escaper;
use chrono::prelude::*; use chrono::prelude::*;
use num_traits::identities::Zero; use num_traits::identities::Zero;
use num_traits::FromPrimitive; use num_traits::FromPrimitive;
use num_traits::One; use num_traits::One;
use num_traits::ToPrimitive; use num_traits::ToPrimitive;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use rio_api::model as rio;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use std::borrow::Cow; use std::borrow::Cow;
use std::fmt; use std::fmt;
@ -393,10 +393,27 @@ impl fmt::Display for Literal {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.is_plain() { if self.is_plain() {
self.language() self.language()
.map(|lang| write!(f, "\"{}\"@{}", self.value().escape(), lang)) .map(|lang| {
.unwrap_or_else(|| write!(f, "\"{}\"", self.value().escape())) rio::Literal::LanguageTaggedString {
value: &self.value(),
language: lang.as_str(),
}
.fmt(f)
})
.unwrap_or_else(|| {
rio::Literal::Simple {
value: &self.value(),
}
.fmt(f)
})
} else { } else {
write!(f, "\"{}\"^^{}", self.value().escape(), self.datatype()) rio::Literal::Typed {
value: &self.value(),
datatype: rio::NamedNode {
iri: self.datatype().as_str(),
},
}
.fmt(f)
} }
} }
} }

@ -1,5 +1,6 @@
use crate::Error; use crate::Error;
use crate::Result; use crate::Result;
use rio_api::model as rio;
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
@ -28,7 +29,10 @@ pub struct NamedNode {
impl fmt::Display for NamedNode { impl fmt::Display for NamedNode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "<{}>", self.iri) rio::NamedNode {
iri: self.iri.as_str(),
}
.fmt(f)
} }
} }

@ -3,5 +3,4 @@
pub mod ntriples; pub mod ntriples;
mod rio; mod rio;
pub mod turtle; pub mod turtle;
pub(crate) mod utils;
pub mod xml; pub mod xml;

@ -1,156 +0,0 @@
use crate::utils::StaticSliceMap;
use std::borrow::Cow;
use std::char;
use std::str::Chars;
/// Replaces `\uXXXX` and `\UXXXXXXXX` escapes in `input` with the characters they encode.
///
/// The input is returned borrowed when it contains no backslash directly followed by
/// `u` or `U`. Escapes that are truncated, whose digits are not all ASCII hexadecimal
/// digits, or whose value is not a valid `char` are copied to the output unchanged.
pub fn unescape_unicode_codepoints(input: &str) -> Cow<'_, str> {
    if needs_unescape_unicode_codepoints(input) {
        UnescapeUnicodeCharIterator::new(input).collect()
    } else {
        input.into()
    }
}

/// Cheap pre-check: is there any backslash byte directly followed by `u` or `U`?
fn needs_unescape_unicode_codepoints(input: &str) -> bool {
    input
        .as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'\\' && (pair[1] == b'u' || pair[1] == b'U'))
}

/// Decodes `digits` as a hexadecimal code point.
///
/// Every character must be an ASCII hex digit: `u32::from_str_radix` alone would also
/// accept a leading `+`, which is not part of a valid escape.
fn decode_hex_code_point(digits: &str) -> Option<char> {
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(digits, 16).ok().and_then(char::from_u32)
}

/// Character iterator over a string that decodes `\u` / `\U` escapes on the fly.
struct UnescapeUnicodeCharIterator<'a> {
    /// Remaining input.
    iter: Chars<'a>,
    /// Characters already consumed from `iter` that still have to be emitted verbatim.
    buffer: String,
}

impl<'a> UnescapeUnicodeCharIterator<'a> {
    fn new(string: &'a str) -> Self {
        Self {
            iter: string.chars(),
            // Largest pending content is the marker plus eight digits.
            buffer: String::with_capacity(9),
        }
    }

    /// Handles an escape whose backslash and `marker` (`u` or `U`) were just consumed.
    ///
    /// Reads up to `digits` further characters. On success the decoded character is
    /// returned and nothing stays pending. Otherwise a backslash is returned and the
    /// marker plus everything read so far is left in `buffer`, so the original text is
    /// reproduced by the following calls to `next`.
    fn read_escape(&mut self, marker: char, digits: usize) -> char {
        self.buffer.push(marker);
        for _ in 0..digits {
            match self.iter.next() {
                Some(c) => self.buffer.push(c),
                None => return '\\',
            }
        }
        // The marker is a single ASCII byte, so offset 1 is always a char boundary.
        // Slicing to the end (instead of a fixed byte length) avoids a panic when one
        // of the characters read is not ASCII.
        match decode_hex_code_point(&self.buffer[1..]) {
            Some(decoded) => {
                self.buffer.clear();
                decoded
            }
            None => '\\',
        }
    }
}

impl<'a> Iterator for UnescapeUnicodeCharIterator<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Flush characters kept back by a previous, non-decodable escape first.
        if !self.buffer.is_empty() {
            return Some(self.buffer.remove(0));
        }
        match self.iter.next()? {
            '\\' => Some(match self.iter.next() {
                Some('u') => self.read_escape('u', 4),
                Some('U') => self.read_escape('U', 8),
                Some(c) => {
                    // Not a unicode escape: emit the backslash now, the character next.
                    self.buffer.push(c);
                    '\\'
                }
                None => '\\',
            }),
            c => Some(c),
        }
    }
}
/// Resolves backslash escapes in `input` using the `replacement` table.
///
/// `characters` lists the bytes that may follow a backslash to form an escape. When no
/// backslash in `input` is directly followed by one of them, `input` is returned
/// borrowed; otherwise a new string is built with `UnescapeCharsIterator`.
pub fn unescape_characters<'a>(
    input: &'a str,
    characters: &'static [u8],
    replacement: &'static StaticSliceMap<char, char>,
) -> Cow<'a, str> {
    if !needs_unescape_characters(input, characters) {
        return Cow::Borrowed(input);
    }
    let unescaped: String = UnescapeCharsIterator::new(input, replacement).collect();
    Cow::Owned(unescaped)
}
/// Returns `true` when some backslash byte in `input` is directly followed by one of
/// the bytes listed in `characters`.
fn needs_unescape_characters(input: &str, characters: &[u8]) -> bool {
    input
        .as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'\\' && characters.contains(&pair[1]))
}
/// Character iterator that rewrites `\x` pairs according to a replacement table.
struct UnescapeCharsIterator<'a> {
    /// Remaining input.
    iter: Chars<'a>,
    /// Character that followed an unrecognised backslash and is emitted on the next call.
    buffer: Option<char>,
    /// Maps the character after a backslash to the character it stands for.
    replacement: &'static StaticSliceMap<char, char>,
}

impl<'a> UnescapeCharsIterator<'a> {
    fn new(string: &'a str, replacement: &'static StaticSliceMap<char, char>) -> Self {
        let iter = string.chars();
        Self {
            iter,
            buffer: None,
            replacement,
        }
    }
}

impl<'a> Iterator for UnescapeCharsIterator<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // A character held back by the previous call goes out first.
        if let Some(pending) = self.buffer.take() {
            return Some(pending);
        }
        let current = self.iter.next()?;
        if current != '\\' {
            return Some(current);
        }
        let escaped = match self.iter.next() {
            Some(escaped) => escaped,
            // Trailing lone backslash: emit it unchanged.
            None => return Some('\\'),
        };
        if let Some(unescaped) = self.replacement.get(escaped) {
            return Some(unescaped);
        }
        // Unknown escape: keep both characters, backslash now and the other one next.
        self.buffer = Some(escaped);
        Some('\\')
    }
}

@ -1,10 +1,10 @@
//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) AST //! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) AST
use crate::model::*; use crate::model::*;
use crate::utils::Escaper;
use crate::Result; use crate::Result;
use failure::format_err; use failure::format_err;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use rio_api::model as rio;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fmt; use std::fmt;
@ -1363,9 +1363,9 @@ impl fmt::Display for Aggregation {
.map(|s| { .map(|s| {
write!( write!(
f, f,
"Aggregation(Distinct({}), GroupConcat, {{\"separator\" → \"{}\"}})", "Aggregation(Distinct({}), GroupConcat, {{\"separator\" → {}}})",
e, e,
s.escape() fmt_str(s)
) )
}) })
.unwrap_or_else(|| { .unwrap_or_else(|| {
@ -1376,9 +1376,9 @@ impl fmt::Display for Aggregation {
.map(|s| { .map(|s| {
write!( write!(
f, f,
"Aggregation({}, GroupConcat, {{\"separator\" → \"{}\"}})", "Aggregation({}, GroupConcat, {{\"separator\" → {}}})",
e, e,
s.escape() fmt_str(s)
) )
}) })
.unwrap_or_else(|| { .unwrap_or_else(|| {
@ -1448,9 +1448,9 @@ impl<'a> fmt::Display for SparqlAggregation<'a> {
if let Some(sep) = sep { if let Some(sep) = sep {
write!( write!(
f, f,
"GROUP_CONCAT(DISTINCT {}; SEPARATOR = \"{}\")", "GROUP_CONCAT(DISTINCT {}; SEPARATOR = {})",
SparqlExpression(e), SparqlExpression(e),
sep.escape() fmt_str(sep)
) )
} else { } else {
write!(f, "GROUP_CONCAT(DISTINCT {})", SparqlExpression(e)) write!(f, "GROUP_CONCAT(DISTINCT {})", SparqlExpression(e))
@ -1458,9 +1458,9 @@ impl<'a> fmt::Display for SparqlAggregation<'a> {
} else if let Some(sep) = sep { } else if let Some(sep) = sep {
write!( write!(
f, f,
"GROUP_CONCAT({}; SEPARATOR = \"{}\")", "GROUP_CONCAT({}; SEPARATOR = {})",
SparqlExpression(e), SparqlExpression(e),
sep.escape() fmt_str(sep)
) )
} else { } else {
write!(f, "GROUP_CONCAT({})", SparqlExpression(e)) write!(f, "GROUP_CONCAT({})", SparqlExpression(e))
@ -1470,6 +1470,12 @@ impl<'a> fmt::Display for SparqlAggregation<'a> {
} }
} }
/// Wraps `value` in a `rio::Literal::Simple` so it can be written with `{}`.
///
/// The `Display` impls in this file use it to print the `GROUP_CONCAT` separator.
// NOTE(review): the format strings no longer add quotes or call `escape()` themselves,
// so rio's `Display` for `Literal::Simple` presumably emits the quoted, escaped form —
// confirm against the rio_api documentation.
fn fmt_str(value: &str) -> rio::Literal {
rio::Literal::Simple {
value: value.into(),
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum OrderComparator { pub enum OrderComparator {
Asc(Expression), Asc(Expression),

@ -11,16 +11,15 @@ mod grammar {
)] )]
use crate::model::*; use crate::model::*;
use crate::rio::utils::unescape_characters;
use crate::rio::utils::unescape_unicode_codepoints;
use crate::sparql::algebra::*; use crate::sparql::algebra::*;
use crate::utils::StaticSliceMap;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::borrow::Cow; use std::borrow::Cow;
use std::char;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::BufReader; use std::io::BufReader;
use std::io::Read; use std::io::Read;
use std::str::Chars;
use url::ParseOptions; use url::ParseOptions;
use url::Url; use url::Url;
@ -315,6 +314,183 @@ mod grammar {
} }
} }
/// Replaces `\uXXXX` and `\UXXXXXXXX` escapes in `input` with the characters they encode.
///
/// The input is returned borrowed when it contains no backslash directly followed by
/// `u` or `U`. Escapes that are truncated, whose digits are not all ASCII hexadecimal
/// digits, or whose value is not a valid `char` are copied to the output unchanged.
pub fn unescape_unicode_codepoints(input: &str) -> Cow<'_, str> {
    if needs_unescape_unicode_codepoints(input) {
        UnescapeUnicodeCharIterator::new(input).collect()
    } else {
        input.into()
    }
}

/// Cheap pre-check: is there any backslash byte directly followed by `u` or `U`?
fn needs_unescape_unicode_codepoints(input: &str) -> bool {
    input
        .as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'\\' && (pair[1] == b'u' || pair[1] == b'U'))
}

/// Decodes `digits` as a hexadecimal code point.
///
/// Every character must be an ASCII hex digit: `u32::from_str_radix` alone would also
/// accept a leading `+`, which is not part of a valid escape.
fn decode_hex_code_point(digits: &str) -> Option<char> {
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(digits, 16).ok().and_then(char::from_u32)
}

/// Character iterator over a string that decodes `\u` / `\U` escapes on the fly.
struct UnescapeUnicodeCharIterator<'a> {
    /// Remaining input.
    iter: Chars<'a>,
    /// Characters already consumed from `iter` that still have to be emitted verbatim.
    buffer: String,
}

impl<'a> UnescapeUnicodeCharIterator<'a> {
    fn new(string: &'a str) -> Self {
        Self {
            iter: string.chars(),
            // Largest pending content is the marker plus eight digits.
            buffer: String::with_capacity(9),
        }
    }

    /// Handles an escape whose backslash and `marker` (`u` or `U`) were just consumed.
    ///
    /// Reads up to `digits` further characters. On success the decoded character is
    /// returned and nothing stays pending. Otherwise a backslash is returned and the
    /// marker plus everything read so far is left in `buffer`, so the original text is
    /// reproduced by the following calls to `next`.
    fn read_escape(&mut self, marker: char, digits: usize) -> char {
        self.buffer.push(marker);
        for _ in 0..digits {
            match self.iter.next() {
                Some(c) => self.buffer.push(c),
                None => return '\\',
            }
        }
        // The marker is a single ASCII byte, so offset 1 is always a char boundary.
        // Slicing to the end (instead of a fixed byte length) avoids a panic when one
        // of the characters read is not ASCII.
        match decode_hex_code_point(&self.buffer[1..]) {
            Some(decoded) => {
                self.buffer.clear();
                decoded
            }
            None => '\\',
        }
    }
}

impl<'a> Iterator for UnescapeUnicodeCharIterator<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Flush characters kept back by a previous, non-decodable escape first.
        if !self.buffer.is_empty() {
            return Some(self.buffer.remove(0));
        }
        match self.iter.next()? {
            '\\' => Some(match self.iter.next() {
                Some('u') => self.read_escape('u', 4),
                Some('U') => self.read_escape('U', 8),
                Some(c) => {
                    // Not a unicode escape: emit the backslash now, the character next.
                    self.buffer.push(c);
                    '\\'
                }
                None => '\\',
            }),
            c => Some(c),
        }
    }
}
/// Resolves backslash escapes in `input` using the `replacement` table.
///
/// `characters` lists the bytes that may follow a backslash to form an escape. When no
/// backslash in `input` is directly followed by one of them, `input` is returned
/// borrowed; otherwise a new string is built with `UnescapeCharsIterator`.
pub fn unescape_characters<'a>(
    input: &'a str,
    characters: &'static [u8],
    replacement: &'static StaticSliceMap<char, char>,
) -> Cow<'a, str> {
    if !needs_unescape_characters(input, characters) {
        return Cow::Borrowed(input);
    }
    let unescaped: String = UnescapeCharsIterator::new(input, replacement).collect();
    Cow::Owned(unescaped)
}
/// Returns `true` when some backslash byte in `input` is directly followed by one of
/// the bytes listed in `characters`.
fn needs_unescape_characters(input: &str, characters: &[u8]) -> bool {
    input
        .as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'\\' && characters.contains(&pair[1]))
}
/// Character iterator that rewrites `\x` pairs according to a replacement table.
struct UnescapeCharsIterator<'a> {
    /// Remaining input.
    iter: Chars<'a>,
    /// Character that followed an unrecognised backslash and is emitted on the next call.
    buffer: Option<char>,
    /// Maps the character after a backslash to the character it stands for.
    replacement: &'static StaticSliceMap<char, char>,
}

impl<'a> UnescapeCharsIterator<'a> {
    fn new(string: &'a str, replacement: &'static StaticSliceMap<char, char>) -> Self {
        let iter = string.chars();
        Self {
            iter,
            buffer: None,
            replacement,
        }
    }
}

impl<'a> Iterator for UnescapeCharsIterator<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // A character held back by the previous call goes out first.
        if let Some(pending) = self.buffer.take() {
            return Some(pending);
        }
        let current = self.iter.next()?;
        if current != '\\' {
            return Some(current);
        }
        let escaped = match self.iter.next() {
            Some(escaped) => escaped,
            // Trailing lone backslash: emit it unchanged.
            None => return Some('\\'),
        };
        if let Some(unescaped) = self.replacement.get(escaped) {
            return Some(unescaped);
        }
        // Unknown escape: keep both characters, backslash now and the other one next.
        self.buffer = Some(escaped);
        Some('\\')
    }
}
/// Small read-only map stored as two parallel `'static` slices.
///
/// The value for `keys[i]` is `values[i]`; lookups scan the keys in order.
pub struct StaticSliceMap<K: 'static + Copy + Eq, V: 'static + Copy> {
    keys: &'static [K],
    values: &'static [V],
}

impl<K: 'static + Copy + Eq, V: 'static + Copy> StaticSliceMap<K, V> {
    /// Builds the map from parallel slices.
    ///
    /// # Panics
    ///
    /// Panics if `keys` and `values` do not have the same length.
    pub fn new(keys: &'static [K], values: &'static [V]) -> Self {
        assert_eq!(
            keys.len(),
            values.len(),
            "keys and values slices of StaticSliceMap should have the same size"
        );
        Self { keys, values }
    }

    /// Returns the value paired with the first key equal to `key`, if any.
    pub fn get(&self, key: K) -> Option<V> {
        self.keys
            .iter()
            .zip(self.values.iter())
            .find(|(candidate, _)| **candidate == key)
            .map(|(_, value)| *value)
    }
}
const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\']; const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\'];
lazy_static! { lazy_static! {
static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new( static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(

@ -1,6 +1,5 @@
//See https://www.w3.org/TR/turtle/#sec-grammar //See https://www.w3.org/TR/turtle/#sec-grammar
use std::char;
use crate::model::vocab::rdf; use crate::model::vocab::rdf;
use crate::model::vocab::xsd; use crate::model::vocab::xsd;
use std::str::FromStr; use std::str::FromStr;

@ -1,7 +1,6 @@
use crate::model::LanguageTag; use crate::model::LanguageTag;
use crate::store::encoded::*; use crate::store::encoded::*;
use crate::store::numeric_encoder::*; use crate::store::numeric_encoder::*;
use crate::utils::MutexPoisonError;
use crate::Result; use crate::Result;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::BTreeSet; use std::collections::BTreeSet;

@ -1,11 +1,12 @@
use crate::model::vocab::rdf; use crate::model::vocab::rdf;
use crate::model::vocab::xsd; use crate::model::vocab::xsd;
use crate::model::*; use crate::model::*;
use crate::utils::MutexPoisonError;
use crate::Result; use crate::Result;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use chrono::prelude::*; use chrono::prelude::*;
use failure::format_err; use failure::format_err;
use failure::Backtrace;
use failure::Fail;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use std::collections::BTreeMap; use std::collections::BTreeMap;
@ -13,6 +14,7 @@ use std::io::Read;
use std::io::Write; use std::io::Write;
use std::ops::Deref; use std::ops::Deref;
use std::str; use std::str;
use std::sync::PoisonError;
use std::sync::RwLock; use std::sync::RwLock;
use url::Url; use url::Url;
use uuid::Uuid; use uuid::Uuid;
@ -792,6 +794,20 @@ impl<S: StringStore + Default> Default for Encoder<S> {
} }
} }
/// Error produced from a `std::sync::PoisonError`.
///
/// The `From` impl discards the `PoisonError` (and the guard it carries) and records
/// only a new `Backtrace` created at the conversion site.
#[derive(Debug, Fail)]
#[fail(display = "Mutex was poisoned")]
pub struct MutexPoisonError {
    backtrace: Backtrace,
}

impl<T> From<PoisonError<T>> for MutexPoisonError {
    fn from(_: PoisonError<T>) -> Self {
        Self {
            backtrace: Backtrace::new(),
        }
    }
}
#[test] #[test]
fn test_encoding() { fn test_encoding() {
use std::str::FromStr; use std::str::FromStr;

@ -2,7 +2,6 @@ use crate::model::LanguageTag;
use crate::store::encoded::EncodedQuadsStore; use crate::store::encoded::EncodedQuadsStore;
use crate::store::encoded::StoreDataset; use crate::store::encoded::StoreDataset;
use crate::store::numeric_encoder::*; use crate::store::numeric_encoder::*;
use crate::utils::MutexPoisonError;
use crate::Result; use crate::Result;
use byteorder::ByteOrder; use byteorder::ByteOrder;
use byteorder::LittleEndian; use byteorder::LittleEndian;

@ -1,129 +0,0 @@
use failure::Backtrace;
use failure::Fail;
use std::sync::PoisonError;
/// Backslash-escaping of text, implemented below for `str`.
pub trait Escaper {
    /// Returns an escaped copy of `self`.
    fn escape(&self) -> String;
}

impl Escaper for str {
    /// Escapes tab, backspace, line feed, carriage return and form feed as `\t`, `\b`,
    /// `\n`, `\r` and `\f`, and puts a backslash before `\`, `'` and `"`.
    /// Every other character is copied unchanged.
    fn escape(&self) -> String {
        let mut escaped = String::with_capacity(self.len());
        for c in self.chars() {
            escaped.extend(EscapeRDF::new(c));
        }
        escaped
    }
}

/// Customized version of EscapeDefault of the Rust standard library:
/// yields the one or two characters that represent a single input character.
struct EscapeRDF {
    state: EscapeRdfState,
}

/// What `EscapeRDF` still has to emit.
enum EscapeRdfState {
    /// Nothing left.
    Done,
    /// Only this character is left.
    Char(char),
    /// A backslash, then this character.
    Backslash(char),
}

impl EscapeRDF {
    fn new(c: char) -> Self {
        let state = match c {
            '\t' => EscapeRdfState::Backslash('t'),
            '\u{08}' => EscapeRdfState::Backslash('b'),
            '\n' => EscapeRdfState::Backslash('n'),
            '\r' => EscapeRdfState::Backslash('r'),
            '\u{0C}' => EscapeRdfState::Backslash('f'),
            '\\' | '\'' | '"' => EscapeRdfState::Backslash(c),
            _ => EscapeRdfState::Char(c),
        };
        Self { state }
    }
}

impl Iterator for EscapeRDF {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Compute the emitted character and the successor state together.
        let (emitted, following) = match self.state {
            EscapeRdfState::Backslash(c) => (Some('\\'), EscapeRdfState::Char(c)),
            EscapeRdfState::Char(c) => (Some(c), EscapeRdfState::Done),
            EscapeRdfState::Done => (None, EscapeRdfState::Done),
        };
        self.state = following;
        emitted
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    fn count(self) -> usize {
        self.len()
    }
}

impl ExactSizeIterator for EscapeRDF {
    fn len(&self) -> usize {
        match self.state {
            EscapeRdfState::Backslash(_) => 2,
            EscapeRdfState::Char(_) => 1,
            EscapeRdfState::Done => 0,
        }
    }
}

#[test]
fn test_escaper() {
    // (expected, raw) pairs.
    let cases = [
        ("foo", "foo"),
        (
            "John said: \\\"Hello World!\\\"",
            "John said: \"Hello World!\"",
        ),
        (
            "John said: \\\"Hello World!\\\\\\\"",
            "John said: \"Hello World!\\\"",
        ),
    ];
    for (expected, raw) in cases.iter() {
        assert_eq!(*expected, raw.escape());
    }
}
/// Small read-only map stored as two parallel `'static` slices.
///
/// The value for `keys[i]` is `values[i]`; lookups scan the keys in order.
pub struct StaticSliceMap<K: 'static + Copy + Eq, V: 'static + Copy> {
    keys: &'static [K],
    values: &'static [V],
}

impl<K: 'static + Copy + Eq, V: 'static + Copy> StaticSliceMap<K, V> {
    /// Builds the map from parallel slices.
    ///
    /// # Panics
    ///
    /// Panics if `keys` and `values` do not have the same length.
    pub fn new(keys: &'static [K], values: &'static [V]) -> Self {
        assert_eq!(
            keys.len(),
            values.len(),
            "keys and values slices of StaticSliceMap should have the same size"
        );
        Self { keys, values }
    }

    /// Returns the value paired with the first key equal to `key`, if any.
    pub fn get(&self, key: K) -> Option<V> {
        self.keys
            .iter()
            .zip(self.values.iter())
            .find(|(candidate, _)| **candidate == key)
            .map(|(_, value)| *value)
    }
}
/// Error produced from a `std::sync::PoisonError`.
///
/// The `From` impl discards the `PoisonError` (and the guard it carries) and records
/// only a new `Backtrace` created at the conversion site.
#[derive(Debug, Fail)]
#[fail(display = "Mutex was poisoned")]
pub struct MutexPoisonError {
    backtrace: Backtrace,
}

impl<T> From<PoisonError<T>> for MutexPoisonError {
    fn from(_: PoisonError<T>) -> Self {
        Self {
            backtrace: Backtrace::new(),
        }
    }
}
Loading…
Cancel
Save