Removes encoded string constants

pull/46/head
Tpt 4 years ago
parent 669d3b1494
commit 3b207f7239
  1. 72
      lib/src/sparql/eval.rs
  2. 8
      lib/src/sparql/plan.rs
  3. 4
      lib/src/sparql/plan_builder.rs
  4. 10
      lib/src/store/memory.rs
  5. 138
      lib/src/store/numeric_encoder.rs
  6. 10
      lib/src/store/rocksdb.rs
  7. 6
      lib/src/store/sled.rs

@ -996,7 +996,7 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
value_id: language_id,
})
}
e if e.is_literal() => Some(ENCODED_EMPTY_STRING_LITERAL),
e if e.is_literal() => self.build_string_literal(""),
_ => None,
},
PlanExpression::LangMatches(language_tag, language_range) => {
@ -1023,7 +1023,7 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
.into(),
)
}
PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(),
PlanExpression::Datatype(e) => self.datatype(self.eval_expression(e, tuple)?),
PlanExpression::Bound(v) => Some(tuple.contains(*v).into()),
PlanExpression::IRI(e) => {
let iri_id = match self.eval_expression(e, tuple)? {
@ -1225,7 +1225,7 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
if let Some(position) = (&arg1).find(arg2.as_str()) {
self.build_plain_literal(&arg1[..position], language)
} else {
Some(ENCODED_EMPTY_STRING_LITERAL)
self.build_string_literal("")
}
}
PlanExpression::StrAfter(arg1, arg2) => {
@ -1236,7 +1236,7 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
if let Some(position) = (&arg1).find(arg2.as_str()) {
self.build_plain_literal(&arg1[position + arg2.len()..], language)
} else {
Some(ENCODED_EMPTY_STRING_LITERAL)
self.build_string_literal("")
}
}
PlanExpression::Year(e) => match self.eval_expression(e, tuple)? {
@ -1285,12 +1285,12 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
EncodedTerm::DateTimeLiteral(date_time) => date_time.timezone_offset(),
_ => return None,
};
Some(match timezone_offset {
match timezone_offset {
Some(timezone_offset) => {
self.build_string_literal(&timezone_offset.to_string())?
self.build_string_literal(&timezone_offset.to_string())
}
None => ENCODED_EMPTY_STRING_LITERAL,
})
None => self.build_string_literal(""),
}
}
PlanExpression::Now => Some(self.now.into()),
PlanExpression::UUID => {
@ -1501,7 +1501,9 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
fn to_bool(&self, term: EncodedTerm) -> Option<bool> {
match term {
EncodedTerm::BooleanLiteral(value) => Some(value),
EncodedTerm::StringLiteral { .. } => Some(term != ENCODED_EMPTY_STRING_LITERAL),
EncodedTerm::StringLiteral { value_id } => {
Some(!self.dataset.get_str(value_id).ok()??.is_empty())
}
EncodedTerm::FloatLiteral(value) => Some(value != 0_f32),
EncodedTerm::DoubleLiteral(value) => Some(value != 0_f64),
EncodedTerm::IntegerLiteral(value) => Some(value != 0),
@ -1952,6 +1954,58 @@ impl<S: ReadableEncodedStore + 'static> SimpleEvaluator<S> {
let hash = hex::encode(H::new().chain(input.as_str()).finalize());
self.build_string_literal(&hash)
}
/// Returns the datatype of `value` as an encoded named node.
///
/// Named nodes, blank nodes and the default graph carry no datatype and yield `None`.
/// A typed literal reuses the datatype id it already stores. Every other literal kind
/// maps to a fixed XSD / RDF datatype IRI that is handed to `build_named_node`, so the
/// result in that case is whatever that helper returns.
fn datatype(&self, value: EncodedTerm) -> Option<EncodedTerm> {
    //TODO: optimize?
    let datatype_iri = match value {
        // Not literals: nothing to report.
        EncodedTerm::NamedNode { .. }
        | EncodedTerm::NamedBlankNode { .. }
        | EncodedTerm::InlineBlankNode { .. }
        | EncodedTerm::DefaultGraph => return None,
        // The datatype IRI is already encoded inside the term itself.
        EncodedTerm::TypedLiteral { datatype_id, .. } => {
            return Some(EncodedTerm::NamedNode {
                iri_id: datatype_id,
            })
        }
        EncodedTerm::StringLiteral { .. } => "http://www.w3.org/2001/XMLSchema#string",
        EncodedTerm::LangStringLiteral { .. } => {
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
        }
        EncodedTerm::BooleanLiteral(..) => "http://www.w3.org/2001/XMLSchema#boolean",
        EncodedTerm::FloatLiteral(..) => "http://www.w3.org/2001/XMLSchema#float",
        EncodedTerm::DoubleLiteral(..) => "http://www.w3.org/2001/XMLSchema#double",
        EncodedTerm::IntegerLiteral(..) => "http://www.w3.org/2001/XMLSchema#integer",
        EncodedTerm::DecimalLiteral(..) => "http://www.w3.org/2001/XMLSchema#decimal",
        EncodedTerm::DateLiteral(..) => "http://www.w3.org/2001/XMLSchema#date",
        EncodedTerm::TimeLiteral(..) => "http://www.w3.org/2001/XMLSchema#time",
        EncodedTerm::DateTimeLiteral(..) => "http://www.w3.org/2001/XMLSchema#dateTime",
        EncodedTerm::DurationLiteral(..) => "http://www.w3.org/2001/XMLSchema#duration",
        EncodedTerm::YearMonthDurationLiteral(..) => {
            "http://www.w3.org/2001/XMLSchema#yearMonthDuration"
        }
        EncodedTerm::DayTimeDurationLiteral(..) => {
            "http://www.w3.org/2001/XMLSchema#dayTimeDuration"
        }
    };
    self.build_named_node(datatype_iri)
}
}
enum NumericBinaryOperands {

@ -4,7 +4,7 @@ use crate::sparql::error::EvaluationError;
use crate::sparql::model::Variable;
use crate::store::numeric_encoder::{
EncodedQuad, EncodedTerm, Encoder, MemoryStrStore, StrContainer, StrHash, StrLookup,
WithStoreError, ENCODED_DEFAULT_GRAPH,
WithStoreError,
};
use crate::store::ReadableEncodedStore;
use std::cell::{RefCell, RefMut};
@ -590,10 +590,10 @@ impl<S: ReadableEncodedStore> DatasetView<S> {
)
.filter(|quad| match quad {
Err(_) => true,
Ok(quad) => quad.graph_name != ENCODED_DEFAULT_GRAPH,
Ok(quad) => quad.graph_name != EncodedTerm::DefaultGraph,
}),
)
} else if graph_name == Some(ENCODED_DEFAULT_GRAPH) && self.default_graph_as_union {
} else if graph_name == Some(EncodedTerm::DefaultGraph) && self.default_graph_as_union {
Box::new(
map_io_err(
self.store
@ -605,7 +605,7 @@ impl<S: ReadableEncodedStore> DatasetView<S> {
quad.subject,
quad.predicate,
quad.object,
ENCODED_DEFAULT_GRAPH,
EncodedTerm::DefaultGraph,
))
}),
)

@ -3,7 +3,7 @@ use crate::sparql::algebra::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use crate::sparql::plan::*;
use crate::store::numeric_encoder::{Encoder, ENCODED_DEFAULT_GRAPH};
use crate::store::numeric_encoder::{EncodedTerm, Encoder};
use std::collections::{BTreeSet, HashSet};
use std::rc::Rc;
@ -20,7 +20,7 @@ impl<E: Encoder> PlanBuilder<E> {
let plan = PlanBuilder { encoder }.build_for_graph_pattern(
pattern,
&mut variables,
PatternValue::Constant(ENCODED_DEFAULT_GRAPH),
PatternValue::Constant(EncodedTerm::DefaultGraph),
)?;
Ok((plan, variables))
}

@ -76,11 +76,9 @@ impl Default for MemoryStore {
impl MemoryStore {
/// Constructs a new `MemoryStore`
pub fn new() -> Self {
let new = Self {
Self {
indexes: Arc::new(RwLock::default()),
};
(&new).set_first_strings().unwrap_infallible();
new
}
}
/// Executes a [SPARQL 1.1 query](https://www.w3.org/TR/sparql11-query/).
@ -968,7 +966,7 @@ impl<'a> MemoryTransaction<'a> {
///
/// // quad filter
/// let results: Vec<Quad> = store.quads_for_pattern(None, None, None, None).collect();
/// let ex = NamedNode::new("http://example.com")?;
/// let ex = NamedNode::new("http://example.com").unwrap();
/// assert_eq!(vec![Quad::new(ex.clone(), ex.clone(), ex.clone(), None)], results);
/// # Result::<_, oxigraph::sparql::EvaluationError>::Ok(())
/// ```
@ -998,7 +996,7 @@ impl<'a> MemoryTransaction<'a> {
///
/// // quad filter
/// let results: Vec<Quad> = store.quads_for_pattern(None, None, None, None).collect();
/// let ex = NamedNode::new("http://example.com")?;
/// let ex = NamedNode::new("http://example.com").unwrap();
/// assert_eq!(vec![Quad::new(ex.clone(), ex.clone(), ex.clone(), Some(ex.into()))], results);
/// # Result::<_, oxigraph::sparql::EvaluationError>::Ok(())
/// ```

@ -1,8 +1,6 @@
#![allow(clippy::unreadable_literal)]
use crate::error::{invalid_data_error, Infallible, UnwrapInfallible};
use crate::model::vocab::rdf;
use crate::model::vocab::xsd;
use crate::error::{invalid_data_error, Infallible};
use crate::model::xsd::*;
use crate::model::*;
use rand::random;
@ -31,10 +29,6 @@ impl StrHash {
}
}
/// Wraps an already-known `u128` value as a `StrHash` without computing anything.
///
/// Being a `const fn`, it can be used to initialise `const` items.
const fn constant(hash: u128) -> Self {
Self { hash }
}
#[inline]
pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
Self {
@ -48,21 +42,6 @@ impl StrHash {
}
}
// Fixed `StrHash` ids for the empty string and the built-in datatype IRIs.
// `set_first_strings` inserts the matching strings under these ids, and
// `test_str_hash` checks the first two against `StrHash::new`.
const EMPTY_STRING_ID: StrHash = StrHash::constant(0xf4f2ced447ab02427de0a38047d74950);
const RDF_LANG_STRING_ID: StrHash = StrHash::constant(0x8fab6bc1501d6d114e5d4e0116f67a49);
const XSD_STRING_ID: StrHash = StrHash::constant(0xe72300970ee9bf77f2df7bdb300e3d84);
const XSD_BOOLEAN_ID: StrHash = StrHash::constant(0xfafac8b356be81954f64e70756e59e32);
const XSD_FLOAT_ID: StrHash = StrHash::constant(0x34bd4a8ede4564c36445b76e84fa7502);
const XSD_DOUBLE_ID: StrHash = StrHash::constant(0x3614a889da2f0c7616d96d01b2ff1a97);
const XSD_INTEGER_ID: StrHash = StrHash::constant(0xe2b19c79f5f04dbcdc7f52f4f7869da0);
const XSD_DECIMAL_ID: StrHash = StrHash::constant(0xb50bffedfd084528ff892173dc0d1fad);
const XSD_DATE_TIME_ID: StrHash = StrHash::constant(0xd7496e779a321ade51e92da1a5aa6cb);
const XSD_DATE_ID: StrHash = StrHash::constant(0x87c4351dea4b98f59a22f7b636d4031);
const XSD_TIME_ID: StrHash = StrHash::constant(0xc7487be3f3d27d1926b27abf005a9cd2);
const XSD_DURATION_ID: StrHash = StrHash::constant(0x226af08ea5b7e6b08ceed6030c721228);
const XSD_YEAR_MONTH_DURATION_ID: StrHash = StrHash::constant(0xc6dacde7afc0bd2f6e178d7229948191);
const XSD_DAY_TIME_DURATION_ID: StrHash = StrHash::constant(0xc8d6cfdf45e12c10bd711a76aae43bc6);
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_INLINE_BLANK_NODE_ID: u8 = 2;
@ -83,50 +62,6 @@ const TYPE_DURATION_LITERAL: u8 = 16;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 17;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 18;
// Ready-made encoded terms: the default graph, the empty string literal, and one
// named node per fixed datatype id (`RDF_LANG_STRING_ID`, `XSD_*_ID`).
pub const ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph;
// String literal whose value id is the fixed id of the empty string.
pub const ENCODED_EMPTY_STRING_LITERAL: EncodedTerm = EncodedTerm::StringLiteral {
value_id: EMPTY_STRING_ID,
};
// Datatype named nodes, each wrapping the fixed id of the same name.
pub const ENCODED_RDF_LANG_STRING_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: RDF_LANG_STRING_ID,
};
pub const ENCODED_XSD_STRING_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_STRING_ID,
};
pub const ENCODED_XSD_BOOLEAN_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_BOOLEAN_ID,
};
pub const ENCODED_XSD_FLOAT_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_FLOAT_ID,
};
pub const ENCODED_XSD_DOUBLE_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_DOUBLE_ID,
};
pub const ENCODED_XSD_INTEGER_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_INTEGER_ID,
};
pub const ENCODED_XSD_DECIMAL_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_DECIMAL_ID,
};
pub const ENCODED_XSD_DATE_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_DATE_ID,
};
pub const ENCODED_XSD_TIME_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_TIME_ID,
};
pub const ENCODED_XSD_DATE_TIME_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_DATE_TIME_ID,
};
pub const ENCODED_XSD_DURATION_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_DURATION_ID,
};
pub const ENCODED_XSD_YEAR_MONTH_DURATION_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_YEAR_MONTH_DURATION_ID,
};
pub const ENCODED_XSD_DAY_TIME_DURATION_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode {
iri_id: XSD_DAY_TIME_DURATION_ID,
};
#[derive(Debug, Clone, Copy)]
pub enum EncodedTerm {
DefaultGraph,
@ -314,32 +249,6 @@ impl EncodedTerm {
}
}
/// Returns the encoded named node for this term's datatype.
///
/// Literal variants map to the matching `ENCODED_*_NAMED_NODE` constant, except a
/// typed literal, which reuses the datatype id it carries. Any other term yields `None`.
pub fn datatype(&self) -> Option<Self> {
    let datatype_node = match self {
        EncodedTerm::StringLiteral { .. } => ENCODED_XSD_STRING_NAMED_NODE,
        EncodedTerm::LangStringLiteral { .. } => ENCODED_RDF_LANG_STRING_NAMED_NODE,
        EncodedTerm::TypedLiteral { datatype_id, .. } => EncodedTerm::NamedNode {
            iri_id: *datatype_id,
        },
        EncodedTerm::BooleanLiteral(..) => ENCODED_XSD_BOOLEAN_NAMED_NODE,
        EncodedTerm::FloatLiteral(..) => ENCODED_XSD_FLOAT_NAMED_NODE,
        EncodedTerm::DoubleLiteral(..) => ENCODED_XSD_DOUBLE_NAMED_NODE,
        EncodedTerm::IntegerLiteral(..) => ENCODED_XSD_INTEGER_NAMED_NODE,
        EncodedTerm::DecimalLiteral(..) => ENCODED_XSD_DECIMAL_NAMED_NODE,
        EncodedTerm::DateLiteral(..) => ENCODED_XSD_DATE_NAMED_NODE,
        EncodedTerm::TimeLiteral(..) => ENCODED_XSD_TIME_NAMED_NODE,
        EncodedTerm::DateTimeLiteral(..) => ENCODED_XSD_DATE_TIME_NAMED_NODE,
        EncodedTerm::DurationLiteral(..) => ENCODED_XSD_DURATION_NAMED_NODE,
        EncodedTerm::YearMonthDurationLiteral(..) => ENCODED_XSD_YEAR_MONTH_DURATION_NAMED_NODE,
        EncodedTerm::DayTimeDurationLiteral(..) => ENCODED_XSD_DAY_TIME_DURATION_NAMED_NODE,
        // Every remaining variant has no datatype.
        _ => return None,
    };
    Some(datatype_node)
}
fn type_id(&self) -> u8 {
match self {
EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID,
@ -566,7 +475,7 @@ impl From<&GraphName> for EncodedTerm {
match node {
GraphName::NamedNode(node) => node.into(),
GraphName::BlankNode(node) => node.into(),
GraphName::DefaultGraph => ENCODED_DEFAULT_GRAPH,
GraphName::DefaultGraph => EncodedTerm::DefaultGraph,
}
}
}
@ -863,28 +772,6 @@ pub(crate) trait StrLookup: WithStoreError {
/// A store into which strings can be inserted under a `StrHash` key.
pub(crate) trait StrContainer: WithStoreError {
    /// Records `value` under `key`.
    fn insert_str(&mut self, key: StrHash, value: &str) -> Result<(), Self::Error>;

    /// Inserts the strings that belong to the fixed ids: the empty string and the
    /// built-in datatype IRIs. Should be called when the bytes store is created.
    ///
    /// Insertion stops at the first error from `insert_str`, which is returned.
    fn set_first_strings(&mut self) -> Result<(), Self::Error> {
        // Explicit element type so each `as_str()` result is taken as a `&str`.
        let first_strings: [(StrHash, &str); 14] = [
            (EMPTY_STRING_ID, ""),
            (RDF_LANG_STRING_ID, rdf::LANG_STRING.as_str()),
            (XSD_STRING_ID, xsd::STRING.as_str()),
            (XSD_BOOLEAN_ID, xsd::BOOLEAN.as_str()),
            (XSD_FLOAT_ID, xsd::FLOAT.as_str()),
            (XSD_DOUBLE_ID, xsd::DOUBLE.as_str()),
            (XSD_INTEGER_ID, xsd::INTEGER.as_str()),
            (XSD_DECIMAL_ID, xsd::DECIMAL.as_str()),
            (XSD_DATE_TIME_ID, xsd::DATE_TIME.as_str()),
            (XSD_DATE_ID, xsd::DATE.as_str()),
            (XSD_TIME_ID, xsd::TIME.as_str()),
            (XSD_DURATION_ID, xsd::DURATION.as_str()),
            (XSD_YEAR_MONTH_DURATION_ID, xsd::YEAR_MONTH_DURATION.as_str()),
            (XSD_DAY_TIME_DURATION_ID, xsd::DAY_TIME_DURATION.as_str()),
        ];
        for &(id, value) in first_strings.iter() {
            self.insert_str(id, value)?;
        }
        Ok(())
    }
}
pub struct MemoryStrStore {
@ -893,11 +780,9 @@ pub struct MemoryStrStore {
impl Default for MemoryStrStore {
fn default() -> Self {
let mut new = Self {
Self {
id2str: HashMap::default(),
};
new.set_first_strings().unwrap_infallible();
new
}
}
}
@ -952,7 +837,7 @@ pub(crate) trait Encoder: WithStoreError {
match name {
GraphName::NamedNode(named_node) => self.encode_named_node(named_node),
GraphName::BlankNode(blank_node) => self.encode_blank_node(blank_node),
GraphName::DefaultGraph => Ok(ENCODED_DEFAULT_GRAPH),
GraphName::DefaultGraph => Ok(EncodedTerm::DefaultGraph),
}
}
@ -1028,7 +913,7 @@ pub(crate) trait Encoder: WithStoreError {
object: self.encode_rio_term(quad.object, bnodes_map)?,
graph_name: match quad.graph_name {
Some(graph_name) => self.encode_rio_named_or_blank_node(graph_name, bnodes_map)?,
None => ENCODED_DEFAULT_GRAPH,
None => EncodedTerm::DefaultGraph,
},
})
}
@ -1319,6 +1204,8 @@ fn get_required_str(lookup: &impl StrLookup, id: StrHash) -> Result<String, io::
#[test]
fn test_encoding() {
use crate::model::vocab::xsd;
let mut store = MemoryStrStore::default();
let terms: Vec<Term> = vec![
NamedNode::new_unchecked("http://foo.com").into(),
@ -1350,12 +1237,3 @@ fn test_encoding() {
assert_eq!(encoded, EncodedTerm::from(&term));
}
}
/// Checks that two of the hard-coded ids equal what `StrHash::new` computes for
/// the strings they stand for.
#[test]
fn test_str_hash() {
    let empty_string_hash = StrHash::new("");
    assert_eq!(empty_string_hash, EMPTY_STRING_ID);

    let lang_string_hash = StrHash::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString");
    assert_eq!(lang_string_hash, RDF_LANG_STRING_ID);
}

@ -79,15 +79,9 @@ impl RocksDbStore {
options.create_missing_column_families(true);
options.set_compaction_style(DBCompactionStyle::Universal);
let new = Self {
Ok(Self {
db: Arc::new(DB::open_cf(&options, path, &COLUMN_FAMILIES).map_err(map_err)?),
};
let mut transaction = new.auto_batch_writer();
transaction.set_first_strings()?;
transaction.apply()?;
Ok(new)
})
}
/// Executes a [SPARQL 1.1 query](https://www.w3.org/TR/sparql11-query/).

@ -77,12 +77,10 @@ impl SledStore {
fn do_open(config: &Config) -> Result<Self, io::Error> {
let db = config.open()?;
let new = Self {
Ok(Self {
id2str: db.open_tree("id2str")?,
quads: db.open_tree("quads")?,
};
DirectWriter::new(&new).set_first_strings()?;
Ok(new)
})
}
/// Executes a [SPARQL 1.1 query](https://www.w3.org/TR/sparql11-query/).

Loading…
Cancel
Save