parent
bce06e5a05
commit
576ba213ee
@ -0,0 +1,294 @@ |
|||||||
|
// Copyright (c) 2022-2025 Niko Bonnieure, Par le Peuple, NextGraph.org developers
|
||||||
|
// All rights reserved.
|
||||||
|
// Licensed under the Apache License, Version 2.0
|
||||||
|
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
|
||||||
|
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
|
||||||
|
// at your option. All files in the project carrying such
|
||||||
|
// notice may not be copied, modified, or distributed except
|
||||||
|
// according to those terms.
|
||||||
|
|
||||||
|
use ng_oxigraph::oxrdf::Subject; |
||||||
|
use ng_repo::log::*; |
||||||
|
use ng_repo::types::OverlayId; |
||||||
|
|
||||||
|
use std::collections::HashMap; |
||||||
|
use std::collections::HashSet; |
||||||
|
|
||||||
|
use lazy_static::lazy_static; |
||||||
|
pub use ng_net::orm::{OrmDiff, OrmShapeType}; |
||||||
|
use ng_net::{app_protocol::*, orm::*}; |
||||||
|
use ng_oxigraph::oxrdf::Triple; |
||||||
|
use ng_repo::errors::NgError; |
||||||
|
use ng_repo::errors::VerifierError; |
||||||
|
use regex::Regex; |
||||||
|
|
||||||
|
use crate::orm::types::*; |
||||||
|
|
||||||
|
/// Heuristic:
|
||||||
|
/// Consider a string an IRI if it contains alphanumeric characters and then a colon within the first 13 characters
|
||||||
|
pub fn is_iri(s: &str) -> bool { |
||||||
|
lazy_static! { |
||||||
|
static ref IRI_REGEX: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9+\.\-]{1,12}:").unwrap(); |
||||||
|
} |
||||||
|
IRI_REGEX.is_match(s) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn literal_to_sparql_str(var: OrmSchemaDataType) -> Vec<String> { |
||||||
|
match var.literals { |
||||||
|
None => [].to_vec(), |
||||||
|
Some(literals) => literals |
||||||
|
.iter() |
||||||
|
.map(|literal| match literal { |
||||||
|
BasicType::Bool(val) => { |
||||||
|
if *val { |
||||||
|
"true".to_string() |
||||||
|
} else { |
||||||
|
"false".to_string() |
||||||
|
} |
||||||
|
} |
||||||
|
BasicType::Num(number) => number.to_string(), |
||||||
|
BasicType::Str(sting) => { |
||||||
|
if is_iri(sting) { |
||||||
|
format!("<{}>", sting) |
||||||
|
} else { |
||||||
|
format!("\"{}\"", escape_literal(sting)) |
||||||
|
} |
||||||
|
} |
||||||
|
}) |
||||||
|
.collect(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn shape_type_to_sparql( |
||||||
|
schema: &OrmSchema, |
||||||
|
shape: &ShapeIri, |
||||||
|
filter_subjects: Option<Vec<String>>, |
||||||
|
) -> Result<String, NgError> { |
||||||
|
// Use a counter to generate unique variable names.
|
||||||
|
let mut var_counter = 0; |
||||||
|
fn get_new_var_name(counter: &mut i32) -> String { |
||||||
|
let name = format!("v{}", counter); |
||||||
|
*counter += 1; |
||||||
|
name |
||||||
|
} |
||||||
|
|
||||||
|
// Collect all statements to be added to the construct and where bodies.
|
||||||
|
let mut construct_statements = Vec::new(); |
||||||
|
let mut where_statements = Vec::new(); |
||||||
|
|
||||||
|
// Keep track of visited shapes while recursing to prevent infinite loops.
|
||||||
|
let mut visited_shapes: HashSet<ShapeIri> = HashSet::new(); |
||||||
|
|
||||||
|
// Recursive function to call for (nested) shapes.
|
||||||
|
fn process_shape( |
||||||
|
schema: &OrmSchema, |
||||||
|
shape: &OrmSchemaShape, |
||||||
|
subject_var_name: &str, |
||||||
|
construct_statements: &mut Vec<String>, |
||||||
|
where_statements: &mut Vec<String>, |
||||||
|
var_counter: &mut i32, |
||||||
|
visited_shapes: &mut HashSet<String>, |
||||||
|
) { |
||||||
|
// Prevent infinite recursion on cyclic schemas.
|
||||||
|
// TODO: We could handle this as IRI string reference.
|
||||||
|
if visited_shapes.contains(&shape.iri) { |
||||||
|
return; |
||||||
|
} |
||||||
|
visited_shapes.insert(shape.iri.clone()); |
||||||
|
|
||||||
|
// Add statements for each predicate.
|
||||||
|
for predicate in &shape.predicates { |
||||||
|
let mut union_branches = Vec::new(); |
||||||
|
let mut allowed_literals = Vec::new(); |
||||||
|
|
||||||
|
// Predicate constraints might have more than one acceptable data type. Traverse each.
|
||||||
|
// It is assumed that constant literals, nested shapes and regular types are not mixed.
|
||||||
|
for datatype in &predicate.dataTypes { |
||||||
|
if datatype.valType == OrmSchemaLiteralType::literal { |
||||||
|
// Collect allowed literals and as strings
|
||||||
|
// (already in SPARQL-format, e.g. `"a astring"`, `<http:ex.co/>`, `true`, or `42`).
|
||||||
|
allowed_literals.extend(literal_to_sparql_str(datatype.clone())); |
||||||
|
} else if datatype.valType == OrmSchemaLiteralType::shape { |
||||||
|
let shape_iri = &datatype.shape.clone().unwrap(); |
||||||
|
let nested_shape = schema.get(shape_iri).unwrap(); |
||||||
|
|
||||||
|
// For the current acceptable shape, add CONSTRUCT, WHERE, and recurse.
|
||||||
|
|
||||||
|
// Each shape option gets its own var.
|
||||||
|
let obj_var_name = get_new_var_name(var_counter); |
||||||
|
|
||||||
|
construct_statements.push(format!( |
||||||
|
" ?{} <{}> ?{}", |
||||||
|
subject_var_name, predicate.iri, obj_var_name |
||||||
|
)); |
||||||
|
// Those are later added to a UNION, if there is more than one shape.
|
||||||
|
union_branches.push(format!( |
||||||
|
" ?{} <{}> ?{}", |
||||||
|
subject_var_name, predicate.iri, obj_var_name |
||||||
|
)); |
||||||
|
|
||||||
|
// Recurse to add statements for nested object.
|
||||||
|
process_shape( |
||||||
|
schema, |
||||||
|
nested_shape, |
||||||
|
&obj_var_name, |
||||||
|
construct_statements, |
||||||
|
where_statements, |
||||||
|
var_counter, |
||||||
|
visited_shapes, |
||||||
|
); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// The where statement which might be wrapped in OPTIONAL.
|
||||||
|
let where_body: String; |
||||||
|
|
||||||
|
if !allowed_literals.is_empty() |
||||||
|
&& !predicate.extra.unwrap_or(false) |
||||||
|
&& predicate.minCardinality > 0 |
||||||
|
{ |
||||||
|
// If we have literal requirements and they are not optional ("extra"),
|
||||||
|
// Add CONSTRUCT, WHERE, and FILTER.
|
||||||
|
|
||||||
|
let pred_var_name = get_new_var_name(var_counter); |
||||||
|
construct_statements.push(format!( |
||||||
|
" ?{} <{}> ?{}", |
||||||
|
subject_var_name, predicate.iri, pred_var_name |
||||||
|
)); |
||||||
|
where_body = format!( |
||||||
|
" ?{s} <{p}> ?{o} . \n FILTER(?{o} IN ({lits}))", |
||||||
|
s = subject_var_name, |
||||||
|
p = predicate.iri, |
||||||
|
o = pred_var_name, |
||||||
|
lits = allowed_literals.join(", ") |
||||||
|
); |
||||||
|
} else if !union_branches.is_empty() { |
||||||
|
// We have nested shape(s) which were already added to CONSTRUCT above.
|
||||||
|
// Join them with UNION.
|
||||||
|
|
||||||
|
where_body = union_branches |
||||||
|
.into_iter() |
||||||
|
.map(|b| format!("{{\n{}\n}}", b)) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.join(" UNION "); |
||||||
|
} else { |
||||||
|
// Regular predicate data type. Just add basic CONSTRUCT and WHERE statements.
|
||||||
|
|
||||||
|
let pred_var_name = get_new_var_name(var_counter); |
||||||
|
construct_statements.push(format!( |
||||||
|
" ?{} <{}> ?{}", |
||||||
|
subject_var_name, predicate.iri, pred_var_name |
||||||
|
)); |
||||||
|
where_body = format!( |
||||||
|
" ?{} <{}> ?{}", |
||||||
|
subject_var_name, predicate.iri, pred_var_name |
||||||
|
); |
||||||
|
} |
||||||
|
|
||||||
|
// Wrap in optional, if necessary.
|
||||||
|
if predicate.minCardinality < 1 { |
||||||
|
where_statements.push(format!(" OPTIONAL {{\n{}\n }}", where_body)); |
||||||
|
} else { |
||||||
|
where_statements.push(where_body); |
||||||
|
}; |
||||||
|
} |
||||||
|
|
||||||
|
visited_shapes.remove(&shape.iri); |
||||||
|
} |
||||||
|
|
||||||
|
let root_shape = schema.get(shape).ok_or(VerifierError::InvalidOrmSchema)?; |
||||||
|
|
||||||
|
// Root subject variable name
|
||||||
|
let root_var_name = get_new_var_name(&mut var_counter); |
||||||
|
|
||||||
|
process_shape( |
||||||
|
schema, |
||||||
|
root_shape, |
||||||
|
&root_var_name, |
||||||
|
&mut construct_statements, |
||||||
|
&mut where_statements, |
||||||
|
&mut var_counter, |
||||||
|
&mut visited_shapes, |
||||||
|
); |
||||||
|
|
||||||
|
// Filter subjects, if present.
|
||||||
|
if let Some(subjects) = filter_subjects { |
||||||
|
log_debug!("filter_subjects: {:?}", subjects); |
||||||
|
let subjects_str = subjects |
||||||
|
.iter() |
||||||
|
.map(|s| format!("<{}>", s)) |
||||||
|
.collect::<Vec<_>>() |
||||||
|
.join(", "); |
||||||
|
where_statements.push(format!(" FILTER(?v0 IN ({}))", subjects_str)); |
||||||
|
} |
||||||
|
|
||||||
|
// Create query from statements.
|
||||||
|
let construct_body = construct_statements.join(" .\n"); |
||||||
|
|
||||||
|
let where_body = where_statements.join(" .\n"); |
||||||
|
|
||||||
|
Ok(format!( |
||||||
|
"CONSTRUCT {{\n{}\n}}\nWHERE {{\n{}\n}}", |
||||||
|
construct_body, where_body |
||||||
|
)) |
||||||
|
} |
||||||
|
|
||||||
|
/// SPARQL literal escape: backslash, quotes, newlines, tabs.
|
||||||
|
fn escape_literal(lit: &str) -> String { |
||||||
|
let mut out = String::with_capacity(lit.len() + 4); |
||||||
|
for c in lit.chars() { |
||||||
|
match c { |
||||||
|
'\\' => out.push_str("\\\\"), |
||||||
|
'\"' => out.push_str("\\\""), |
||||||
|
'\n' => out.push_str("\\n"), |
||||||
|
'\r' => out.push_str("\\r"), |
||||||
|
'\t' => out.push_str("\\t"), |
||||||
|
_ => out.push(c), |
||||||
|
} |
||||||
|
} |
||||||
|
return out; |
||||||
|
} |
||||||
|
|
||||||
|
pub fn group_by_subject_for_shape<'a>( |
||||||
|
shape: &OrmSchemaShape, |
||||||
|
triples: &'a [Triple], |
||||||
|
allowed_subjects: &[String], |
||||||
|
) -> HashMap<String, Vec<&'a Triple>> { |
||||||
|
let mut triples_by_subject: HashMap<String, Vec<&Triple>> = HashMap::new(); |
||||||
|
let allowed_preds_set: HashSet<&str> = |
||||||
|
shape.predicates.iter().map(|p| p.iri.as_str()).collect(); |
||||||
|
let allowed_subject_set: HashSet<&str> = allowed_subjects.iter().map(|s| s.as_str()).collect(); |
||||||
|
for triple in triples { |
||||||
|
// triple.subject must be in allowed_subjects (or allowed_subjects empty)
|
||||||
|
// and triple.predicate must be in allowed_preds.
|
||||||
|
if allowed_preds_set.contains(triple.predicate.as_str()) { |
||||||
|
// filter subjects if list provided
|
||||||
|
let subj = match &triple.subject { |
||||||
|
Subject::NamedNode(n) => n.clone().into_string(), |
||||||
|
_ => continue, |
||||||
|
}; |
||||||
|
// Subject must be in allowed subjects (or allowed_subjects is empty).
|
||||||
|
if allowed_subject_set.is_empty() || allowed_subject_set.contains(&subj.as_str()) { |
||||||
|
triples_by_subject |
||||||
|
.entry(subj) |
||||||
|
.or_insert_with(Vec::new) |
||||||
|
.push(triple); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return triples_by_subject; |
||||||
|
} |
||||||
|
|
||||||
|
pub fn nuri_to_string(nuri: &NuriV0) -> String { |
||||||
|
// Get repo_id and overlay_id from the nuri
|
||||||
|
let repo_id = nuri.target.repo_id(); |
||||||
|
let overlay_id = if let Some(overlay_link) = &nuri.overlay { |
||||||
|
overlay_link.clone().try_into().unwrap() |
||||||
|
} else { |
||||||
|
// Default overlay for the repo
|
||||||
|
OverlayId::outer(repo_id) |
||||||
|
}; |
||||||
|
let graph_name = NuriV0::repo_graph_name(repo_id, &overlay_id); |
||||||
|
graph_name |
||||||
|
} |
Loading…
Reference in new issue