Rust implementation of NextGraph, a Decentralized and local-first web 3.0 ecosystem https://nextgraph.org
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
nextgraph-rs/ng-verifier/src/orm/utils.rs

294 lines
11 KiB

// Copyright (c) 2022-2025 Niko Bonnieure, Par le Peuple, NextGraph.org developers
// All rights reserved.
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.
use ng_oxigraph::oxrdf::Subject;
use ng_repo::log::*;
use ng_repo::types::OverlayId;
use std::collections::HashMap;
use std::collections::HashSet;
use lazy_static::lazy_static;
pub use ng_net::orm::{OrmDiff, OrmShapeType};
use ng_net::{app_protocol::*, orm::*};
use ng_oxigraph::oxrdf::Triple;
use ng_repo::errors::NgError;
use ng_repo::errors::VerifierError;
use regex::Regex;
use crate::orm::types::*;
/// Heuristic:
/// Consider a string an IRI if it contains alphanumeric characters and then a colon within the first 13 characters
pub fn is_iri(s: &str) -> bool {
lazy_static! {
static ref IRI_REGEX: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9+\.\-]{1,12}:").unwrap();
}
IRI_REGEX.is_match(s)
}
pub fn literal_to_sparql_str(var: OrmSchemaDataType) -> Vec<String> {
match var.literals {
None => [].to_vec(),
Some(literals) => literals
.iter()
.map(|literal| match literal {
BasicType::Bool(val) => {
if *val {
"true".to_string()
} else {
"false".to_string()
}
}
BasicType::Num(number) => number.to_string(),
BasicType::Str(sting) => {
if is_iri(sting) {
format!("<{}>", sting)
} else {
format!("\"{}\"", escape_literal(sting))
}
}
})
.collect(),
}
}
pub fn shape_type_to_sparql(
schema: &OrmSchema,
shape: &ShapeIri,
filter_subjects: Option<Vec<String>>,
) -> Result<String, NgError> {
// Use a counter to generate unique variable names.
let mut var_counter = 0;
fn get_new_var_name(counter: &mut i32) -> String {
let name = format!("v{}", counter);
*counter += 1;
name
}
// Collect all statements to be added to the construct and where bodies.
let mut construct_statements = Vec::new();
let mut where_statements = Vec::new();
// Keep track of visited shapes while recursing to prevent infinite loops.
let mut visited_shapes: HashSet<ShapeIri> = HashSet::new();
// Recursive function to call for (nested) shapes.
fn process_shape(
schema: &OrmSchema,
shape: &OrmSchemaShape,
subject_var_name: &str,
construct_statements: &mut Vec<String>,
where_statements: &mut Vec<String>,
var_counter: &mut i32,
visited_shapes: &mut HashSet<String>,
) {
// Prevent infinite recursion on cyclic schemas.
// TODO: We could handle this as IRI string reference.
if visited_shapes.contains(&shape.iri) {
return;
}
visited_shapes.insert(shape.iri.clone());
// Add statements for each predicate.
for predicate in &shape.predicates {
let mut union_branches = Vec::new();
let mut allowed_literals = Vec::new();
// Predicate constraints might have more than one acceptable data type. Traverse each.
// It is assumed that constant literals, nested shapes and regular types are not mixed.
for datatype in &predicate.dataTypes {
if datatype.valType == OrmSchemaLiteralType::literal {
// Collect allowed literals and as strings
// (already in SPARQL-format, e.g. `"a astring"`, `<http:ex.co/>`, `true`, or `42`).
allowed_literals.extend(literal_to_sparql_str(datatype.clone()));
} else if datatype.valType == OrmSchemaLiteralType::shape {
let shape_iri = &datatype.shape.clone().unwrap();
let nested_shape = schema.get(shape_iri).unwrap();
// For the current acceptable shape, add CONSTRUCT, WHERE, and recurse.
// Each shape option gets its own var.
let obj_var_name = get_new_var_name(var_counter);
construct_statements.push(format!(
" ?{} <{}> ?{}",
subject_var_name, predicate.iri, obj_var_name
));
// Those are later added to a UNION, if there is more than one shape.
union_branches.push(format!(
" ?{} <{}> ?{}",
subject_var_name, predicate.iri, obj_var_name
));
// Recurse to add statements for nested object.
process_shape(
schema,
nested_shape,
&obj_var_name,
construct_statements,
where_statements,
var_counter,
visited_shapes,
);
}
}
// The where statement which might be wrapped in OPTIONAL.
let where_body: String;
if !allowed_literals.is_empty()
&& !predicate.extra.unwrap_or(false)
&& predicate.minCardinality > 0
{
// If we have literal requirements and they are not optional ("extra"),
// Add CONSTRUCT, WHERE, and FILTER.
let pred_var_name = get_new_var_name(var_counter);
construct_statements.push(format!(
" ?{} <{}> ?{}",
subject_var_name, predicate.iri, pred_var_name
));
where_body = format!(
" ?{s} <{p}> ?{o} . \n FILTER(?{o} IN ({lits}))",
s = subject_var_name,
p = predicate.iri,
o = pred_var_name,
lits = allowed_literals.join(", ")
);
} else if !union_branches.is_empty() {
// We have nested shape(s) which were already added to CONSTRUCT above.
// Join them with UNION.
where_body = union_branches
.into_iter()
.map(|b| format!("{{\n{}\n}}", b))
.collect::<Vec<_>>()
.join(" UNION ");
} else {
// Regular predicate data type. Just add basic CONSTRUCT and WHERE statements.
let pred_var_name = get_new_var_name(var_counter);
construct_statements.push(format!(
" ?{} <{}> ?{}",
subject_var_name, predicate.iri, pred_var_name
));
where_body = format!(
" ?{} <{}> ?{}",
subject_var_name, predicate.iri, pred_var_name
);
}
// Wrap in optional, if necessary.
if predicate.minCardinality < 1 {
where_statements.push(format!(" OPTIONAL {{\n{}\n }}", where_body));
} else {
where_statements.push(where_body);
};
}
visited_shapes.remove(&shape.iri);
}
let root_shape = schema.get(shape).ok_or(VerifierError::InvalidOrmSchema)?;
// Root subject variable name
let root_var_name = get_new_var_name(&mut var_counter);
process_shape(
schema,
root_shape,
&root_var_name,
&mut construct_statements,
&mut where_statements,
&mut var_counter,
&mut visited_shapes,
);
// Filter subjects, if present.
if let Some(subjects) = filter_subjects {
log_debug!("filter_subjects: {:?}", subjects);
let subjects_str = subjects
.iter()
.map(|s| format!("<{}>", s))
.collect::<Vec<_>>()
.join(", ");
where_statements.push(format!(" FILTER(?v0 IN ({}))", subjects_str));
}
// Create query from statements.
let construct_body = construct_statements.join(" .\n");
let where_body = where_statements.join(" .\n");
Ok(format!(
"CONSTRUCT {{\n{}\n}}\nWHERE {{\n{}\n}}",
construct_body, where_body
))
}
/// SPARQL literal escape: backslash, quotes, newlines, tabs.
fn escape_literal(lit: &str) -> String {
let mut out = String::with_capacity(lit.len() + 4);
for c in lit.chars() {
match c {
'\\' => out.push_str("\\\\"),
'\"' => out.push_str("\\\""),
'\n' => out.push_str("\\n"),
'\r' => out.push_str("\\r"),
'\t' => out.push_str("\\t"),
_ => out.push(c),
}
}
return out;
}
pub fn group_by_subject_for_shape<'a>(
shape: &OrmSchemaShape,
triples: &'a [Triple],
allowed_subjects: &[String],
) -> HashMap<String, Vec<&'a Triple>> {
let mut triples_by_subject: HashMap<String, Vec<&Triple>> = HashMap::new();
let allowed_preds_set: HashSet<&str> =
shape.predicates.iter().map(|p| p.iri.as_str()).collect();
let allowed_subject_set: HashSet<&str> = allowed_subjects.iter().map(|s| s.as_str()).collect();
for triple in triples {
// triple.subject must be in allowed_subjects (or allowed_subjects empty)
// and triple.predicate must be in allowed_preds.
if allowed_preds_set.contains(triple.predicate.as_str()) {
// filter subjects if list provided
let subj = match &triple.subject {
Subject::NamedNode(n) => n.clone().into_string(),
_ => continue,
};
// Subject must be in allowed subjects (or allowed_subjects is empty).
if allowed_subject_set.is_empty() || allowed_subject_set.contains(&subj.as_str()) {
triples_by_subject
.entry(subj)
.or_insert_with(Vec::new)
.push(triple);
}
}
}
return triples_by_subject;
}
pub fn nuri_to_string(nuri: &NuriV0) -> String {
// Get repo_id and overlay_id from the nuri
let repo_id = nuri.target.repo_id();
let overlay_id = if let Some(overlay_link) = &nuri.overlay {
overlay_link.clone().try_into().unwrap()
} else {
// Default overlay for the repo
OverlayId::outer(repo_id)
};
let graph_name = NuriV0::repo_graph_name(repo_id, &overlay_id);
graph_name
}