// Copyright (c) 2022-2025 Niko Bonnieure, Par le Peuple, NextGraph.org developers // All rights reserved. // Licensed under the Apache License, Version 2.0 // // or the MIT license , // at your option. All files in the project carrying such // notice may not be copied, modified, or distributed except // according to those terms. use ng_oxigraph::oxrdf::Subject; use ng_repo::log::*; use ng_repo::types::OverlayId; use std::collections::HashMap; use std::collections::HashSet; use lazy_static::lazy_static; pub use ng_net::orm::{OrmDiff, OrmShapeType}; use ng_net::{app_protocol::*, orm::*}; use ng_oxigraph::oxrdf::Triple; use ng_repo::errors::NgError; use ng_repo::errors::VerifierError; use regex::Regex; use crate::orm::types::*; /// Heuristic: /// Consider a string an IRI if it contains alphanumeric characters and then a colon within the first 13 characters pub fn is_iri(s: &str) -> bool { lazy_static! { static ref IRI_REGEX: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9+\.\-]{1,12}:").unwrap(); } IRI_REGEX.is_match(s) } pub fn literal_to_sparql_str(var: OrmSchemaDataType) -> Vec { match var.literals { None => [].to_vec(), Some(literals) => literals .iter() .map(|literal| match literal { BasicType::Bool(val) => { if *val { "true".to_string() } else { "false".to_string() } } BasicType::Num(number) => number.to_string(), BasicType::Str(sting) => { if is_iri(sting) { format!("<{}>", sting) } else { format!("\"{}\"", escape_literal(sting)) } } }) .collect(), } } pub fn shape_type_to_sparql( schema: &OrmSchema, shape: &ShapeIri, filter_subjects: Option>, ) -> Result { // Use a counter to generate unique variable names. let mut var_counter = 0; fn get_new_var_name(counter: &mut i32) -> String { let name = format!("v{}", counter); *counter += 1; name } // Collect all statements to be added to the construct and where bodies. let mut construct_statements = Vec::new(); let mut where_statements = Vec::new(); // Keep track of visited shapes while recursing to prevent infinite loops. let mut visited_shapes: HashSet = HashSet::new(); // Recursive function to call for (nested) shapes. fn process_shape( schema: &OrmSchema, shape: &OrmSchemaShape, subject_var_name: &str, construct_statements: &mut Vec, where_statements: &mut Vec, var_counter: &mut i32, visited_shapes: &mut HashSet, ) { // Prevent infinite recursion on cyclic schemas. // TODO: We could handle this as IRI string reference. if visited_shapes.contains(&shape.iri) { return; } visited_shapes.insert(shape.iri.clone()); // Add statements for each predicate. for predicate in &shape.predicates { let mut union_branches = Vec::new(); let mut allowed_literals = Vec::new(); // Predicate constraints might have more than one acceptable data type. Traverse each. // It is assumed that constant literals, nested shapes and regular types are not mixed. for datatype in &predicate.dataTypes { if datatype.valType == OrmSchemaLiteralType::literal { // Collect allowed literals and as strings // (already in SPARQL-format, e.g. `"a astring"`, ``, `true`, or `42`). allowed_literals.extend(literal_to_sparql_str(datatype.clone())); } else if datatype.valType == OrmSchemaLiteralType::shape { let shape_iri = &datatype.shape.clone().unwrap(); let nested_shape = schema.get(shape_iri).unwrap(); // For the current acceptable shape, add CONSTRUCT, WHERE, and recurse. // Each shape option gets its own var. let obj_var_name = get_new_var_name(var_counter); construct_statements.push(format!( " ?{} <{}> ?{}", subject_var_name, predicate.iri, obj_var_name )); // Those are later added to a UNION, if there is more than one shape. union_branches.push(format!( " ?{} <{}> ?{}", subject_var_name, predicate.iri, obj_var_name )); // Recurse to add statements for nested object. process_shape( schema, nested_shape, &obj_var_name, construct_statements, where_statements, var_counter, visited_shapes, ); } } // The where statement which might be wrapped in OPTIONAL. let where_body: String; if !allowed_literals.is_empty() && !predicate.extra.unwrap_or(false) && predicate.minCardinality > 0 { // If we have literal requirements and they are not optional ("extra"), // Add CONSTRUCT, WHERE, and FILTER. let pred_var_name = get_new_var_name(var_counter); construct_statements.push(format!( " ?{} <{}> ?{}", subject_var_name, predicate.iri, pred_var_name )); where_body = format!( " ?{s} <{p}> ?{o} . \n FILTER(?{o} IN ({lits}))", s = subject_var_name, p = predicate.iri, o = pred_var_name, lits = allowed_literals.join(", ") ); } else if !union_branches.is_empty() { // We have nested shape(s) which were already added to CONSTRUCT above. // Join them with UNION. where_body = union_branches .into_iter() .map(|b| format!("{{\n{}\n}}", b)) .collect::>() .join(" UNION "); } else { // Regular predicate data type. Just add basic CONSTRUCT and WHERE statements. let pred_var_name = get_new_var_name(var_counter); construct_statements.push(format!( " ?{} <{}> ?{}", subject_var_name, predicate.iri, pred_var_name )); where_body = format!( " ?{} <{}> ?{}", subject_var_name, predicate.iri, pred_var_name ); } // Wrap in optional, if necessary. if predicate.minCardinality < 1 { where_statements.push(format!(" OPTIONAL {{\n{}\n }}", where_body)); } else { where_statements.push(where_body); }; } visited_shapes.remove(&shape.iri); } let root_shape = schema.get(shape).ok_or(VerifierError::InvalidOrmSchema)?; // Root subject variable name let root_var_name = get_new_var_name(&mut var_counter); process_shape( schema, root_shape, &root_var_name, &mut construct_statements, &mut where_statements, &mut var_counter, &mut visited_shapes, ); // Filter subjects, if present. if let Some(subjects) = filter_subjects { log_debug!("filter_subjects: {:?}", subjects); let subjects_str = subjects .iter() .map(|s| format!("<{}>", s)) .collect::>() .join(", "); where_statements.push(format!(" FILTER(?v0 IN ({}))", subjects_str)); } // Create query from statements. let construct_body = construct_statements.join(" .\n"); let where_body = where_statements.join(" .\n"); Ok(format!( "CONSTRUCT {{\n{}\n}}\nWHERE {{\n{}\n}}", construct_body, where_body )) } /// SPARQL literal escape: backslash, quotes, newlines, tabs. fn escape_literal(lit: &str) -> String { let mut out = String::with_capacity(lit.len() + 4); for c in lit.chars() { match c { '\\' => out.push_str("\\\\"), '\"' => out.push_str("\\\""), '\n' => out.push_str("\\n"), '\r' => out.push_str("\\r"), '\t' => out.push_str("\\t"), _ => out.push(c), } } return out; } pub fn group_by_subject_for_shape<'a>( shape: &OrmSchemaShape, triples: &'a [Triple], allowed_subjects: &[String], ) -> HashMap> { let mut triples_by_subject: HashMap> = HashMap::new(); let allowed_preds_set: HashSet<&str> = shape.predicates.iter().map(|p| p.iri.as_str()).collect(); let allowed_subject_set: HashSet<&str> = allowed_subjects.iter().map(|s| s.as_str()).collect(); for triple in triples { // triple.subject must be in allowed_subjects (or allowed_subjects empty) // and triple.predicate must be in allowed_preds. if allowed_preds_set.contains(triple.predicate.as_str()) { // filter subjects if list provided let subj = match &triple.subject { Subject::NamedNode(n) => n.clone().into_string(), _ => continue, }; // Subject must be in allowed subjects (or allowed_subjects is empty). if allowed_subject_set.is_empty() || allowed_subject_set.contains(&subj.as_str()) { triples_by_subject .entry(subj) .or_insert_with(Vec::new) .push(triple); } } } return triples_by_subject; } pub fn nuri_to_string(nuri: &NuriV0) -> String { // Get repo_id and overlay_id from the nuri let repo_id = nuri.target.repo_id(); let overlay_id = if let Some(overlay_link) = &nuri.overlay { overlay_link.clone().try_into().unwrap() } else { // Default overlay for the repo OverlayId::outer(repo_id) }; let graph_name = NuriV0::repo_graph_name(repo_id, &overlay_id); graph_name }