remove all dependencies to oxigraph crates, and add dep to ng-rocksdb

pull/19/head
Niko PLP 6 months ago
parent a89d173093
commit 7d6c5190c1
  1. 126
      Cargo.lock
  2. 25
      ng-oxigraph/Cargo.toml
  3. 22
      ng-oxigraph/src/lib.rs
  4. 2
      ng-oxigraph/src/oxigraph/io/format.rs
  5. 2
      ng-oxigraph/src/oxigraph/io/mod.rs
  6. 6
      ng-oxigraph/src/oxigraph/io/read.rs
  7. 6
      ng-oxigraph/src/oxigraph/io/write.rs
  8. 5
      ng-oxigraph/src/oxigraph/mod.rs
  9. 4
      ng-oxigraph/src/oxigraph/model.rs
  10. 9
      ng-oxigraph/src/oxigraph/sparql/algebra.rs
  11. 12
      ng-oxigraph/src/oxigraph/sparql/dataset.rs
  12. 10
      ng-oxigraph/src/oxigraph/sparql/error.rs
  13. 43
      ng-oxigraph/src/oxigraph/sparql/eval.rs
  14. 0
      ng-oxigraph/src/oxigraph/sparql/http/dummy.rs
  15. 0
      ng-oxigraph/src/oxigraph/sparql/http/mod.rs
  16. 0
      ng-oxigraph/src/oxigraph/sparql/http/simple.rs
  17. 31
      ng-oxigraph/src/oxigraph/sparql/mod.rs
  18. 11
      ng-oxigraph/src/oxigraph/sparql/model.rs
  19. 2
      ng-oxigraph/src/oxigraph/sparql/results.rs
  20. 12
      ng-oxigraph/src/oxigraph/sparql/service.rs
  21. 29
      ng-oxigraph/src/oxigraph/sparql/update.rs
  22. 4
      ng-oxigraph/src/oxigraph/storage/backend/fallback.rs
  23. 0
      ng-oxigraph/src/oxigraph/storage/backend/mod.rs
  24. 4
      ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs
  25. 16
      ng-oxigraph/src/oxigraph/storage/binary_encoder.rs
  26. 10
      ng-oxigraph/src/oxigraph/storage/error.rs
  27. 20
      ng-oxigraph/src/oxigraph/storage/mod.rs
  28. 8
      ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs
  29. 0
      ng-oxigraph/src/oxigraph/storage/small_string.rs
  30. 18
      ng-oxigraph/src/oxigraph/store.rs
  31. 51
      ng-oxigraph/src/oxrdf/README.md
  32. 403
      ng-oxigraph/src/oxrdf/blank_node.rs
  33. 1641
      ng-oxigraph/src/oxrdf/dataset.rs
  34. 284
      ng-oxigraph/src/oxrdf/graph.rs
  35. 535
      ng-oxigraph/src/oxrdf/interning.rs
  36. 669
      ng-oxigraph/src/oxrdf/literal.rs
  37. 24
      ng-oxigraph/src/oxrdf/mod.rs
  38. 237
      ng-oxigraph/src/oxrdf/named_node.rs
  39. 469
      ng-oxigraph/src/oxrdf/parser.rs
  40. 1368
      ng-oxigraph/src/oxrdf/triple.rs
  41. 216
      ng-oxigraph/src/oxrdf/variable.rs
  42. 242
      ng-oxigraph/src/oxrdf/vocab.rs
  43. 67
      ng-oxigraph/src/oxrdfio/README.md
  44. 124
      ng-oxigraph/src/oxrdfio/error.rs
  45. 216
      ng-oxigraph/src/oxrdfio/format.rs
  46. 9
      ng-oxigraph/src/oxrdfio/mod.rs
  47. 795
      ng-oxigraph/src/oxrdfio/parser.rs
  48. 412
      ng-oxigraph/src/oxrdfio/serializer.rs
  49. 56
      ng-oxigraph/src/oxrdfxml/README.md
  50. 89
      ng-oxigraph/src/oxrdfxml/error.rs
  51. 8
      ng-oxigraph/src/oxrdfxml/mod.rs
  52. 1237
      ng-oxigraph/src/oxrdfxml/parser.rs
  53. 461
      ng-oxigraph/src/oxrdfxml/serializer.rs
  54. 26
      ng-oxigraph/src/oxrdfxml/utils.rs
  55. 65
      ng-oxigraph/src/oxsdatatypes/README.md
  56. 134
      ng-oxigraph/src/oxsdatatypes/boolean.rs
  57. 3187
      ng-oxigraph/src/oxsdatatypes/date_time.rs
  58. 1099
      ng-oxigraph/src/oxsdatatypes/decimal.rs
  59. 326
      ng-oxigraph/src/oxsdatatypes/double.rs
  60. 1249
      ng-oxigraph/src/oxsdatatypes/duration.rs
  61. 310
      ng-oxigraph/src/oxsdatatypes/float.rs
  62. 400
      ng-oxigraph/src/oxsdatatypes/integer.rs
  63. 21
      ng-oxigraph/src/oxsdatatypes/mod.rs
  64. 54
      ng-oxigraph/src/oxttl/README.md
  65. 977
      ng-oxigraph/src/oxttl/lexer.rs
  66. 314
      ng-oxigraph/src/oxttl/line_formats.rs
  67. 19
      ng-oxigraph/src/oxttl/mod.rs
  68. 1326
      ng-oxigraph/src/oxttl/n3.rs
  69. 564
      ng-oxigraph/src/oxttl/nquads.rs
  70. 580
      ng-oxigraph/src/oxttl/ntriples.rs
  71. 1072
      ng-oxigraph/src/oxttl/terse.rs
  72. 97
      ng-oxigraph/src/oxttl/toolkit/error.rs
  73. 432
      ng-oxigraph/src/oxttl/toolkit/lexer.rs
  74. 13
      ng-oxigraph/src/oxttl/toolkit/mod.rs
  75. 183
      ng-oxigraph/src/oxttl/toolkit/parser.rs
  76. 1252
      ng-oxigraph/src/oxttl/trig.rs
  77. 878
      ng-oxigraph/src/oxttl/turtle.rs
  78. 72
      ng-oxigraph/src/sparesults/README.md
  79. 948
      ng-oxigraph/src/sparesults/csv.rs
  80. 157
      ng-oxigraph/src/sparesults/error.rs
  81. 176
      ng-oxigraph/src/sparesults/format.rs
  82. 1101
      ng-oxigraph/src/sparesults/json.rs
  83. 16
      ng-oxigraph/src/sparesults/mod.rs
  84. 460
      ng-oxigraph/src/sparesults/parser.rs
  85. 427
      ng-oxigraph/src/sparesults/serializer.rs
  86. 340
      ng-oxigraph/src/sparesults/solution.rs
  87. 833
      ng-oxigraph/src/sparesults/xml.rs
  88. 46
      ng-oxigraph/src/spargebra/README.md
  89. 1419
      ng-oxigraph/src/spargebra/algebra.rs
  90. 9
      ng-oxigraph/src/spargebra/mod.rs
  91. 2086
      ng-oxigraph/src/spargebra/parser.rs
  92. 300
      ng-oxigraph/src/spargebra/query.rs
  93. 1012
      ng-oxigraph/src/spargebra/term.rs
  94. 344
      ng-oxigraph/src/spargebra/update.rs
  95. 33
      ng-oxigraph/src/sparopt/README.md
  96. 1662
      ng-oxigraph/src/sparopt/algebra.rs
  97. 5
      ng-oxigraph/src/sparopt/mod.rs
  98. 1082
      ng-oxigraph/src/sparopt/optimizer.rs
  99. 462
      ng-oxigraph/src/sparopt/type_inference.rs
  100. 8
      ng-oxigraph/tests/store.rs
  101. Some files were not shown because too many files have changed in this diff Show More

126
Cargo.lock generated

@ -2911,21 +2911,6 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a"
[[package]]
name = "librocksdb-sys"
version = "0.11.0+8.3.2"
source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#13b3c2022202abff8cfe921ee926d6ca567e66e8"
dependencies = [
"bindgen",
"bzip2-sys",
"cc",
"glob",
"libc",
"libz-sys",
"openssl",
"pkg-config",
]
[[package]]
name = "libz-sys"
version = "1.1.12"
@ -3376,7 +3361,7 @@ dependencies = [
[[package]]
name = "ng-oxigraph"
version = "0.4.0-alpha.8-ng"
version = "0.4.0-alpha.9-ng"
dependencies = [
"codspeed-criterion-compat",
"digest 0.10.7",
@ -3386,20 +3371,18 @@ dependencies = [
"json-event-parser",
"libc",
"md-5",
"memchr",
"ng-rocksdb",
"oxilangtag",
"oxiri",
"oxrdf",
"oxrdfio",
"oxsdatatypes",
"peg",
"quick-xml 0.31.0",
"rand 0.8.5",
"regex",
"rocksdb",
"serde",
"sha1",
"sha2 0.10.7",
"siphasher 0.3.10",
"sparesults",
"spargebra",
"sparopt",
"thiserror",
"zstd",
]
@ -3437,6 +3420,21 @@ dependencies = [
"zeroize",
]
[[package]]
name = "ng-rocksdb"
version = "0.21.0"
source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#95ec9536b1a4088cfa75aae2851df468e64aa451"
dependencies = [
"bindgen",
"bzip2-sys",
"cc",
"glob",
"libc",
"libz-sys",
"openssl",
"pkg-config",
]
[[package]]
name = "ng-sdk-js"
version = "0.1.0"
@ -3468,7 +3466,7 @@ name = "ng-storage-rocksdb"
version = "0.1.0"
dependencies = [
"ng-repo",
"rocksdb",
"ng-rocksdb",
"serde_bare",
]
@ -3896,51 +3894,15 @@ dependencies = [
"thiserror",
]
[[package]]
name = "oxrdfio"
version = "0.1.0-alpha.5"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"oxrdf",
"oxrdfxml",
"oxttl",
"thiserror",
]
[[package]]
name = "oxrdfxml"
version = "0.1.0-alpha.5"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"oxilangtag",
"oxiri",
"oxrdf",
"quick-xml 0.31.0",
"thiserror",
]
[[package]]
name = "oxsdatatypes"
version = "0.2.0-alpha.1"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"js-sys",
"serde",
"thiserror",
]
[[package]]
name = "oxttl"
version = "0.1.0-alpha.5"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"memchr",
"oxilangtag",
"oxiri",
"oxrdf",
"thiserror",
]
[[package]]
name = "packed_simd_2"
version = "0.3.8"
@ -4679,15 +4641,6 @@ dependencies = [
"winreg 0.10.1",
]
[[package]]
name = "rocksdb"
version = "0.21.0"
source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#13b3c2022202abff8cfe921ee926d6ca567e66e8"
dependencies = [
"libc",
"librocksdb-sys",
]
[[package]]
name = "rust-embed"
version = "6.7.0"
@ -5239,41 +5192,6 @@ dependencies = [
"system-deps",
]
[[package]]
name = "sparesults"
version = "0.2.0-alpha.4"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"json-event-parser",
"memchr",
"oxrdf",
"quick-xml 0.31.0",
"thiserror",
]
[[package]]
name = "spargebra"
version = "0.3.0-alpha.4"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"oxilangtag",
"oxiri",
"oxrdf",
"peg",
"rand 0.8.5",
"thiserror",
]
[[package]]
name = "sparopt"
version = "0.1.0-alpha.5-dev"
source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5"
dependencies = [
"oxrdf",
"rand 0.8.5",
"spargebra",
]
[[package]]
name = "spin"
version = "0.9.8"

@ -1,6 +1,6 @@
[package]
name = "ng-oxigraph"
version = "0.4.0-alpha.8-ng"
version = "0.4.0-alpha.9-ng"
authors = ["Tpt <thomas@pellissier-tanon.fr>", "Niko PLP <niko@nextgraph.org>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
@ -16,8 +16,15 @@ edition = "2021"
rust-version = "1.70"
[features]
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
default = ["rdf-star","sep-0002","sep-0006", "oxsdatatypes"]
js = ["getrandom/js", "js-sys"]
rdf-star = []
custom-now = []
xml = []
ttl = []
sep-0002 = []
sep-0006 = []
oxsdatatypes = []
[dependencies]
digest = "0.10"
@ -26,22 +33,20 @@ json-event-parser = "0.2.0-alpha.2"
md-5 = "0.10"
oxilangtag = "0.1"
oxiri = "0.2.3"
oxrdf = { version = "0.2.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { version = "0.1.0-alpha.5", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star"] }
oxsdatatypes = { version = "0.2.0-alpha.1", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main" }
rand = "0.8"
regex = "1.7"
serde = { version = "1.0.142", features = ["derive"] }
sha1 = "0.10"
sha2 = "0.10"
siphasher = ">=0.3, <2.0"
sparesults = { version = "0.2.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star"] }
spargebra = { version = "0.3.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { version = "0.1.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror = "1.0.50"
quick-xml = ">=0.29, <0.32"
memchr = "2.5"
peg = "0.8"
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
rocksdb = { version = "0.21.0", git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] }
ng-rocksdb = { version = "0.21.0", git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] }
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom = "0.2.8"

@ -5,8 +5,20 @@
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;
pub mod oxigraph;
pub mod oxrdf;
pub mod oxrdfio;
pub mod oxsdatatypes;
pub mod oxttl;
pub mod oxrdfxml;
pub mod sparesults;
pub mod spargebra;
pub mod sparopt;

@ -1,6 +1,6 @@
#![allow(deprecated)]
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
use crate::oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
///

@ -36,4 +36,4 @@ pub use self::format::{DatasetFormat, GraphFormat};
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;
pub use crate::oxrdfio::*;

@ -2,9 +2,9 @@
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use crate::oxigraph::io::{DatasetFormat, GraphFormat};
use crate::oxigraph::model::*;
use crate::oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// Parsers for RDF graph serialization formats.

@ -2,9 +2,9 @@
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use crate::oxigraph::io::{DatasetFormat, GraphFormat};
use crate::oxigraph::model::*;
use crate::oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.

@ -0,0 +1,5 @@
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@ -17,6 +17,6 @@
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;
pub use crate::oxrdf::*;
pub use spargebra::term::GroundQuad;
pub use crate::spargebra::term::GroundQuad;

@ -2,10 +2,11 @@
//!
//! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`].
use crate::model::*;
use crate::sparql::eval::Timer;
use oxsdatatypes::DayTimeDuration;
use spargebra::GraphUpdateOperation;
use crate::oxigraph::model::*;
use crate::oxigraph::sparql::eval::Timer;
use crate::oxsdatatypes::DayTimeDuration;
use crate::spargebra;
use crate::spargebra::GraphUpdateOperation;
use std::fmt;
use std::str::FromStr;

@ -1,8 +1,10 @@
use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::storage::{StorageError, StorageReader};
use crate::oxigraph::model::TermRef;
use crate::oxigraph::sparql::algebra::QueryDataset;
use crate::oxigraph::sparql::EvaluationError;
use crate::oxigraph::storage::numeric_encoder::{
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup,
};
use crate::oxigraph::storage::{StorageError, StorageReader};
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;

@ -1,8 +1,8 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use crate::oxigraph::io::RdfParseError;
use crate::oxigraph::model::NamedNode;
use crate::oxigraph::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::oxigraph::sparql::SparqlSyntaxError;
use crate::oxigraph::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;

@ -1,33 +1,34 @@
use crate::model::vocab::{rdf, xsd};
use crate::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple};
use crate::sparql::algebra::{Query, QueryDataset};
use crate::sparql::dataset::DatasetView;
use crate::sparql::error::EvaluationError;
use crate::sparql::model::*;
use crate::sparql::service::ServiceHandler;
use crate::sparql::CustomFunctionRegistry;
use crate::storage::numeric_encoder::*;
use crate::storage::small_string::SmallString;
use crate::oxigraph::model::vocab::{rdf, xsd};
use crate::oxigraph::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple};
use crate::oxigraph::sparql::algebra::{Query, QueryDataset};
use crate::oxigraph::sparql::dataset::DatasetView;
use crate::oxigraph::sparql::error::EvaluationError;
use crate::oxigraph::sparql::model::*;
use crate::oxigraph::sparql::service::ServiceHandler;
use crate::oxigraph::sparql::CustomFunctionRegistry;
use crate::oxigraph::storage::numeric_encoder::*;
use crate::oxigraph::storage::small_string::SmallString;
use crate::oxrdf::{TermRef, Variable};
use crate::oxsdatatypes::*;
use crate::spargebra;
use crate::spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression};
use crate::spargebra::term::{
GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern,
TriplePattern,
};
use crate::sparopt::algebra::{
AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm,
MinusAlgorithm, OrderExpression,
};
use digest::Digest;
use json_event_parser::{JsonEvent, ToWriteJsonWriter};
use md5::Md5;
use oxilangtag::LanguageTag;
use oxiri::Iri;
use oxrdf::{TermRef, Variable};
use oxsdatatypes::*;
use rand::random;
use regex::{Regex, RegexBuilder};
use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512};
use spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression};
use spargebra::term::{
GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern,
TriplePattern,
};
use sparopt::algebra::{
AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm,
MinusAlgorithm, OrderExpression,
};
use std::cell::Cell;
use std::cmp::Ordering;
use std::collections::hash_map::DefaultHasher;

@ -12,22 +12,23 @@ pub mod results;
mod service;
mod update;
use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::EvaluationError;
use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader;
use super::model::{NamedNode, Term};
pub use super::sparql::algebra::{Query, QueryDataset, Update};
use super::sparql::dataset::DatasetView;
pub use super::sparql::error::EvaluationError;
use super::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
pub use super::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
pub use super::sparql::service::ServiceHandler;
use super::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(super) use super::sparql::update::evaluate_update;
use super::storage::StorageReader;
pub use crate::oxrdf::{Variable, VariableNameParseError};
use crate::oxsdatatypes::{DayTimeDuration, Float};
use crate::spargebra;
pub use crate::spargebra::SparqlSyntaxError;
use crate::sparopt::algebra::GraphPattern;
use crate::sparopt::Optimizer;
use json_event_parser::{JsonEvent, ToWriteJsonWriter};
pub use oxrdf::{Variable, VariableNameParseError};
use oxsdatatypes::{DayTimeDuration, Float};
pub use spargebra::SparqlSyntaxError;
use sparopt::algebra::GraphPattern;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Arc;

@ -1,11 +1,11 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
use crate::oxigraph::io::{RdfFormat, RdfSerializer};
use crate::oxigraph::model::*;
use crate::oxigraph::sparql::error::EvaluationError;
use crate::oxigraph::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
pub use sparesults::QuerySolution;
pub use crate::sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
@ -276,6 +276,7 @@ impl Iterator for QueryTripleIter {
}
}
#[cfg(feature = "rdf-star")]
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {

@ -41,4 +41,4 @@
//! );
//! ```
pub use sparesults::*;
pub use crate::sparesults::*;

@ -1,9 +1,9 @@
use crate::model::NamedNode;
use crate::sparql::algebra::Query;
use crate::sparql::error::EvaluationError;
use crate::sparql::http::Client;
use crate::sparql::model::QueryResults;
use crate::sparql::results::QueryResultsFormat;
use crate::oxigraph::model::NamedNode;
use crate::oxigraph::sparql::algebra::Query;
use crate::oxigraph::sparql::error::EvaluationError;
use crate::oxigraph::sparql::http::Client;
use crate::oxigraph::sparql::model::QueryResults;
use crate::oxigraph::sparql::results::QueryResultsFormat;
use std::error::Error;
use std::time::Duration;

@ -1,21 +1,22 @@
use crate::io::{RdfFormat, RdfParser};
use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad};
use crate::sparql::algebra::QueryDataset;
use crate::sparql::dataset::DatasetView;
use crate::sparql::eval::{EncodedTuple, SimpleEvaluator};
use crate::sparql::http::Client;
use crate::sparql::{EvaluationError, Update, UpdateOptions};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm};
use crate::storage::StorageWriter;
use oxiri::Iri;
use spargebra::algebra::{GraphPattern, GraphTarget};
use spargebra::term::{
use crate::oxigraph::io::{RdfFormat, RdfParser};
use crate::oxigraph::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad};
use crate::oxigraph::sparql::algebra::QueryDataset;
use crate::oxigraph::sparql::dataset::DatasetView;
use crate::oxigraph::sparql::eval::{EncodedTuple, SimpleEvaluator};
use crate::oxigraph::sparql::http::Client;
use crate::oxigraph::sparql::{EvaluationError, Update, UpdateOptions};
use crate::oxigraph::storage::numeric_encoder::{Decoder, EncodedTerm};
use crate::oxigraph::storage::StorageWriter;
use crate::spargebra::algebra::{GraphPattern, GraphTarget};
use crate::spargebra::term::{
BlankNode, GraphName, GraphNamePattern, GroundQuad, GroundQuadPattern, GroundSubject,
GroundTerm, GroundTermPattern, GroundTriple, GroundTriplePattern, NamedNode, NamedNodePattern,
Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable,
};
use spargebra::GraphUpdateOperation;
use sparopt::Optimizer;
use crate::spargebra::GraphUpdateOperation;
use crate::sparopt;
use crate::sparopt::Optimizer;
use oxiri::Iri;
use std::collections::HashMap;
use std::io;
use std::rc::Rc;

@ -1,7 +1,7 @@
//! TODO: This storage is dramatically naive.
use crate::storage::StorageError;
use crate::store::CorruptionError;
use crate::oxigraph::storage::StorageError;
use crate::oxigraph::store::CorruptionError;
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use std::error::Error;

@ -8,10 +8,10 @@
clippy::unwrap_in_result
)]
use crate::storage::error::{CorruptionError, StorageError};
use crate::oxigraph::storage::error::{CorruptionError, StorageError};
use libc::{c_char, c_void};
use ng_rocksdb::ffi::*;
use rand::random;
use rocksdb::ffi::*;
use std::borrow::Borrow;
#[cfg(unix)]
use std::cmp::min;

@ -1,7 +1,7 @@
use crate::storage::error::{CorruptionError, StorageError};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash};
use crate::storage::small_string::SmallString;
use oxsdatatypes::*;
use crate::oxigraph::storage::error::{CorruptionError, StorageError};
use crate::oxigraph::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash};
use crate::oxigraph::storage::small_string::SmallString;
use crate::oxsdatatypes::*;
use std::io::Read;
use std::mem::size_of;
@ -635,8 +635,8 @@ pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
use crate::model::TermRef;
use crate::storage::numeric_encoder::*;
use crate::oxigraph::model::TermRef;
use crate::oxigraph::storage::numeric_encoder::*;
use std::cell::RefCell;
use std::collections::HashMap;
@ -670,8 +670,8 @@ mod tests {
#[test]
fn test_encoding() {
use crate::model::vocab::xsd;
use crate::model::*;
use crate::oxigraph::model::vocab::xsd;
use crate::oxigraph::model::*;
let store = MemoryStrStore::default();
let terms: Vec<Term> = vec![

@ -1,7 +1,7 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use crate::oxigraph::io::{RdfFormat, RdfParseError};
use crate::oxigraph::storage::numeric_encoder::EncodedTerm;
use crate::oxrdf::TermRef;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
@ -78,7 +78,7 @@ impl From<CorruptionError> for io::Error {
}
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
/// An error raised while loading a file into a [`Store`](crate::oxigraph::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
@ -111,7 +111,7 @@ impl From<LoaderError> for io::Error {
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
/// An error raised while writing a file from a [`Store`](crate::oxigraph::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.

@ -1,20 +1,24 @@
#![allow(clippy::same_name_method)]
#[cfg(all(not(target_family = "wasm")))]
use crate::model::Quad;
use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef};
use crate::storage::backend::{Reader, Transaction};
use crate::oxigraph::model::Quad;
use crate::oxigraph::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef};
use crate::oxigraph::storage::backend::{Reader, Transaction};
#[cfg(all(not(target_family = "wasm")))]
use crate::storage::binary_encoder::LATEST_STORAGE_VERSION;
use crate::storage::binary_encoder::{
use crate::oxigraph::storage::binary_encoder::LATEST_STORAGE_VERSION;
use crate::oxigraph::storage::binary_encoder::{
decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple,
write_gosp_quad, write_gpos_quad, write_gspo_quad, write_osp_quad, write_ospg_quad,
write_pos_quad, write_posg_quad, write_spo_quad, write_spog_quad, write_term, QuadEncoding,
WRITTEN_TERM_MAX_SIZE,
};
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError};
pub use crate::oxigraph::storage::error::{
CorruptionError, LoaderError, SerializerError, StorageError,
};
#[cfg(all(not(target_family = "wasm")))]
use crate::storage::numeric_encoder::Decoder;
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::oxigraph::storage::numeric_encoder::Decoder;
use crate::oxigraph::storage::numeric_encoder::{
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup,
};
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter};
#[cfg(all(not(target_family = "wasm")))]
use std::collections::VecDeque;

@ -1,9 +1,9 @@
#![allow(clippy::unreadable_literal)]
use crate::model::*;
use crate::storage::error::{CorruptionError, StorageError};
use crate::storage::small_string::SmallString;
use oxsdatatypes::*;
use crate::oxigraph::model::*;
use crate::oxigraph::storage::error::{CorruptionError, StorageError};
use crate::oxigraph::storage::small_string::SmallString;
use crate::oxsdatatypes::*;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::fmt::Debug;
use std::hash::{Hash, Hasher};

@ -26,20 +26,20 @@
//! # Result::<_, Box<dyn std::error::Error>>::Ok(())
//! ```
#[cfg(all(not(target_family = "wasm")))]
use crate::io::RdfParseError;
use crate::io::{RdfFormat, RdfParser, RdfSerializer};
use crate::model::*;
use crate::sparql::{
use super::io::RdfParseError;
use super::io::{RdfFormat, RdfParser, RdfSerializer};
use super::model::*;
use super::sparql::{
evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions,
QueryResults, Update, UpdateOptions,
};
use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm};
use super::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm};
#[cfg(all(not(target_family = "wasm")))]
use crate::storage::StorageBulkLoader;
use crate::storage::{
use super::storage::StorageBulkLoader;
use super::storage::{
ChainedDecodingQuadIterator, DecodingGraphIterator, Storage, StorageReader, StorageWriter,
};
pub use crate::storage::{CorruptionError, LoaderError, SerializerError, StorageError};
pub use super::storage::{CorruptionError, LoaderError, SerializerError, StorageError};
use std::error::Error;
use std::io::{Read, Write};
#[cfg(all(not(target_family = "wasm")))]
@ -1930,7 +1930,7 @@ mod tests {
#[test]
fn store() -> Result<(), StorageError> {
use crate::model::*;
use super::super::model::*;
let main_s = Subject::from(BlankNode::default());
let main_p = NamedNode::new("http://example.com").unwrap();

@ -0,0 +1,51 @@
OxRDF
=====
[![Latest Version](https://img.shields.io/crates/v/oxrdf.svg)](https://crates.io/crates/oxrdf)
[![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRDF is a simple library providing datastructures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/).
This crate is intended to be a basic building block of other crates like [Oxigraph](https://crates.io/crates/oxigraph) or [Spargebra](https://crates.io/crates/spargebra).
Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is available behind the `rdf-star` feature.
OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/).
Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files.
Usage example:
```rust
use oxrdf::*;
let mut graph = Graph::default();
// insertion
let ex = NamedNodeRef::new("http://example.com").unwrap();
let triple = TripleRef::new(ex, ex, ex);
graph.insert(triple);
// simple filter
let results: Vec<_> = graph.triples_for_subject(ex).collect();
assert_eq!(vec![triple], results);
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,403 @@
use rand::random;
use serde::{Deserialize, Serialize};
use std::io::Write;
use std::{fmt, str};
/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
/// The common way to create a new blank node is to use the [`BlankNode::default()`] function.
///
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new()`] function.
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::BlankNode;
///
/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
// Newtype over the private `BlankNodeContent` enum so the named/anonymous
// distinction stays an implementation detail of this module.
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
pub struct BlankNode(BlankNodeContent);
/// Internal representation of a [`BlankNode`].
///
/// `Named` holds a caller-supplied identifier string. `Anonymous` holds a
/// 128-bit numeric id together with its string form (`IdStr`, built in
/// [`BlankNode::new_from_unique_id`]), so `as_str()` never has to allocate.
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
enum BlankNodeContent {
    Named(String),
    Anonymous { id: u128, str: IdStr },
}
impl BlankNode {
    /// Creates a blank node from a unique identifier.
    ///
    /// The identifier must be valid according to the N-Triples, Turtle, and
    /// SPARQL grammars; otherwise a [`BlankNodeIdParseError`] is returned.
    ///
    /// In most cases it is more convenient to use [`BlankNode::default()`],
    /// which generates a random id that stores can inline cheaply.
    pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
        let identifier = id.into();
        validate_blank_node_identifier(&identifier)?;
        Ok(Self::new_unchecked(identifier))
    }
    /// Creates a blank node from a unique identifier without validation.
    ///
    /// It is the caller's responsibility to ensure that `id` is a valid blank
    /// node identifier according to the N-Triples, Turtle, and SPARQL grammars.
    ///
    /// [`BlankNode::new()`] is the safe variant and should be preferred for
    /// untrusted input.
    #[inline]
    pub fn new_unchecked(id: impl Into<String>) -> Self {
        let id = id.into();
        // Purely-numeric identifiers are normalized into the anonymous form.
        match to_integer_id(&id) {
            Some(numeric) => Self::new_from_unique_id(numeric),
            None => Self(BlankNodeContent::Named(id)),
        }
    }
    /// Creates a blank node from a unique numerical id.
    ///
    /// In most cases it is more convenient to use [`BlankNode::default()`].
    #[inline]
    pub fn new_from_unique_id(id: u128) -> Self {
        let str = IdStr::new(id);
        Self(BlankNodeContent::Anonymous { id, str })
    }
    /// Returns the underlying ID of this blank node as a string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        match &self.0 {
            BlankNodeContent::Anonymous { str, .. } => str.as_str(),
            BlankNodeContent::Named(id) => id.as_str(),
        }
    }
    /// Consumes this blank node and returns its underlying ID as an owned `String`.
    #[inline]
    pub fn into_string(self) -> String {
        match self.0 {
            BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(),
            BlankNodeContent::Named(id) => id,
        }
    }
    /// Returns a borrowed view ([`BlankNodeRef`]) of this blank node.
    #[inline]
    pub fn as_ref(&self) -> BlankNodeRef<'_> {
        let content = match &self.0 {
            BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()),
            BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous {
                id: *id,
                str: str.as_str(),
            },
        };
        BlankNodeRef(content)
    }
}
impl fmt::Display for BlankNode {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the borrowed form, which writes the `_:`-prefixed representation.
        self.as_ref().fmt(f)
    }
}

impl Default for BlankNode {
    /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
    #[inline]
    fn default() -> Self {
        // We ensure the ID does not start with a number to be also valid with RDF/XML
        // (rejection sampling: redraw until the hex form starts with a letter a-f).
        loop {
            let id = random();
            let str = IdStr::new(id);
            if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
                return Self(BlankNodeContent::Anonymous { id, str });
            }
        }
    }
}
/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
///
/// The common way to create a new blank node is to use the [`BlankNode::default`] trait method.
///
/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new()`] function.
/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::BlankNodeRef;
///
/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>);

#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
enum BlankNodeRefContent<'a> {
    // Identifier borrowed verbatim.
    Named(&'a str),
    // Numerical identifier plus the borrowed string it was parsed from.
    Anonymous { id: u128, str: &'a str },
}
impl<'a> BlankNodeRef<'a> {
    /// Creates a blank node from a unique identifier.
    ///
    /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
    ///
    /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
    /// that creates a random ID that could be easily inlined by Oxigraph stores.
    pub fn new(id: &'a str) -> Result<Self, BlankNodeIdParseError> {
        validate_blank_node_identifier(id)?;
        Ok(Self::new_unchecked(id))
    }

    /// Creates a blank node from a unique identifier without validation.
    ///
    /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
    /// according to N-Triples, Turtle, and SPARQL grammars.
    ///
    /// [`BlankNodeRef::new()`] is a safe version of this constructor and should be used for untrusted data.
    #[inline]
    pub fn new_unchecked(id: &'a str) -> Self {
        // Identifiers that parse as lowercase hex numbers are stored with their
        // numerical value so they compare equal to `BlankNode::new_from_unique_id` ids.
        if let Some(numerical_id) = to_integer_id(id) {
            Self(BlankNodeRefContent::Anonymous {
                id: numerical_id,
                str: id,
            })
        } else {
            Self(BlankNodeRefContent::Named(id))
        }
    }

    /// Returns the underlying ID of this blank node.
    #[inline]
    pub const fn as_str(self) -> &'a str {
        match self.0 {
            BlankNodeRefContent::Named(id) => id,
            BlankNodeRefContent::Anonymous { str, .. } => str,
        }
    }

    /// Returns the internal numerical ID of this blank node if it has been created using [`BlankNode::new_from_unique_id`].
    ///
    /// ```
    /// use oxrdf::BlankNode;
    ///
    /// assert_eq!(
    ///     BlankNode::new_from_unique_id(128).as_ref().unique_id(),
    ///     Some(128)
    /// );
    /// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
    /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
    /// ```
    #[inline]
    pub const fn unique_id(&self) -> Option<u128> {
        match self.0 {
            BlankNodeRefContent::Named(_) => None,
            BlankNodeRefContent::Anonymous { id, .. } => Some(id),
        }
    }

    /// Converts this borrowed blank node into an owned [`BlankNode`].
    #[inline]
    pub fn into_owned(self) -> BlankNode {
        BlankNode(match self.0 {
            BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()),
            BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous {
                id,
                str: IdStr::new(id),
            },
        })
    }
}
impl fmt::Display for BlankNodeRef<'_> {
    // N-Triples / Turtle / SPARQL representation: the id prefixed with `_:`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "_:{}", self.as_str())
    }
}

impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> {
    #[inline]
    fn from(node: &'a BlankNode) -> Self {
        node.as_ref()
    }
}

impl<'a> From<BlankNodeRef<'a>> for BlankNode {
    #[inline]
    fn from(node: BlankNodeRef<'a>) -> Self {
        node.into_owned()
    }
}

// Owned and borrowed blank nodes compare equal when their contents match.
impl PartialEq<BlankNode> for BlankNodeRef<'_> {
    #[inline]
    fn eq(&self, other: &BlankNode) -> bool {
        *self == other.as_ref()
    }
}

impl PartialEq<BlankNodeRef<'_>> for BlankNode {
    #[inline]
    fn eq(&self, other: &BlankNodeRef<'_>) -> bool {
        self.as_ref() == *other
    }
}
// Fixed-size buffer holding the lowercase hexadecimal form of a `u128` id.
// 32 bytes is exactly enough for `u128::MAX`; shorter ids leave trailing
// zero bytes that are not part of the string.
#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
struct IdStr([u8; 32]);

impl IdStr {
    #[inline]
    fn new(id: u128) -> Self {
        let mut str = [0; 32];
        // Cannot fail: the hex form of a u128 is at most 32 bytes.
        write!(&mut str[..], "{id:x}").unwrap();
        Self(str)
    }

    // Returns the id as a string slice, stopping at the first zero byte.
    #[inline]
    fn as_str(&self) -> &str {
        let len = self.0.iter().position(|x| x == &0).unwrap_or(32);
        str::from_utf8(&self.0[..len]).unwrap()
    }
}
/// Validates a blank node identifier against the N-Triples, Turtle, and SPARQL grammars.
///
/// The first character must come from the name-start set; following characters
/// may additionally be `.`, `-`, `·` and combining marks. A trailing `.` is rejected.
fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
    // Characters allowed at the start of an identifier.
    fn is_name_start_char(c: char) -> bool {
        matches!(
            c,
            '0'..='9'
                | '_'
                | ':'
                | 'A'..='Z'
                | 'a'..='z'
                | '\u{00C0}'..='\u{00D6}'
                | '\u{00D8}'..='\u{00F6}'
                | '\u{00F8}'..='\u{02FF}'
                | '\u{0370}'..='\u{037D}'
                | '\u{037F}'..='\u{1FFF}'
                | '\u{200C}'..='\u{200D}'
                | '\u{2070}'..='\u{218F}'
                | '\u{2C00}'..='\u{2FEF}'
                | '\u{3001}'..='\u{D7FF}'
                | '\u{F900}'..='\u{FDCF}'
                | '\u{FDF0}'..='\u{FFFD}'
                | '\u{10000}'..='\u{EFFFF}'
        )
    }

    // Characters allowed after the first one. '.' is accepted here; the
    // "no trailing dot" rule is enforced separately below.
    fn is_name_char(c: char) -> bool {
        is_name_start_char(c)
            || matches!(
                c,
                '.' | '-' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'
            )
    }

    let mut chars = id.chars();
    let first = chars.next().ok_or(BlankNodeIdParseError)?;
    if !is_name_start_char(first) {
        return Err(BlankNodeIdParseError);
    }
    if !chars.all(is_name_char) {
        return Err(BlankNodeIdParseError);
    }
    // An identifier may contain dots but cannot end with one.
    if id.ends_with('.') {
        Err(BlankNodeIdParseError)
    } else {
        Ok(())
    }
}
#[inline]
/// Parses `id` as a lowercase hexadecimal `u128`.
///
/// Returns `None` for the empty string, a leading zero, any non-`[0-9a-f]`
/// character (uppercase is deliberately rejected), or overflow — in which
/// case the identifier is kept as a plain string instead.
fn to_integer_id(id: &str) -> Option<u128> {
    let bytes = id.as_bytes();
    // No empty string or leading zeros.
    match bytes.first() {
        None | Some(b'0') => return None,
        _ => {}
    }
    bytes.iter().try_fold(0_u128, |acc, &byte| {
        let digit = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            _ => return None,
        };
        // checked_* guards against values larger than u128::MAX.
        acc.checked_mul(16)?.checked_add(u128::from(digit))
    })
}
/// An error raised during [`BlankNode`] IDs validation.
// Unit struct: the offending identifier is known to the caller, so no payload is stored.
#[derive(Debug, thiserror::Error)]
#[error("The blank node identifier is invalid")]
pub struct BlankNodeIdParseError;
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    #[test]
    fn as_str_partial() {
        // Short ids are rendered without leading zero padding.
        let b = BlankNode::new_from_unique_id(0x42);
        assert_eq!(b.as_str(), "42");
    }

    #[test]
    fn as_str_full() {
        // A full-width u128 uses all 32 hex digits of the IdStr buffer.
        let b = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000);
        assert_eq!(b.as_str(), "77776666555544443333222211110000");
    }

    #[test]
    fn new_validation() {
        // Empty ids and ids starting with '-' or '.' are rejected;
        // '.' is only valid in the middle of an identifier, never at the end.
        BlankNode::new("").unwrap_err();
        BlankNode::new("a").unwrap();
        BlankNode::new("-").unwrap_err();
        BlankNode::new("a-").unwrap();
        BlankNode::new(".").unwrap_err();
        BlankNode::new("a.").unwrap_err();
        BlankNode::new("a.a").unwrap();
    }

    #[test]
    fn new_numerical() {
        // Lowercase hex ids normalize to the numerical representation;
        // uppercase ones stay string-named and thus compare unequal.
        assert_eq!(
            BlankNode::new("100a").unwrap(),
            BlankNode::new_from_unique_id(0x100a),
        );
        assert_ne!(
            BlankNode::new("100A").unwrap(),
            BlankNode::new_from_unique_id(0x100a)
        );
    }

    #[test]
    fn test_equals() {
        // Owned and borrowed blank nodes compare equal in both representations.
        assert_eq!(
            BlankNode::new("100a").unwrap(),
            BlankNodeRef::new("100a").unwrap()
        );
        assert_eq!(
            BlankNode::new("zzz").unwrap(),
            BlankNodeRef::new("zzz").unwrap()
        );
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,284 @@
//! [In-memory implementation](Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
//!
//! Usage example:
//! ```
//! use oxrdf::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com")?;
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//!
//! // Print
//! assert_eq!(
//! graph.to_string(),
//! "<http://example.com> <http://example.com> <http://example.com> .\n"
//! );
//! # Result::<_,Box<dyn std::error::Error>>::Ok(())
//! ```
//!
//! See also [`Dataset`] if you want to get support of multiple RDF graphs at the same time.
pub use crate::oxrdf::dataset::CanonicalizationAlgorithm;
use crate::oxrdf::dataset::*;
use crate::oxrdf::*;
use std::fmt;
/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
///
/// It can accommodate a fairly large number of triples (in the few millions).
///
/// <div class="warning">It interns the string and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.</div>
///
/// Usage example:
/// ```
/// use oxrdf::*;
///
/// let mut graph = Graph::default();
///
/// // insertion
/// let ex = NamedNodeRef::new("http://example.com")?;
/// let triple = TripleRef::new(ex, ex, ex);
/// graph.insert(triple);
///
/// // simple filter
/// let results: Vec<_> = graph.triples_for_subject(ex).collect();
/// assert_eq!(vec![triple], results);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Debug, Default, Clone)]
pub struct Graph {
    // All triples live in the default graph of this backing dataset;
    // every operation delegates through `graph()`/`graph_mut()`.
    dataset: Dataset,
}
impl Graph {
    /// Creates a new graph.
    pub fn new() -> Self {
        Self::default()
    }

    // Read-only view of the default graph of the backing dataset.
    fn graph(&self) -> GraphView<'_> {
        self.dataset.graph(GraphNameRef::DefaultGraph)
    }

    // Mutable view of the default graph of the backing dataset.
    fn graph_mut(&mut self) -> GraphViewMut<'_> {
        self.dataset.graph_mut(GraphNameRef::DefaultGraph)
    }

    /// Returns all the triples contained by the graph.
    pub fn iter(&self) -> Iter<'_> {
        Iter {
            inner: self.graph().iter(),
        }
    }

    /// Returns all the triples with the given subject.
    pub fn triples_for_subject<'a, 'b>(
        &'a self,
        subject: impl Into<SubjectRef<'b>>,
    ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
        self.graph()
            .triples_for_interned_subject(self.dataset.encoded_subject(subject))
    }

    /// Returns all the objects of triples with the given subject and predicate.
    pub fn objects_for_subject_predicate<'a, 'b>(
        &'a self,
        subject: impl Into<SubjectRef<'b>>,
        predicate: impl Into<NamedNodeRef<'b>>,
    ) -> impl Iterator<Item = TermRef<'a>> + 'a {
        self.graph().objects_for_interned_subject_predicate(
            self.dataset.encoded_subject(subject),
            self.dataset.encoded_named_node(predicate),
        )
    }

    /// Returns one object (if any) of a triple with the given subject and predicate.
    pub fn object_for_subject_predicate<'a, 'b>(
        &'a self,
        subject: impl Into<SubjectRef<'b>>,
        predicate: impl Into<NamedNodeRef<'b>>,
    ) -> Option<TermRef<'a>> {
        self.graph()
            .objects_for_subject_predicate(subject, predicate)
            .next()
    }

    /// Returns all the predicates of triples with the given subject and object.
    pub fn predicates_for_subject_object<'a, 'b>(
        &'a self,
        subject: impl Into<SubjectRef<'b>>,
        object: impl Into<TermRef<'b>>,
    ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a {
        self.graph().predicates_for_interned_subject_object(
            self.dataset.encoded_subject(subject),
            self.dataset.encoded_term(object),
        )
    }

    /// Returns all the triples with the given predicate.
    pub fn triples_for_predicate<'a, 'b>(
        &'a self,
        predicate: impl Into<NamedNodeRef<'b>>,
    ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
        self.graph()
            .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate))
    }

    /// Returns all the subjects of triples with the given predicate and object.
    pub fn subjects_for_predicate_object<'a, 'b>(
        &'a self,
        predicate: impl Into<NamedNodeRef<'b>>,
        object: impl Into<TermRef<'b>>,
    ) -> impl Iterator<Item = SubjectRef<'a>> + 'a {
        self.graph().subjects_for_interned_predicate_object(
            self.dataset.encoded_named_node(predicate),
            self.dataset.encoded_term(object),
        )
    }

    /// Returns one subject (if any) of a triple with the given predicate and object.
    pub fn subject_for_predicate_object<'a, 'b>(
        &'a self,
        predicate: impl Into<NamedNodeRef<'b>>,
        object: impl Into<TermRef<'b>>,
    ) -> Option<SubjectRef<'a>> {
        self.graph().subject_for_predicate_object(predicate, object)
    }

    /// Returns all the triples with the given object.
    pub fn triples_for_object<'a, 'b>(
        &'a self,
        object: impl Into<TermRef<'b>>,
    ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
        self.graph()
            .triples_for_interned_object(self.dataset.encoded_term(object))
    }

    /// Checks if the graph contains the given triple.
    pub fn contains<'a>(&self, triple: impl Into<TripleRef<'a>>) -> bool {
        self.graph().contains(triple)
    }

    /// Returns the number of triples in this graph.
    pub fn len(&self) -> usize {
        self.dataset.len()
    }

    /// Checks if this graph contains a triple.
    pub fn is_empty(&self) -> bool {
        self.dataset.is_empty()
    }

    /// Adds a triple to the graph.
    pub fn insert<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> bool {
        self.graph_mut().insert(triple)
    }

    /// Removes a concrete triple from the graph.
    pub fn remove<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> bool {
        self.graph_mut().remove(triple)
    }

    /// Clears the graph.
    pub fn clear(&mut self) {
        self.dataset.clear()
    }

    /// Canonicalizes the dataset by renaming blank nodes.
    ///
    /// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
    /// ```
    /// use oxrdf::graph::CanonicalizationAlgorithm;
    /// use oxrdf::*;
    ///
    /// let iri = NamedNodeRef::new("http://example.com")?;
    ///
    /// let mut graph1 = Graph::new();
    /// let bnode1 = BlankNode::default();
    /// graph1.insert(TripleRef::new(iri, iri, &bnode1));
    /// graph1.insert(TripleRef::new(&bnode1, iri, iri));
    ///
    /// let mut graph2 = Graph::new();
    /// let bnode2 = BlankNode::default();
    /// graph2.insert(TripleRef::new(iri, iri, &bnode2));
    /// graph2.insert(TripleRef::new(&bnode2, iri, iri));
    ///
    /// assert_ne!(graph1, graph2);
    /// graph1.canonicalize(CanonicalizationAlgorithm::Unstable);
    /// graph2.canonicalize(CanonicalizationAlgorithm::Unstable);
    /// assert_eq!(graph1, graph2);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    ///
    /// <div class="warning">Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
    /// Hence, this canonization might not be suitable for diffs.</div>
    ///
    /// <div class="warning">This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.</div>
    pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) {
        self.dataset.canonicalize(algorithm)
    }
}
impl PartialEq for Graph {
    fn eq(&self, other: &Self) -> bool {
        // Two graphs are equal iff their backing datasets are equal.
        self.dataset == other.dataset
    }
}

impl Eq for Graph {}

impl<'a> IntoIterator for &'a Graph {
    type Item = TripleRef<'a>;
    type IntoIter = Iter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

impl FromIterator<Triple> for Graph {
    fn from_iter<I: IntoIterator<Item = Triple>>(iter: I) -> Self {
        let mut g = Self::new();
        g.extend(iter);
        g
    }
}

// Also accept anything convertible to a borrowed triple.
impl<'a, T: Into<TripleRef<'a>>> FromIterator<T> for Graph {
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        let mut g = Self::new();
        g.extend(iter);
        g
    }
}

impl Extend<Triple> for Graph {
    fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) {
        self.graph_mut().extend(iter)
    }
}

impl<'a, T: Into<TripleRef<'a>>> Extend<T> for Graph {
    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
        self.graph_mut().extend(iter)
    }
}

impl fmt::Display for Graph {
    // Delegates to the default-graph view's Display implementation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.graph().fmt(f)
    }
}
/// Iterator returned by [`Graph::iter`].
pub struct Iter<'a> {
    // Iterator over the default graph of the backing dataset.
    inner: GraphViewIter<'a>,
}

impl<'a> Iterator for Iter<'a> {
    type Item = TripleRef<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }
}

@ -0,0 +1,535 @@
//! Interning of RDF elements using Rodeo
use crate::oxrdf::*;
use std::collections::hash_map::{Entry, HashMap, RandomState};
use std::hash::{BuildHasher, Hasher};
#[derive(Debug, Default, Clone)]
pub struct Interner {
    // Provides the initial probe position for each string (see `hash`).
    hasher: RandomState,
    // Open-addressed map from (possibly probed) hash to the interned string;
    // keys are already hashes, hence the identity hasher.
    string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
    // Original string form of blank nodes identified by a numerical id.
    string_for_blank_node_id: HashMap<u128, String>,
    // Owned triples referenced by interned quoted triples (RDF-star).
    #[cfg(feature = "rdf-star")]
    triples: HashMap<InternedTriple, Triple>,
}
impl Interner {
    /// Interns `value` and returns its key, inserting it if it is not already present.
    ///
    /// Collision handling is open addressing: starting at the string's hash,
    /// slots are probed linearly (wrapping before `u64::MAX`, which `hash`
    /// reserves) until the same string or a free slot is found.
    fn get_or_intern(&mut self, value: &str) -> Key {
        let mut hash = self.hash(value);
        loop {
            match self.string_for_hash.entry(hash) {
                Entry::Vacant(e) => {
                    e.insert(value.into());
                    return Key(hash);
                }
                Entry::Occupied(e) => {
                    if e.get() == value {
                        return Key(hash);
                    }
                    // Collision with a different string: advance to the next slot and
                    // re-probe the map from the top of the loop. (The previous version
                    // looped here without re-reading the map, so any hash collision
                    // would compare against the same occupied entry forever.)
                    hash = if hash == u64::MAX - 1 { 0 } else { hash + 1 };
                }
            }
        }
    }

    /// Looks up the key of an already-interned string without inserting it.
    ///
    /// Uses the same linear-probing scheme as `get_or_intern`; returns `None`
    /// as soon as a free slot is reached.
    fn get(&self, value: &str) -> Option<Key> {
        let mut hash = self.hash(value);
        loop {
            let v = self.string_for_hash.get(&hash)?;
            if v == value {
                return Some(Key(hash));
            } else if hash == u64::MAX - 1 {
                hash = 0;
            } else {
                hash += 1;
            }
        }
    }

    /// Hashes `value`, remapping `u64::MAX` to 0 so `u64::MAX` stays free
    /// for `Key::impossible()`.
    fn hash(&self, value: &str) -> u64 {
        let mut hasher = self.hasher.build_hasher();
        hasher.write(value.as_bytes());
        let hash = hasher.finish();
        if hash == u64::MAX {
            0
        } else {
            hash
        }
    }

    /// Resolves a key previously returned by `get_or_intern`/`get`.
    ///
    /// Panics if the key does not come from this interner.
    fn resolve(&self, key: Key) -> &str {
        &self.string_for_hash[&key.0]
    }
}
// Key of an interned string: its (possibly probed) slot in `Interner::string_for_hash`.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);

impl Key {
    // Smallest possible key.
    fn first() -> Self {
        Self(0)
    }

    // Next key in increasing order (saturates instead of wrapping).
    fn next(self) -> Self {
        Self(self.0.saturating_add(1))
    }

    // `u64::MAX` can never be produced by `Interner::hash`, so this key
    // never matches an interned string.
    fn impossible() -> Self {
        Self(u64::MAX)
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct InternedNamedNode {
    id: Key,
}

impl InternedNamedNode {
    /// Interns the IRI of `named_node` and returns its compact form.
    pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
        let id = interner.get_or_intern(named_node.as_str());
        Self { id }
    }

    /// Returns the compact form of `named_node` if its IRI is already interned.
    pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
        interner.get(named_node.as_str()).map(|id| Self { id })
    }

    /// Resolves the interned IRI back into a borrowed named node.
    pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
        NamedNodeRef::new_unchecked(interner.resolve(self.id))
    }

    /// Smallest possible interned named node.
    pub fn first() -> Self {
        Self { id: Key::first() }
    }

    /// Successor in the key order.
    pub fn next(self) -> Self {
        Self { id: self.id.next() }
    }

    /// Value that can never be produced by interning.
    pub fn impossible() -> Self {
        Self {
            id: Key::impossible(),
        }
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedBlankNode {
    // Blank node with a numerical id, stored inline.
    Number { id: u128 },
    // Any other blank node identifier, interned as a string.
    Other { id: Key },
}

impl InternedBlankNode {
    /// Interns `blank_node`, storing numerical ids inline.
    pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
        if let Some(id) = blank_node.unique_id() {
            // Remember the original string form so decoding can return it verbatim.
            interner
                .string_for_blank_node_id
                .entry(id)
                .or_insert_with(|| blank_node.as_str().into());
            Self::Number { id }
        } else {
            Self::Other {
                id: interner.get_or_intern(blank_node.as_str()),
            }
        }
    }

    /// Returns the interned form of `blank_node` if it is already known to `interner`.
    pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
        if let Some(id) = blank_node.unique_id() {
            interner
                .string_for_blank_node_id
                .contains_key(&id)
                .then_some(Self::Number { id })
        } else {
            Some(Self::Other {
                id: interner.get(blank_node.as_str())?,
            })
        }
    }

    /// Resolves the interned blank node back into a borrowed one.
    pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
        BlankNodeRef::new_unchecked(match self {
            Self::Number { id } => &interner.string_for_blank_node_id[&id],
            Self::Other { id } => interner.resolve(id),
        })
    }

    /// Successor in the interning order (saturates instead of wrapping).
    pub fn next(self) -> Self {
        match self {
            Self::Number { id } => Self::Number {
                id: id.saturating_add(1),
            },
            Self::Other { id } => Self::Other { id: id.next() },
        }
    }
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub enum InternedLiteral {
String {
value_id: Key,
},
LanguageTaggedString {
value_id: Key,
language_id: Key,
},
TypedLiteral {
value_id: Key,
datatype: InternedNamedNode,
},
}
impl InternedLiteral {
pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self {
let value_id = interner.get_or_intern(literal.value());
if literal.is_plain() {
if let Some(language) = literal.language() {
Self::LanguageTaggedString {
value_id,
language_id: interner.get_or_intern(language),
}
} else {
Self::String { value_id }
}
} else {
Self::TypedLiteral {
value_id,
datatype: InternedNamedNode::encoded_into(literal.datatype(), interner),
}
}
}
pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> {
let value_id = interner.get(literal.value())?;
Some(if literal.is_plain() {
if let Some(language) = literal.language() {
Self::LanguageTaggedString {
value_id,
language_id: interner.get(language)?,
}
} else {
Self::String { value_id }
}
} else {
Self::TypedLiteral {
value_id,
datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?,
}
})
}
pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> {
match self {
Self::String { value_id } => {
LiteralRef::new_simple_literal(interner.resolve(*value_id))
}
Self::LanguageTaggedString {
value_id,
language_id,
} => LiteralRef::new_language_tagged_literal_unchecked(
interner.resolve(*value_id),
interner.resolve(*language_id),
),
Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal(
interner.resolve(*value_id),
datatype.decode_from(interner),
),
}
}
pub fn next(&self) -> Self {
match self {
Self::String { value_id } => Self::String {
value_id: value_id.next(),
},
Self::LanguageTaggedString {
value_id,
language_id,
} => Self::LanguageTaggedString {
value_id: *value_id,
language_id: language_id.next(),
},
Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral {
value_id: *value_id,
datatype: datatype.next(),
},
}
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum InternedSubject {
    NamedNode(InternedNamedNode),
    BlankNode(InternedBlankNode),
    // Quoted triple subject (RDF-star only).
    #[cfg(feature = "rdf-star")]
    Triple(Box<InternedTriple>),
}

impl InternedSubject {
    /// Interns `node` as a triple subject.
    pub fn encoded_into(node: SubjectRef<'_>, interner: &mut Interner) -> Self {
        match node {
            SubjectRef::NamedNode(node) => {
                Self::NamedNode(InternedNamedNode::encoded_into(node, interner))
            }
            SubjectRef::BlankNode(node) => {
                Self::BlankNode(InternedBlankNode::encoded_into(node, interner))
            }
            #[cfg(feature = "rdf-star")]
            SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into(
                triple.as_ref(),
                interner,
            ))),
        }
    }

    /// Returns the interned form of `node` if it is already interned.
    pub fn encoded_from(node: SubjectRef<'_>, interner: &Interner) -> Option<Self> {
        Some(match node {
            SubjectRef::NamedNode(node) => {
                Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?)
            }
            SubjectRef::BlankNode(node) => {
                Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?)
            }
            #[cfg(feature = "rdf-star")]
            SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from(
                triple.as_ref(),
                interner,
            )?)),
        })
    }

    /// Resolves the interned subject back into a borrowed one.
    pub fn decode_from<'a>(&self, interner: &'a Interner) -> SubjectRef<'a> {
        match self {
            Self::NamedNode(node) => SubjectRef::NamedNode(node.decode_from(interner)),
            Self::BlankNode(node) => SubjectRef::BlankNode(node.decode_from(interner)),
            #[cfg(feature = "rdf-star")]
            Self::Triple(triple) => SubjectRef::Triple(&interner.triples[triple.as_ref()]),
        }
    }

    /// Smallest possible interned subject.
    pub fn first() -> Self {
        Self::NamedNode(InternedNamedNode::first())
    }

    /// Successor in the interning order.
    pub fn next(&self) -> Self {
        match self {
            Self::NamedNode(node) => Self::NamedNode(node.next()),
            Self::BlankNode(node) => Self::BlankNode(node.next()),
            #[cfg(feature = "rdf-star")]
            Self::Triple(triple) => Self::Triple(Box::new(triple.next())),
        }
    }

    /// Value that can never be produced by interning.
    pub fn impossible() -> Self {
        Self::NamedNode(InternedNamedNode::impossible())
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum InternedGraphName {
    DefaultGraph,
    NamedNode(InternedNamedNode),
    BlankNode(InternedBlankNode),
}

impl InternedGraphName {
    /// Interns `node` as a graph name (the default graph needs no interning).
    pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Interner) -> Self {
        match node {
            GraphNameRef::DefaultGraph => Self::DefaultGraph,
            GraphNameRef::NamedNode(node) => {
                Self::NamedNode(InternedNamedNode::encoded_into(node, interner))
            }
            GraphNameRef::BlankNode(node) => {
                Self::BlankNode(InternedBlankNode::encoded_into(node, interner))
            }
        }
    }

    /// Returns the interned form of `node` if it is already interned.
    pub fn encoded_from(node: GraphNameRef<'_>, interner: &Interner) -> Option<Self> {
        Some(match node {
            GraphNameRef::DefaultGraph => Self::DefaultGraph,
            GraphNameRef::NamedNode(node) => {
                Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?)
            }
            GraphNameRef::BlankNode(node) => {
                Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?)
            }
        })
    }

    /// Resolves the interned graph name back into a borrowed one.
    pub fn decode_from<'a>(&self, interner: &'a Interner) -> GraphNameRef<'a> {
        match self {
            Self::DefaultGraph => GraphNameRef::DefaultGraph,
            Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)),
            Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)),
        }
    }

    /// Smallest possible graph name (the default graph sorts first).
    pub fn first() -> Self {
        Self::DefaultGraph
    }

    /// Successor in the interning order.
    pub fn next(&self) -> Self {
        match self {
            Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()),
            Self::NamedNode(node) => Self::NamedNode(node.next()),
            Self::BlankNode(node) => Self::BlankNode(node.next()),
        }
    }

    /// Value that can never be produced by interning.
    pub fn impossible() -> Self {
        Self::NamedNode(InternedNamedNode::impossible())
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum InternedTerm {
    NamedNode(InternedNamedNode),
    BlankNode(InternedBlankNode),
    Literal(InternedLiteral),
    // Quoted triple object (RDF-star only).
    #[cfg(feature = "rdf-star")]
    Triple(Box<InternedTriple>),
}

impl InternedTerm {
    /// Interns `term` as a triple object.
    pub fn encoded_into(term: TermRef<'_>, interner: &mut Interner) -> Self {
        match term {
            TermRef::NamedNode(term) => {
                Self::NamedNode(InternedNamedNode::encoded_into(term, interner))
            }
            TermRef::BlankNode(term) => {
                Self::BlankNode(InternedBlankNode::encoded_into(term, interner))
            }
            TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)),
            #[cfg(feature = "rdf-star")]
            TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into(
                triple.as_ref(),
                interner,
            ))),
        }
    }

    /// Returns the interned form of `term` if it is already interned.
    pub fn encoded_from(term: TermRef<'_>, interner: &Interner) -> Option<Self> {
        Some(match term {
            TermRef::NamedNode(term) => {
                Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?)
            }
            TermRef::BlankNode(term) => {
                Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?)
            }
            TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?),
            #[cfg(feature = "rdf-star")]
            TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from(
                triple.as_ref(),
                interner,
            )?)),
        })
    }

    /// Resolves the interned term back into a borrowed one.
    pub fn decode_from<'a>(&self, interner: &'a Interner) -> TermRef<'a> {
        match self {
            Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)),
            Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)),
            Self::Literal(term) => TermRef::Literal(term.decode_from(interner)),
            #[cfg(feature = "rdf-star")]
            Self::Triple(triple) => TermRef::Triple(&interner.triples[triple.as_ref()]),
        }
    }

    /// Smallest possible interned term.
    pub fn first() -> Self {
        Self::NamedNode(InternedNamedNode::first())
    }

    /// Successor in the interning order.
    pub fn next(&self) -> Self {
        match self {
            Self::NamedNode(node) => Self::NamedNode(node.next()),
            Self::BlankNode(node) => Self::BlankNode(node.next()),
            Self::Literal(node) => Self::Literal(node.next()),
            #[cfg(feature = "rdf-star")]
            Self::Triple(triple) => Self::Triple(Box::new(triple.next())),
        }
    }

    /// Value that can never be produced by interning.
    pub fn impossible() -> Self {
        Self::NamedNode(InternedNamedNode::impossible())
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct InternedTriple {
    pub subject: InternedSubject,
    pub predicate: InternedNamedNode,
    pub object: InternedTerm,
}

#[cfg(feature = "rdf-star")]
impl InternedTriple {
    /// Interns `triple` and registers an owned copy in `interner.triples`
    /// so that decoding can hand out references to it.
    pub fn encoded_into(triple: TripleRef<'_>, interner: &mut Interner) -> Self {
        let interned_triple = Self {
            subject: InternedSubject::encoded_into(triple.subject, interner),
            predicate: InternedNamedNode::encoded_into(triple.predicate, interner),
            object: InternedTerm::encoded_into(triple.object, interner),
        };
        interner
            .triples
            .insert(interned_triple.clone(), triple.into_owned());
        interned_triple
    }

    /// Returns the interned form of `triple` if it has been interned before.
    pub fn encoded_from(triple: TripleRef<'_>, interner: &Interner) -> Option<Self> {
        let interned_triple = Self {
            subject: InternedSubject::encoded_from(triple.subject, interner)?,
            predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?,
            object: InternedTerm::encoded_from(triple.object, interner)?,
        };
        interner
            .triples
            .contains_key(&interned_triple)
            .then_some(interned_triple)
    }

    /// Successor in the interning order (increments the object only).
    pub fn next(&self) -> Self {
        Self {
            subject: self.subject.clone(),
            predicate: self.predicate,
            object: self.object.next(),
        }
    }
}
/// Builds [`IdentityHasher`] instances for maps whose keys are already hashes.
#[derive(Default, Clone)]
struct IdentityHasherBuilder;

impl BuildHasher for IdentityHasherBuilder {
    type Hasher = IdentityHasher;

    fn build_hasher(&self) -> Self::Hasher {
        IdentityHasher { value: 0 }
    }
}

/// Pass-through hasher: `finish` returns the last `u64` fed to `write_u64`.
#[derive(Default)]
struct IdentityHasher {
    value: u64,
}

impl Hasher for IdentityHasher {
    fn finish(&self) -> u64 {
        self.value
    }

    // Only u64 keys are expected; any byte-level write is a logic error.
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("Should only be used on u64 values")
    }

    fn write_u64(&mut self, i: u64) {
        self.value = i
    }
}

@ -0,0 +1,669 @@
use crate::oxrdf::named_node::{NamedNode, NamedNodeRef};
use crate::oxrdf::vocab::{rdf, xsd};
#[cfg(feature = "oxsdatatypes")]
use crate::oxsdatatypes::*;
use oxilangtag::{LanguageTag, LanguageTagParseError};
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::fmt;
use std::fmt::Write;
/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// # use oxilangtag::LanguageTagParseError;
/// use oxrdf::vocab::xsd;
/// use oxrdf::Literal;
///
/// assert_eq!(
/// "\"foo\\nbar\"",
/// Literal::new_simple_literal("foo\nbar").to_string()
/// );
///
/// assert_eq!(
/// r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
///
/// assert_eq!(
/// r#""foo"@en"#,
/// Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// );
/// # Result::<(), LanguageTagParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)]
pub struct Literal(LiteralContent);

#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)]
enum LiteralContent {
    // Simple literal (xsd:string datatype is implied, see `new_typed_literal`).
    String(String),
    // Language-tagged string with its (lowercased) language tag.
    LanguageTaggedString { value: String, language: String },
    // Literal with an explicit datatype other than xsd:string.
    TypedLiteral { value: String, datatype: NamedNode },
}
impl Literal {
    /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
    #[inline]
    pub fn new_simple_literal(value: impl Into<String>) -> Self {
        let lexical = value.into();
        Self(LiteralContent::String(lexical))
    }

    /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
    #[inline]
    pub fn new_typed_literal(value: impl Into<String>, datatype: impl Into<NamedNode>) -> Self {
        let lexical = value.into();
        let datatype = datatype.into();
        // xsd:string literals are stored as simple literals so that the two
        // spellings compare equal.
        if datatype == xsd::STRING {
            Self(LiteralContent::String(lexical))
        } else {
            Self(LiteralContent::TypedLiteral {
                value: lexical,
                datatype,
            })
        }
    }

    /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
    #[inline]
    pub fn new_language_tagged_literal(
        value: impl Into<String>,
        language: impl Into<String>,
    ) -> Result<Self, LanguageTagParseError> {
        let mut tag = language.into();
        // BCP47 tags are case-insensitive; normalize to lowercase before validating.
        tag.make_ascii_lowercase();
        let tag = LanguageTag::parse(tag)?.into_inner();
        Ok(Self::new_language_tagged_literal_unchecked(value, tag))
    }

    /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
    ///
    /// It is the responsibility of the caller to check that `language`
    /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
    /// and is lowercase.
    ///
    /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
    #[inline]
    pub fn new_language_tagged_literal_unchecked(
        value: impl Into<String>,
        language: impl Into<String>,
    ) -> Self {
        let content = LiteralContent::LanguageTaggedString {
            value: value.into(),
            language: language.into(),
        };
        Self(content)
    }

    /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form).
    #[inline]
    pub fn value(&self) -> &str {
        self.as_ref().value()
    }

    /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
    ///
    /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
    /// They are normalized to lowercase by this implementation.
    #[inline]
    pub fn language(&self) -> Option<&str> {
        self.as_ref().language()
    }

    /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
    ///
    /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
    /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
    #[inline]
    pub fn datatype(&self) -> NamedNodeRef<'_> {
        self.as_ref().datatype()
    }

    /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal).
    ///
    /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
    /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
    #[inline]
    pub fn is_plain(&self) -> bool {
        self.as_ref().is_plain()
    }

    /// Borrows this literal as a [`LiteralRef`].
    #[inline]
    pub fn as_ref(&self) -> LiteralRef<'_> {
        let content = match &self.0 {
            LiteralContent::String(value) => LiteralRefContent::String(value),
            LiteralContent::LanguageTaggedString { value, language } => {
                LiteralRefContent::LanguageTaggedString { value, language }
            }
            LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral {
                value,
                datatype: datatype.as_ref(),
            },
        };
        LiteralRef(content)
    }

    /// Extract components from this literal (value, datatype and language tag).
    #[inline]
    pub fn destruct(self) -> (String, Option<NamedNode>, Option<String>) {
        match self.0 {
            LiteralContent::TypedLiteral { value, datatype } => (value, Some(datatype), None),
            LiteralContent::LanguageTaggedString { value, language } => {
                (value, None, Some(language))
            }
            LiteralContent::String(s) => (s, None, None),
        }
    }
}
impl fmt::Display for Literal {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the borrowed representation's formatter.
        fmt::Display::fmt(&self.as_ref(), f)
    }
}

impl<'a> From<&'a str> for Literal {
    #[inline]
    fn from(value: &'a str) -> Self {
        Self(LiteralContent::String(value.to_owned()))
    }
}

impl From<String> for Literal {
    #[inline]
    fn from(value: String) -> Self {
        // The string is taken over as-is: a simple literal.
        Self(LiteralContent::String(value))
    }
}

impl<'a> From<Cow<'a, str>> for Literal {
    #[inline]
    fn from(value: Cow<'a, str>) -> Self {
        Self(LiteralContent::String(value.into_owned()))
    }
}
impl From<bool> for Literal {
#[inline]
fn from(value: bool) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::BOOLEAN.into(),
})
}
}
impl From<i128> for Literal {
#[inline]
fn from(value: i128) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<i64> for Literal {
#[inline]
fn from(value: i64) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<i32> for Literal {
#[inline]
fn from(value: i32) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<i16> for Literal {
#[inline]
fn from(value: i16) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<u64> for Literal {
#[inline]
fn from(value: u64) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<u32> for Literal {
#[inline]
fn from(value: u32) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<u16> for Literal {
#[inline]
fn from(value: u16) -> Self {
Self(LiteralContent::TypedLiteral {
value: value.to_string(),
datatype: xsd::INTEGER.into(),
})
}
}
impl From<f32> for Literal {
    #[inline]
    fn from(value: f32) -> Self {
        // The XSD lexical space spells the infinities "INF"/"-INF",
        // unlike Rust's Display ("inf"); NaN prints as "NaN" either way.
        let lexical = if value.is_infinite() {
            if value.is_sign_positive() {
                "INF".to_owned()
            } else {
                "-INF".to_owned()
            }
        } else {
            value.to_string()
        };
        Self(LiteralContent::TypedLiteral {
            value: lexical,
            datatype: xsd::FLOAT.into(),
        })
    }
}

impl From<f64> for Literal {
    #[inline]
    fn from(value: f64) -> Self {
        // Same special-casing of the infinities as for `f32` above.
        let lexical = if value.is_infinite() {
            if value.is_sign_positive() {
                "INF".to_owned()
            } else {
                "-INF".to_owned()
            }
        } else {
            value.to_string()
        };
        Self(LiteralContent::TypedLiteral {
            value: lexical,
            datatype: xsd::DOUBLE.into(),
        })
    }
}
/// Generates `From<$ty> for Literal` impls for the `oxsdatatypes` value types,
/// mapping each to its corresponding `xsd:` datatype. Each generated impl is
/// gated on the `oxsdatatypes` feature.
macro_rules! literal_from_oxsdatatypes {
    ($($native:ident => $datatype:ident),* $(,)?) => {
        $(
            #[cfg(feature = "oxsdatatypes")]
            impl From<$native> for Literal {
                #[inline]
                fn from(value: $native) -> Self {
                    Self::new_typed_literal(value.to_string(), xsd::$datatype)
                }
            }
        )*
    };
}

literal_from_oxsdatatypes!(
    Boolean => BOOLEAN,
    Float => FLOAT,
    Double => DOUBLE,
    Integer => INTEGER,
    Decimal => DECIMAL,
    DateTime => DATE_TIME,
    Time => TIME,
    Date => DATE,
    GYearMonth => G_YEAR_MONTH,
    GYear => G_YEAR,
    GMonthDay => G_MONTH_DAY,
    GMonth => G_MONTH,
    GDay => G_DAY,
    Duration => DURATION,
    YearMonthDuration => YEAR_MONTH_DURATION,
    DayTimeDuration => DAY_TIME_DURATION,
);
/// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::vocab::xsd;
/// use oxrdf::LiteralRef;
///
/// assert_eq!(
///     "\"foo\\nbar\"",
///     LiteralRef::new_simple_literal("foo\nbar").to_string()
/// );
///
/// assert_eq!(
///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
///     LiteralRef::new_typed_literal("1999-01-01", xsd::DATE).to_string()
/// );
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct LiteralRef<'a>(LiteralRefContent<'a>);

/// Borrowed counterpart of the internal storage of [`Literal`].
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
enum LiteralRefContent<'a> {
    // Simple literal; xsd:string typed literals are normalized to this
    // variant on construction (see `new_typed_literal`).
    String(&'a str),
    // Language-tagged string; the tag is expected to be lowercase.
    LanguageTaggedString {
        value: &'a str,
        language: &'a str,
    },
    // Any other datatype; `datatype` is never xsd:string here.
    TypedLiteral {
        value: &'a str,
        datatype: NamedNodeRef<'a>,
    },
}
impl<'a> LiteralRef<'a> {
/// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
#[inline]
pub const fn new_simple_literal(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value))
}
/// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
#[inline]
pub fn new_typed_literal(value: &'a str, datatype: impl Into<NamedNodeRef<'a>>) -> Self {
let datatype = datatype.into();
LiteralRef(if datatype == xsd::STRING {
LiteralRefContent::String(value)
} else {
LiteralRefContent::TypedLiteral { value, datatype }
})
}
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// It is the responsibility of the caller to check that `language`
/// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
/// and is lowercase.
///
/// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
}
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
#[inline]
pub const fn value(self) -> &'a str {
match self.0 {
LiteralRefContent::String(value)
| LiteralRefContent::LanguageTaggedString { value, .. }
| LiteralRefContent::TypedLiteral { value, .. } => value,
}
}
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
/// They are normalized to lowercase by this implementation.
#[inline]
pub const fn language(self) -> Option<&'a str> {
match self.0 {
LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
_ => None,
}
}
/// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
///
/// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
/// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
#[inline]
pub const fn datatype(self) -> NamedNodeRef<'a> {
match self.0 {
LiteralRefContent::String(_) => xsd::STRING,
LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING,
LiteralRefContent::TypedLiteral { datatype, .. } => datatype,
}
}
/// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal).
///
/// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
/// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
#[inline]
pub const fn is_plain(self) -> bool {
matches!(
self.0,
LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
)
}
#[inline]
pub fn into_owned(self) -> Literal {
Literal(match self.0 {
LiteralRefContent::String(value) => LiteralContent::String(value.to_owned()),
LiteralRefContent::LanguageTaggedString { value, language } => {
LiteralContent::LanguageTaggedString {
value: value.to_owned(),
language: language.to_owned(),
}
}
LiteralRefContent::TypedLiteral { value, datatype } => LiteralContent::TypedLiteral {
value: value.to_owned(),
datatype: datatype.into_owned(),
},
})
}
/// Extract components from this literal
#[inline]
pub const fn destruct(self) -> (&'a str, Option<NamedNodeRef<'a>>, Option<&'a str>) {
match self.0 {
LiteralRefContent::String(s) => (s, None, None),
LiteralRefContent::LanguageTaggedString { value, language } => {
(value, None, Some(language))
}
LiteralRefContent::TypedLiteral { value, datatype } => (value, Some(datatype), None),
}
}
}
impl fmt::Display for LiteralRef<'_> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.0 {
LiteralRefContent::String(value) => print_quoted_str(value, f),
LiteralRefContent::LanguageTaggedString { value, language } => {
print_quoted_str(value, f)?;
write!(f, "@{language}")
}
LiteralRefContent::TypedLiteral { value, datatype } => {
print_quoted_str(value, f)?;
write!(f, "^^{datatype}")
}
}
}
}
impl<'a> From<&'a Literal> for LiteralRef<'a> {
#[inline]
fn from(node: &'a Literal) -> Self {
node.as_ref()
}
}
impl<'a> From<LiteralRef<'a>> for Literal {
#[inline]
fn from(node: LiteralRef<'a>) -> Self {
node.into_owned()
}
}
impl<'a> From<&'a str> for LiteralRef<'a> {
#[inline]
fn from(value: &'a str) -> Self {
LiteralRef(LiteralRefContent::String(value))
}
}
impl PartialEq<Literal> for LiteralRef<'_> {
#[inline]
fn eq(&self, other: &Literal) -> bool {
*self == other.as_ref()
}
}
impl PartialEq<LiteralRef<'_>> for Literal {
#[inline]
fn eq(&self, other: &LiteralRef<'_>) -> bool {
self.as_ref() == *other
}
}
/// Writes `string` to `f` as a double-quoted N-Triples/Turtle/SPARQL string,
/// escaping the quote, the backslash, the usual short escapes (`\b`, `\t`,
/// `\n`, `\f`, `\r`) and any remaining C0 control character or DEL as `\uXXXX`.
#[inline]
pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
    f.write_char('"')?;
    for c in string.chars() {
        // Named short escapes take precedence over the generic \uXXXX form.
        let short_escape = match c {
            '\u{08}' => Some("\\b"),
            '\t' => Some("\\t"),
            '\n' => Some("\\n"),
            '\u{0C}' => Some("\\f"),
            '\r' => Some("\\r"),
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            _ => None,
        };
        if let Some(escaped) = short_escape {
            f.write_str(escaped)?;
        } else if matches!(c, '\0'..='\u{1F}' | '\u{7F}') {
            write!(f, "\\u{:04X}", u32::from(c))?;
        } else {
            f.write_char(c)?;
        }
    }
    f.write_char('"')
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    // A literal explicitly typed as xsd:string must compare equal to the
    // equivalent simple literal, across all owned/borrowed combinations.
    #[test]
    fn test_simple_literal_equality() {
        assert_eq!(
            Literal::new_simple_literal("foo"),
            Literal::new_typed_literal("foo", xsd::STRING)
        );
        assert_eq!(
            Literal::new_simple_literal("foo"),
            LiteralRef::new_typed_literal("foo", xsd::STRING)
        );
        assert_eq!(
            LiteralRef::new_simple_literal("foo"),
            Literal::new_typed_literal("foo", xsd::STRING)
        );
        assert_eq!(
            LiteralRef::new_simple_literal("foo"),
            LiteralRef::new_typed_literal("foo", xsd::STRING)
        );
    }

    // The XSD lexical space spells the float special values "INF"/"-INF"/"NaN";
    // the `From<f32>`/`From<f64>` impls must produce these forms.
    #[test]
    fn test_float_format() {
        assert_eq!("INF", Literal::from(f32::INFINITY).value());
        assert_eq!("INF", Literal::from(f64::INFINITY).value());
        assert_eq!("-INF", Literal::from(f32::NEG_INFINITY).value());
        assert_eq!("-INF", Literal::from(f64::NEG_INFINITY).value());
        assert_eq!("NaN", Literal::from(f32::NAN).value());
        assert_eq!("NaN", Literal::from(f64::NAN).value());
    }
}

@ -0,0 +1,24 @@
// Submodules implementing the OxRDF data model.
mod blank_node;
pub mod dataset;
pub mod graph;
mod interning;
mod literal;
mod named_node;
mod parser;
mod triple;
mod variable;
pub mod vocab;

// Flat re-exports so users can refer to the model types directly
// (e.g. `crate::oxrdf::Literal` instead of `crate::oxrdf::literal::Literal`).
pub use crate::oxrdf::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef};
pub use crate::oxrdf::dataset::Dataset;
pub use crate::oxrdf::graph::Graph;
pub use crate::oxrdf::literal::{Literal, LiteralRef};
pub use crate::oxrdf::named_node::{NamedNode, NamedNodeRef};
pub use crate::oxrdf::parser::TermParseError;
pub use crate::oxrdf::triple::{
    GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Subject,
    SubjectRef, Term, TermRef, Triple, TripleRef, TryFromTermError,
};
pub use crate::oxrdf::variable::{Variable, VariableNameParseError, VariableRef};

// Error types of the underlying IRI and language-tag parsers, re-exported
// because they appear in this module's public constructor signatures.
pub use oxilangtag::LanguageTagParseError;
pub use oxiri::IriParseError;

@ -0,0 +1,237 @@
use oxiri::{Iri, IriParseError};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
/// An owned RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::NamedNode;
///
/// assert_eq!(
///     "<http://example.com/foo>",
///     NamedNode::new("http://example.com/foo")?.to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash, Serialize, Deserialize)]
pub struct NamedNode {
    // The IRI text; validated when built through `new`,
    // trusted as-is when built through `new_unchecked`.
    iri: String,
}
impl NamedNode {
/// Builds and validate an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
pub fn new(iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self::new_from_iri(Iri::parse(iri.into())?))
}
#[inline]
pub(crate) fn new_from_iri(iri: Iri<String>) -> Self {
Self::new_unchecked(iri.into_inner())
}
/// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) from a string.
///
/// It is the caller's responsibility to ensure that `iri` is a valid IRI.
///
/// [`NamedNode::new()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new_unchecked(iri: impl Into<String>) -> Self {
Self { iri: iri.into() }
}
#[inline]
pub fn as_str(&self) -> &str {
self.iri.as_str()
}
#[inline]
pub fn into_string(self) -> String {
self.iri
}
#[inline]
pub fn as_ref(&self) -> NamedNodeRef<'_> {
NamedNodeRef::new_unchecked(&self.iri)
}
}
impl fmt::Display for NamedNode {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Same angle-bracketed form as `NamedNodeRef`.
        write!(f, "<{}>", self.iri)
    }
}

impl PartialEq<str> for NamedNode {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        other == self.as_str()
    }
}

impl PartialEq<NamedNode> for str {
    #[inline]
    fn eq(&self, other: &NamedNode) -> bool {
        other.as_str() == self
    }
}

impl PartialEq<&str> for NamedNode {
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        self.as_str() == *other
    }
}

impl PartialEq<NamedNode> for &str {
    #[inline]
    fn eq(&self, other: &NamedNode) -> bool {
        other.as_str() == *self
    }
}
/// A borrowed RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri).
///
/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
/// ```
/// use oxrdf::NamedNodeRef;
///
/// assert_eq!(
///     "<http://example.com/foo>",
///     NamedNodeRef::new("http://example.com/foo")?.to_string()
/// );
/// # Result::<_,oxrdf::IriParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct NamedNodeRef<'a> {
    // Borrowed IRI text; validated when built through `new`,
    // trusted as-is when built through `new_unchecked`.
    iri: &'a str,
}
impl<'a> NamedNodeRef<'a> {
/// Builds and validate an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
pub fn new(iri: &'a str) -> Result<Self, IriParseError> {
Ok(Self::new_from_iri(Iri::parse(iri)?))
}
#[inline]
pub(crate) fn new_from_iri(iri: Iri<&'a str>) -> Self {
Self::new_unchecked(iri.into_inner())
}
/// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) from a string.
///
/// It is the caller's responsibility to ensure that `iri` is a valid IRI.
///
/// [`NamedNode::new()`] is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub const fn new_unchecked(iri: &'a str) -> Self {
Self { iri }
}
#[inline]
pub const fn as_str(self) -> &'a str {
self.iri
}
#[inline]
pub fn into_owned(self) -> NamedNode {
NamedNode::new_unchecked(self.iri)
}
}
impl fmt::Display for NamedNodeRef<'_> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "<{}>", self.as_str())
}
}
impl From<NamedNodeRef<'_>> for NamedNode {
#[inline]
fn from(node: NamedNodeRef<'_>) -> Self {
node.into_owned()
}
}
impl<'a> From<&'a NamedNode> for NamedNodeRef<'a> {
#[inline]
fn from(node: &'a NamedNode) -> Self {
node.as_ref()
}
}
impl PartialEq<NamedNode> for NamedNodeRef<'_> {
#[inline]
fn eq(&self, other: &NamedNode) -> bool {
self.as_str() == other.as_str()
}
}
impl PartialEq<NamedNodeRef<'_>> for NamedNode {
#[inline]
fn eq(&self, other: &NamedNodeRef<'_>) -> bool {
self.as_str() == other.as_str()
}
}
impl PartialEq<str> for NamedNodeRef<'_> {
#[inline]
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<NamedNodeRef<'_>> for str {
#[inline]
fn eq(&self, other: &NamedNodeRef<'_>) -> bool {
self == other.as_str()
}
}
impl PartialEq<&str> for NamedNodeRef<'_> {
#[inline]
fn eq(&self, other: &&str) -> bool {
self == *other
}
}
impl PartialEq<NamedNodeRef<'_>> for &str {
#[inline]
fn eq(&self, other: &NamedNodeRef<'_>) -> bool {
*self == other
}
}
impl PartialOrd<NamedNode> for NamedNodeRef<'_> {
    #[inline]
    fn partial_cmp(&self, other: &NamedNode) -> Option<Ordering> {
        // The derived `Ord` on both types is the ordering of the IRI strings,
        // so comparing the string slices directly is equivalent.
        Some(self.as_str().cmp(other.as_str()))
    }
}

impl PartialOrd<NamedNodeRef<'_>> for NamedNode {
    #[inline]
    fn partial_cmp(&self, other: &NamedNodeRef<'_>) -> Option<Ordering> {
        Some(self.as_str().cmp(other.as_str()))
    }
}

impl From<Iri<String>> for NamedNode {
    #[inline]
    fn from(iri: Iri<String>) -> Self {
        // The `Iri` has already been validated by its parser.
        Self::new_from_iri(iri)
    }
}

impl<'a> From<Iri<&'a str>> for NamedNodeRef<'a> {
    #[inline]
    fn from(iri: Iri<&'a str>) -> Self {
        Self::new_from_iri(iri)
    }
}

@ -0,0 +1,469 @@
use crate::oxrdf::vocab::xsd;
use crate::oxrdf::{
BlankNode, BlankNodeIdParseError, IriParseError, LanguageTagParseError, Literal, NamedNode,
Term, Variable, VariableNameParseError,
};
#[cfg(feature = "rdf-star")]
use crate::oxrdf::{Subject, Triple};
use std::char;
use std::str::{Chars, FromStr};
/// Maximum nesting depth of RDF-star quoted triples accepted by the parser.
///
/// `read_term` recurses once per `<< … >>` nesting level, so this cap prevents
/// a stack overflow on deeply nested input. The value is an empirical
/// compromise: large enough not to reject realistic files, small enough to
/// stay within the stack.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 128;
impl FromStr for NamedNode {
    type Err = TermParseError;

    /// Parses a named node from its NTriples and Turtle serialization
    ///
    /// ```
    /// use oxrdf::NamedNode;
    /// use std::str::FromStr;
    ///
    /// assert_eq!(
    ///     NamedNode::from_str("<http://example.com>").unwrap(),
    ///     NamedNode::new("http://example.com").unwrap()
    /// )
    /// ```
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (term, rest) = read_named_node(s)?;
        // The whole input must be consumed by the node.
        if rest.is_empty() {
            Ok(term)
        } else {
            Err(Self::Err::msg(
                "Named node serialization should end with a >",
            ))
        }
    }
}

impl FromStr for BlankNode {
    type Err = TermParseError;

    /// Parses a blank node from its NTriples and Turtle serialization
    ///
    /// ```
    /// use oxrdf::BlankNode;
    /// use std::str::FromStr;
    ///
    /// assert_eq!(
    ///     BlankNode::from_str("_:ex").unwrap(),
    ///     BlankNode::new("ex").unwrap()
    /// )
    /// ```
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (term, rest) = read_blank_node(s)?;
        // The whole input must be consumed by the node.
        if rest.is_empty() {
            Ok(term)
        } else {
            Err(Self::Err::msg(
                "Blank node serialization should not contain whitespaces",
            ))
        }
    }
}
impl FromStr for Literal {
    type Err = TermParseError;

    /// Parses a literal from its NTriples or Turtle serialization
    ///
    /// ```
    /// use oxrdf::vocab::xsd;
    /// use oxrdf::{Literal, NamedNode};
    /// use std::str::FromStr;
    ///
    /// assert_eq!(
    ///     Literal::from_str("\"ex\\n\"").unwrap(),
    ///     Literal::new_simple_literal("ex\n")
    /// );
    /// assert_eq!(
    ///     Literal::from_str("\"ex\"@en").unwrap(),
    ///     Literal::new_language_tagged_literal("ex", "en").unwrap()
    /// );
    /// assert_eq!(
    ///     Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(),
    ///     Literal::new_typed_literal(
    ///         "2020",
    ///         NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()
    ///     )
    /// );
    /// assert_eq!(
    ///     Literal::from_str("true").unwrap(),
    ///     Literal::new_typed_literal("true", xsd::BOOLEAN)
    /// );
    /// assert_eq!(
    ///     Literal::from_str("+122").unwrap(),
    ///     Literal::new_typed_literal("+122", xsd::INTEGER)
    /// );
    /// assert_eq!(
    ///     Literal::from_str("-122.23").unwrap(),
    ///     Literal::new_typed_literal("-122.23", xsd::DECIMAL)
    /// );
    /// assert_eq!(
    ///     Literal::from_str("-122e+1").unwrap(),
    ///     Literal::new_typed_literal("-122e+1", xsd::DOUBLE)
    /// );
    /// ```
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (term, rest) = read_literal(s)?;
        // The whole input must be consumed by the literal.
        if rest.is_empty() {
            Ok(term)
        } else {
            Err(Self::Err::msg("Invalid literal serialization"))
        }
    }
}
impl FromStr for Term {
    type Err = TermParseError;

    /// Parses a term from its NTriples or Turtle serialization
    ///
    /// ```
    /// use oxrdf::*;
    /// use std::str::FromStr;
    ///
    /// assert_eq!(
    ///     Term::from_str("\"ex\"").unwrap(),
    ///     Literal::new_simple_literal("ex").into()
    /// );
    /// ```
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (term, rest) = read_term(s, 0)?;
        // The whole input must be consumed by the term.
        if rest.is_empty() {
            Ok(term)
        } else {
            Err(Self::Err::msg("Invalid term serialization"))
        }
    }
}

impl FromStr for Variable {
    type Err = TermParseError;

    /// Parses a variable from its SPARQL serialization
    ///
    /// ```
    /// use oxrdf::Variable;
    /// use std::str::FromStr;
    ///
    /// assert_eq!(
    ///     Variable::from_str("$foo").unwrap(),
    ///     Variable::new("foo").unwrap()
    /// )
    /// ```
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Both `?name` and `$name` are accepted; only the name is validated.
        let name = s
            .strip_prefix('?')
            .or_else(|| s.strip_prefix('$'))
            .ok_or_else(|| Self::Err::msg("Variable serialization should start with ? or $"))?;
        Self::new(name).map_err(|error| {
            TermParseError(TermParseErrorKind::Variable {
                value: s.to_owned(),
                error,
            })
        })
    }
}
/// Reads a `<iri>` token from the head of `s` (after trimming), returning the
/// parsed node and the unparsed tail.
fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
    let s = s.trim();
    match s.strip_prefix('<') {
        Some(inner) => {
            let end = inner.find('>').ok_or_else(|| {
                TermParseError::msg("Named node serialization should end with a >")
            })?;
            let value = &inner[..end];
            // `>` is a single byte, so skipping it by index is safe.
            let rest = &inner[end + 1..];
            let term = NamedNode::new(value).map_err(|error| {
                TermParseError(TermParseErrorKind::Iri {
                    value: value.to_owned(),
                    error,
                })
            })?;
            Ok((term, rest))
        }
        None => Err(TermParseError::msg(
            "Named node serialization should start with a <",
        )),
    }
}

/// Reads a `_:id` token from the head of `s` (after trimming), returning the
/// parsed node and the unparsed tail.
fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
    let s = s.trim();
    match s.strip_prefix("_:") {
        Some(inner) => {
            // The identifier runs until whitespace or a character that could
            // start the next token.
            let end = inner
                .find(|v: char| {
                    v.is_whitespace()
                        || matches!(v, '<' | '_' | '?' | '$' | '"' | '\'' | '>' | '@' | '^')
                })
                .unwrap_or(inner.len());
            let value = &inner[..end];
            let rest = &inner[end..];
            let term = BlankNode::new(value).map_err(|error| {
                TermParseError(TermParseErrorKind::BlankNode {
                    value: value.to_owned(),
                    error,
                })
            })?;
            Ok((term, rest))
        }
        None => Err(TermParseError::msg(
            "Blank node serialization should start with '_:'",
        )),
    }
}
/// Reads a literal from the head of `s` (after trimming), returning it and the
/// unparsed tail.
///
/// Accepted forms:
/// - a quoted string, optionally followed by `@lang` or `^^<datatype>`;
/// - the booleans `true`/`false`;
/// - a bare numeric literal, typed as xsd:integer, xsd:decimal or xsd:double
///   depending on the presence of a dot and/or an exponent.
fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
    let s = s.trim();
    if let Some(s) = s.strip_prefix('"') {
        // Quoted string: unescape characters until the closing quote.
        let mut value = String::with_capacity(s.len());
        let mut chars = s.chars();
        while let Some(c) = chars.next() {
            match c {
                '"' => {
                    // Closing quote reached: dispatch on the optional suffix.
                    let remain = chars.as_str();
                    return if let Some(remain) = remain.strip_prefix('@') {
                        // `@lang` suffix: the tag is made of letters and dashes.
                        let end = remain
                            .find(|v| !matches!(v, 'a'..='z' | 'A'..='Z' | '-'))
                            .unwrap_or(remain.len());
                        let (language, remain) = remain.split_at(end);
                        Ok((
                            Literal::new_language_tagged_literal(value, language).map_err(
                                |error| {
                                    TermParseError(TermParseErrorKind::LanguageTag {
                                        value: language.to_owned(),
                                        error,
                                    })
                                },
                            )?,
                            remain,
                        ))
                    } else if let Some(remain) = remain.strip_prefix("^^") {
                        // `^^<datatype>` suffix.
                        let (datatype, remain) = read_named_node(remain)?;
                        Ok((Literal::new_typed_literal(value, datatype), remain))
                    } else {
                        // No suffix: a simple literal.
                        Ok((Literal::new_simple_literal(value), remain))
                    };
                }
                '\\' => {
                    // N-Triples string escape sequence.
                    if let Some(c) = chars.next() {
                        value.push(match c {
                            't' => '\t',
                            'b' => '\u{08}',
                            'n' => '\n',
                            'r' => '\r',
                            'f' => '\u{0C}',
                            '"' => '"',
                            '\'' => '\'',
                            '\\' => '\\',
                            'u' => read_hexa_char(&mut chars, 4)?,
                            'U' => read_hexa_char(&mut chars, 8)?,
                            _ => return Err(TermParseError::msg("Unexpected escaped char")),
                        })
                    } else {
                        return Err(TermParseError::msg("Unexpected literal end"));
                    }
                }
                _ => value.push(c),
            }
        }
        // The input ended before the closing quote.
        Err(TermParseError::msg("Unexpected literal end"))
    } else if let Some(remain) = s.strip_prefix("true") {
        Ok((Literal::new_typed_literal("true", xsd::BOOLEAN), remain))
    } else if let Some(remain) = s.strip_prefix("false") {
        Ok((Literal::new_typed_literal("false", xsd::BOOLEAN), remain))
    } else {
        // Bare numeric literal: scan `[+-]? digits (. digits)? ([eE] [+-]? digits)?`
        // byte by byte, counting the digits seen in each part.
        //
        // NOTE(review): the success branches below use the whole remaining
        // string `s` (not `&s[..cursor]`) as the lexical form while returning
        // `&s[cursor..]` as the tail. Callers going through `FromStr` require
        // the tail to be empty so they are unaffected, but a numeric object
        // inside a quoted triple would get trailing text in its lexical form —
        // confirm against upstream oxigraph before changing.
        let input = s.as_bytes();
        if input.is_empty() {
            return Err(TermParseError::msg("Empty term serialization"));
        }
        // Optional sign.
        let mut cursor = match input.first() {
            Some(b'+' | b'-') => 1,
            _ => 0,
        };
        let mut with_dot = false;
        // Digits of the integer part.
        let mut count_before: usize = 0;
        while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
            count_before += 1;
            cursor += 1;
        }
        // Optional fractional part.
        let mut count_after: usize = 0;
        if cursor < input.len() && input[cursor] == b'.' {
            with_dot = true;
            cursor += 1;
            while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
                count_after += 1;
                cursor += 1;
            }
        }
        if cursor < input.len() && (input[cursor] == b'e' || input[cursor] == b'E') {
            // Exponent present: the literal is an xsd:double.
            cursor += 1;
            cursor += match input.get(cursor) {
                Some(b'+' | b'-') => 1,
                _ => 0,
            };
            let mut count_exponent = 0;
            while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
                count_exponent += 1;
                cursor += 1;
            }
            if count_exponent > 0 {
                Ok((Literal::new_typed_literal(s, xsd::DOUBLE), &s[cursor..]))
            } else {
                Err(TermParseError::msg(
                    "Double serialization with an invalid exponent",
                ))
            }
        } else if with_dot {
            // Dot but no exponent: xsd:decimal, which requires fractional digits.
            if count_after > 0 {
                Ok((Literal::new_typed_literal(s, xsd::DECIMAL), &s[cursor..]))
            } else {
                Err(TermParseError::msg(
                    "Decimal serialization without floating part",
                ))
            }
        } else if count_before > 0 {
            // Plain digits: xsd:integer.
            Ok((Literal::new_typed_literal(s, xsd::INTEGER), &s[cursor..]))
        } else {
            Err(TermParseError::msg("Empty integer serialization"))
        }
    }
}
/// Reads a term (named node, blank node, literal or RDF-star quoted triple)
/// from the head of `s`, returning it and the unparsed tail.
///
/// `number_of_recursive_calls` tracks the quoted-triple nesting depth so the
/// recursion is aborted at [`MAX_NUMBER_OF_NESTED_TRIPLES`] instead of
/// overflowing the stack.
fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> {
    if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
        return Err(TermParseError::msg(
            "Too many nested triples. The parser fails here to avoid a stack overflow.",
        ));
    }
    let s = s.trim();
    // `remain` is unused when the `rdf-star` feature is disabled.
    #[allow(unused_variables)]
    if let Some(remain) = s.strip_prefix("<<") {
        // RDF-star quoted triple: `<< subject predicate object >>`.
        #[cfg(feature = "rdf-star")]
        {
            let (subject, remain) = read_term(remain, number_of_recursive_calls + 1)?;
            let (predicate, remain) = read_named_node(remain)?;
            let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?;
            let remain = remain.trim_start();
            if let Some(remain) = remain.strip_prefix(">>") {
                Ok((
                    Triple {
                        // Narrow the parsed subject term to a valid subject:
                        // literals may not appear in subject position.
                        subject: match subject {
                            Term::NamedNode(s) => s.into(),
                            Term::BlankNode(s) => s.into(),
                            Term::Literal(_) => {
                                return Err(TermParseError::msg(
                                    "Literals are not allowed in subject position",
                                ));
                            }
                            Term::Triple(s) => Subject::Triple(s),
                        },
                        predicate,
                        object,
                    }
                    .into(),
                    remain,
                ))
            } else {
                Err(TermParseError::msg(
                    "Nested triple serialization should be enclosed between << and >>",
                ))
            }
        }
        #[cfg(not(feature = "rdf-star"))]
        {
            Err(TermParseError::msg("RDF-star is not supported"))
        }
    } else if s.starts_with('<') {
        let (term, remain) = read_named_node(s)?;
        Ok((term.into(), remain))
    } else if s.starts_with('_') {
        let (term, remain) = read_blank_node(s)?;
        Ok((term.into(), remain))
    } else {
        // Anything else must be a literal.
        let (term, remain) = read_literal(s)?;
        Ok((term.into(), remain))
    }
}
/// Reads `len` hexadecimal digits from `input` and decodes them as a Unicode
/// code point (the body of a `\uXXXX` or `\UXXXXXXXX` escape).
///
/// # Errors
/// Fails if the input ends early, a non-hexadecimal character is found, or the
/// resulting value is not a valid Unicode scalar (e.g. a surrogate).
fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> {
    let mut value = 0;
    for _ in 0..len {
        let c = input
            .next()
            .ok_or_else(|| TermParseError::msg("Unexpected literal string end"))?;
        // `to_digit(16)` accepts exactly 0-9, a-f and A-F — the same set as the
        // hand-rolled match it replaces.
        let digit = c
            .to_digit(16)
            .ok_or_else(|| TermParseError::msg("Unexpected character in a unicode escape"))?;
        value = value * 16 + digit;
    }
    char::from_u32(value).ok_or_else(|| TermParseError::msg("Invalid encoded unicode code point"))
}
/// An error raised during term serialization parsing using the [`FromStr`] trait.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct TermParseError(#[from] TermParseErrorKind);

/// An internal error raised during term serialization parsing using the [`FromStr`] trait.
// Kept private so the public API only exposes the opaque `TermParseError`.
// The `#[error]` strings below are user-visible messages: do not reword casually.
#[derive(Debug, thiserror::Error)]
enum TermParseErrorKind {
    #[error("Error while parsing the named node '{value}': {error}")]
    Iri { error: IriParseError, value: String },
    #[error("Error while parsing the blank node '{value}': {error}")]
    BlankNode {
        error: BlankNodeIdParseError,
        value: String,
    },
    #[error("Error while parsing the language tag '{value}': {error}")]
    LanguageTag {
        error: LanguageTagParseError,
        value: String,
    },
    #[error("Error while parsing the variable '{value}': {error}")]
    Variable {
        error: VariableNameParseError,
        value: String,
    },
    // Static catch-all used by `TermParseError::msg`.
    #[error("{0}")]
    Msg(&'static str),
}

impl TermParseError {
    /// Builds an error from a static message (no allocation).
    pub(crate) fn msg(msg: &'static str) -> Self {
        Self(TermParseErrorKind::Msg(msg))
    }
}
#[cfg(test)]
#[cfg(feature = "rdf-star")]
mod tests {
    use super::*;
    // Checks `Term: FromStr` for both plain literals and RDF-star
    // quoted triples (`<< ... >>`).
    #[test]
    fn triple_term_parsing() {
        // A plain literal parses without any RDF-star syntax involved.
        assert_eq!(
            Term::from_str("\"ex\"").unwrap(),
            Literal::new_simple_literal("ex").into()
        );
        // `<< s p o >>` parses into a quoted `Triple` term.
        assert_eq!(
            Term::from_str("<< _:s <http://example.com/p> \"o\" >>").unwrap(),
            Triple::new(
                BlankNode::new("s").unwrap(),
                NamedNode::new("http://example.com/p").unwrap(),
                Literal::new_simple_literal("o"),
            )
            .into()
        );
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,216 @@
use std::cmp::Ordering;
use std::fmt;
/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) owned variable.
///
/// The default string formatter is returning a SPARQL compatible representation:
/// ```
/// use oxrdf::{Variable, VariableNameParseError};
///
/// assert_eq!("?foo", Variable::new("foo")?.to_string());
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct Variable {
    // Variable name without the leading `?` (the `?` is added by `Display`).
    name: String,
}
impl Variable {
    /// Creates a new variable from its name, validating it against the SPARQL grammar.
    pub fn new(name: impl Into<String>) -> Result<Self, VariableNameParseError> {
        let name = name.into();
        validate_variable_identifier(&name)?;
        Ok(Self { name })
    }
    /// Creates a new variable from its name without any validation.
    ///
    /// The caller must guarantee that `name` is a valid SPARQL variable name;
    /// use [`Variable::new()`] instead for untrusted data.
    #[inline]
    pub fn new_unchecked(name: impl Into<String>) -> Self {
        Self { name: name.into() }
    }
    /// Returns the variable name as a string slice (without the leading `?`).
    #[inline]
    pub fn as_str(&self) -> &str {
        self.name.as_str()
    }
    /// Consumes the variable and returns its name as an owned [`String`].
    #[inline]
    pub fn into_string(self) -> String {
        self.name
    }
    /// Borrows this variable as a [`VariableRef`].
    #[inline]
    pub fn as_ref(&self) -> VariableRef<'_> {
        VariableRef {
            name: self.name.as_str(),
        }
    }
}
impl fmt::Display for Variable {
    /// Writes the SPARQL serialization of the variable, e.g. `?name`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "?{}", self.name)
    }
}
/// A [SPARQL query](https://www.w3.org/TR/sparql11-query/) borrowed variable.
///
/// The default string formatter is returning a SPARQL compatible representation:
/// ```
/// use oxrdf::{VariableNameParseError, VariableRef};
///
/// assert_eq!("?foo", VariableRef::new("foo")?.to_string());
/// # Result::<_,VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct VariableRef<'a> {
    // Borrowed variable name without the leading `?` (added by `Display`).
    name: &'a str,
}
impl<'a> VariableRef<'a> {
    /// Creates a new variable from its name, validating it against the SPARQL grammar.
    pub fn new(name: &'a str) -> Result<Self, VariableNameParseError> {
        validate_variable_identifier(name)?;
        Ok(Self { name })
    }
    /// Creates a new variable from its name without any validation.
    ///
    /// The caller must guarantee that `name` is a valid SPARQL variable name;
    /// use [`VariableRef::new()`] instead for untrusted data.
    #[inline]
    pub const fn new_unchecked(name: &'a str) -> Self {
        Self { name }
    }
    /// Returns the variable name as a string slice (without the leading `?`).
    #[inline]
    pub const fn as_str(self) -> &'a str {
        self.name
    }
    /// Copies the variable name into an owned [`String`].
    #[inline]
    pub fn into_string(self) -> String {
        self.name.to_owned()
    }
    /// Converts this borrowed variable into an owned [`Variable`].
    #[inline]
    pub fn into_owned(self) -> Variable {
        Variable {
            name: self.name.to_owned(),
        }
    }
}
impl fmt::Display for VariableRef<'_> {
    /// Writes the SPARQL serialization of the variable, e.g. `?name`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("?")?;
        f.write_str(self.name)
    }
}
impl<'a> From<&'a Variable> for VariableRef<'a> {
    /// Borrows an owned variable.
    #[inline]
    fn from(variable: &'a Variable) -> Self {
        Self {
            name: variable.name.as_str(),
        }
    }
}
impl<'a> From<VariableRef<'a>> for Variable {
    /// Copies a borrowed variable into an owned one.
    #[inline]
    fn from(variable: VariableRef<'a>) -> Self {
        Self {
            name: variable.name.to_owned(),
        }
    }
}
impl PartialEq<Variable> for VariableRef<'_> {
    /// Compares a borrowed variable with an owned one by name.
    #[inline]
    fn eq(&self, other: &Variable) -> bool {
        self.name == other.name
    }
}
impl PartialEq<VariableRef<'_>> for Variable {
    /// Compares an owned variable with a borrowed one by name.
    #[inline]
    fn eq(&self, other: &VariableRef<'_>) -> bool {
        self.name == other.name
    }
}
impl PartialOrd<Variable> for VariableRef<'_> {
    /// Orders by variable name, consistently with the derived `Ord`.
    #[inline]
    fn partial_cmp(&self, other: &Variable) -> Option<Ordering> {
        Some(self.as_str().cmp(other.as_str()))
    }
}
impl PartialOrd<VariableRef<'_>> for Variable {
    /// Orders by variable name, consistently with the derived `Ord`.
    #[inline]
    fn partial_cmp(&self, other: &VariableRef<'_>) -> Option<Ordering> {
        Some(self.as_str().cmp(other.as_str()))
    }
}
/// Checks that `id` is a valid SPARQL variable name (the part after `?`/`$`).
///
/// The first character may additionally be `:` (and, as in SPARQL `VARNAME`,
/// a digit), while continuation characters may additionally be `·` (U+00B7),
/// combining marks U+0300..=U+036F and U+203F..=U+2040.
fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> {
    // Characters allowed in every position of a variable name.
    fn is_base_char(c: char) -> bool {
        matches!(c,
            '0'..='9'
                | '_'
                | 'A'..='Z'
                | 'a'..='z'
                | '\u{00C0}'..='\u{00D6}'
                | '\u{00D8}'..='\u{00F6}'
                | '\u{00F8}'..='\u{02FF}'
                | '\u{0370}'..='\u{037D}'
                | '\u{037F}'..='\u{1FFF}'
                | '\u{200C}'..='\u{200D}'
                | '\u{2070}'..='\u{218F}'
                | '\u{2C00}'..='\u{2FEF}'
                | '\u{3001}'..='\u{D7FF}'
                | '\u{F900}'..='\u{FDCF}'
                | '\u{FDF0}'..='\u{FFFD}'
                | '\u{10000}'..='\u{EFFFF}')
    }
    let mut chars = id.chars();
    // An empty name is invalid.
    let first = chars.next().ok_or(VariableNameParseError)?;
    if !(is_base_char(first) || first == ':') {
        return Err(VariableNameParseError);
    }
    if chars.all(|c| {
        is_base_char(c)
            || matches!(c, '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}')
    }) {
        Ok(())
    } else {
        Err(VariableNameParseError)
    }
}
/// An error raised during [`Variable`] name validation.
// Zero-sized: the only failure mode is "invalid name", so no detail is stored.
#[derive(Debug, thiserror::Error)]
#[error("The variable name is invalid")]
pub struct VariableNameParseError;

@ -0,0 +1,242 @@
//! Provides ready to use [`NamedNodeRef`](super::NamedNodeRef)s for basic RDF vocabularies.
pub mod rdf {
    //! [RDF](https://www.w3.org/TR/rdf11-concepts/) vocabulary.
    //!
    //! Every constant is a `NamedNodeRef` built with `new_unchecked`: the IRIs
    //! are known to be valid, so no runtime validation is performed.
    use crate::oxrdf::named_node::NamedNodeRef;
    /// The class of containers of alternatives.
    pub const ALT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Alt");
    /// The class of unordered containers.
    pub const BAG: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag");
    /// The first item in the subject RDF list.
    pub const FIRST: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#first");
    /// The class of HTML literal values.
    pub const HTML: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML");
    /// The class of language-tagged string literal values.
    pub const LANG_STRING: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString");
    /// The class of RDF lists.
    pub const LIST: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#List");
    /// The empty list.
    pub const NIL: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil");
    /// The object of the subject RDF statement.
    pub const OBJECT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#object");
    /// The predicate of the subject RDF statement.
    pub const PREDICATE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate");
    /// The class of RDF properties.
    pub const PROPERTY: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property");
    /// The rest of the subject RDF list after the first item.
    pub const REST: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest");
    /// The class of ordered containers.
    pub const SEQ: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq");
    /// The class of RDF statements.
    pub const STATEMENT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement");
    /// The subject of the subject RDF statement.
    pub const SUBJECT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#subject");
    /// The subject is an instance of a class.
    pub const TYPE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    /// Idiomatic property used for structured values.
    pub const VALUE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#value");
    /// The class of XML literal values.
    pub const XML_LITERAL: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral");
}
pub mod rdfs {
    //! [RDFS](https://www.w3.org/TR/rdf-schema/) vocabulary.
    //!
    //! Every constant is a `NamedNodeRef` built with `new_unchecked`: the IRIs
    //! are known to be valid, so no runtime validation is performed.
    use crate::oxrdf::named_node::NamedNodeRef;
    /// The class of classes.
    pub const CLASS: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Class");
    /// A description of the subject resource.
    pub const COMMENT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#comment");
    /// The class of RDF containers.
    pub const CONTAINER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Container");
    /// The class of container membership properties, `rdf:_1`, `rdf:_2`, ..., all of which are sub-properties of `member`.
    pub const CONTAINER_MEMBERSHIP_PROPERTY: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(
        "http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty",
    );
    /// The class of RDF datatypes.
    pub const DATATYPE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Datatype");
    /// A domain of the subject property.
    pub const DOMAIN: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#domain");
    /// The definition of the subject resource.
    pub const IS_DEFINED_BY: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#isDefinedBy");
    /// A human-readable name for the subject.
    pub const LABEL: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#label");
    /// The class of literal values, e.g. textual strings and integers.
    pub const LITERAL: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Literal");
    /// A member of the subject resource.
    pub const MEMBER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#member");
    /// A range of the subject property.
    pub const RANGE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#range");
    /// The class resource, everything.
    pub const RESOURCE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Resource");
    /// Further information about the subject resource.
    pub const SEE_ALSO: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#seeAlso");
    /// The subject is a subclass of a class.
    pub const SUB_CLASS_OF: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subClassOf");
    /// The subject is a subproperty of a property.
    pub const SUB_PROPERTY_OF: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subPropertyOf");
}
pub mod xsd {
    //! [RDF compatible XSD datatypes](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-compatible-xsd-types).
    //!
    //! Every constant is a `NamedNodeRef` built with `new_unchecked`: the IRIs
    //! are known to be valid, so no runtime validation is performed.
    use crate::oxrdf::named_node::NamedNodeRef;
    /// Absolute or relative URIs and IRIs.
    pub const ANY_URI: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#anyURI");
    /// Base64-encoded binary data.
    pub const BASE_64_BINARY: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#base64Binary");
    /// true, false.
    pub const BOOLEAN: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#boolean");
    /// 128…+127 (8 bit).
    pub const BYTE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#byte");
    /// Dates (yyyy-mm-dd) with or without timezone.
    pub const DATE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#date");
    /// Duration of time (days, hours, minutes, seconds only).
    pub const DAY_TIME_DURATION: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dayTimeDuration");
    /// Date and time with or without timezone.
    pub const DATE_TIME: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dateTime");
    /// Date and time with required timezone.
    pub const DATE_TIME_STAMP: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#dateTimeStamp");
    /// Arbitrary-precision decimal numbers.
    pub const DECIMAL: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#decimal");
    /// 64-bit floating point numbers incl. ±Inf, ±0, NaN.
    pub const DOUBLE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#double");
    /// Duration of time.
    pub const DURATION: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#duration");
    /// 32-bit floating point numbers incl. ±Inf, ±0, NaN.
    pub const FLOAT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#float");
    /// Gregorian calendar day of the month.
    pub const G_DAY: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gDay");
    /// Gregorian calendar month.
    pub const G_MONTH: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gMonth");
    /// Gregorian calendar month and day.
    pub const G_MONTH_DAY: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gMonthDay");
    /// Gregorian calendar year.
    pub const G_YEAR: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gYear");
    /// Gregorian calendar year and month.
    pub const G_YEAR_MONTH: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#gYearMonth");
    /// Hex-encoded binary data.
    pub const HEX_BINARY: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#hexBinary");
    /// -2147483648…+2147483647 (32 bit).
    pub const INT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#int");
    /// Arbitrary-size integer numbers.
    pub const INTEGER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#integer");
    /// Language tags per [BCP47](http://tools.ietf.org/html/bcp47).
    pub const LANGUAGE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#language");
    /// -9223372036854775808…+9223372036854775807 (64 bit).
    pub const LONG: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#long");
    /// XML Names.
    pub const NAME: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#Name");
    /// XML NCName.
    pub const NC_NAME: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#NCName");
    /// Integer numbers <0.
    pub const NEGATIVE_INTEGER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#negativeInteger");
    /// XML NMTOKENs.
    pub const NMTOKEN: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#NMTOKEN");
    /// Integer numbers ≥0.
    pub const NON_NEGATIVE_INTEGER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#nonNegativeInteger");
    /// Integer numbers ≤0.
    pub const NON_POSITIVE_INTEGER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#nonPositiveInteger");
    /// Whitespace-normalized strings.
    pub const NORMALIZED_STRING: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#normalizedString");
    /// Integer numbers >0.
    pub const POSITIVE_INTEGER: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#positiveInteger");
    /// Times (hh:mm:ss.sss…) with or without timezone.
    pub const TIME: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#time");
    /// -32768…+32767 (16 bit).
    pub const SHORT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#short");
    /// Character strings (but not all Unicode character strings).
    pub const STRING: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#string");
    /// Tokenized strings.
    pub const TOKEN: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#token");
    /// 0…255 (8 bit).
    pub const UNSIGNED_BYTE: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedByte");
    /// 0…4294967295 (32 bit).
    pub const UNSIGNED_INT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedInt");
    /// 0…18446744073709551615 (64 bit).
    pub const UNSIGNED_LONG: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedLong");
    /// 0…65535 (16 bit).
    pub const UNSIGNED_SHORT: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#unsignedShort");
    /// Duration of time (months and years only).
    pub const YEAR_MONTH_DURATION: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.w3.org/2001/XMLSchema#yearMonthDuration");
}
pub mod geosparql {
    //! [GeoSpatial](https://opengeospatial.github.io/ogc-geosparql/) vocabulary.
    //!
    //! The IRI is known to be valid, so `new_unchecked` is safe here.
    use crate::oxrdf::named_node::NamedNodeRef;
    /// Geospatial datatype like `"Point({longitude} {latitude})"^^geo:wktLiteral`
    pub const WKT_LITERAL: NamedNodeRef<'_> =
        NamedNodeRef::new_unchecked("http://www.opengis.net/ont/geosparql#wktLiteral");
}

@ -0,0 +1,67 @@
OxRDF I/O
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfio.svg)](https://crates.io/crates/oxrdfio)
[![Released API docs](https://docs.rs/oxrdfio/badge.svg)](https://docs.rs/oxrdfio)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfio)](https://crates.io/crates/oxrdfio)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRDF I/O is a set of parsers and serializers for RDF.
It supports:
* [N3](https://w3c.github.io/N3/spec/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Quads](https://www.w3.org/TR/n-quads/) using [`oxttl`](https://crates.io/crates/oxttl)
* [N-Triples](https://www.w3.org/TR/n-triples/) using [`oxttl`](https://crates.io/crates/oxttl)
* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) using [`oxrdfxml`](https://crates.io/crates/oxrdfxml)
* [TriG](https://www.w3.org/TR/trig/) using [`oxttl`](https://crates.io/crates/oxttl)
* [Turtle](https://www.w3.org/TR/turtle/) using [`oxttl`](https://crates.io/crates/oxttl)
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
It is designed as a low level parser compatible with both synchronous and asynchronous I/O (behind the `async-tokio` feature).
The entry points of this library are the two [`RdfParser`] and [`RdfSerializer`] structs.
Usage example converting a Turtle file to a N-Triples file:
```rust
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
let turtle_file = b"@base <http://example.com/> .
@prefix schema: <http://schema.org/> .
<foo> a schema:Person ;
schema:name \"Foo\" .
<bar> a schema:Person ;
schema:name \"Bar\" .";
let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
<http://example.com/foo> <http://schema.org/name> \"Foo\" .
<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
<http://example.com/bar> <http://schema.org/name> \"Bar\" .
";
let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
writer.write_quad(&quad.unwrap()).unwrap();
}
assert_eq!(writer.finish().unwrap(), ntriples_file);
```
Parsers for other RDF formats exist in Rust, like [graph-rdfa-processor](https://github.com/nbittich/graph-rdfa-processor) for RDFa and [json-ld](https://github.com/timothee-haudebourg/json-ld) for JSON-LD.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,124 @@
use crate::oxrdfxml;
use crate::oxttl;
use std::io;
use std::ops::Range;
/// Error returned during RDF format parsing.
// Two-level split: transport failures (`Io`) vs. malformed content (`Syntax`).
#[derive(Debug, thiserror::Error)]
pub enum RdfParseError {
    /// I/O error during parsing (file not found...).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An error in the file syntax.
    #[error(transparent)]
    Syntax(#[from] RdfSyntaxError),
}
impl RdfParseError {
    /// Builds a syntax error from a static message (no structured source error).
    pub(crate) fn msg(msg: &'static str) -> Self {
        Self::Syntax(RdfSyntaxError(SyntaxErrorKind::Msg(msg)))
    }
}
// Wraps a Turtle-family syntax error into the format-agnostic error type.
impl From<oxttl::TurtleSyntaxError> for RdfSyntaxError {
    #[inline]
    fn from(error: oxttl::TurtleSyntaxError) -> Self {
        Self(SyntaxErrorKind::Turtle(error))
    }
}
// Splits a Turtle parse error into the matching `Io`/`Syntax` variant.
impl From<oxttl::TurtleParseError> for RdfParseError {
    #[inline]
    fn from(error: oxttl::TurtleParseError) -> Self {
        match error {
            oxttl::TurtleParseError::Syntax(e) => Self::Syntax(e.into()),
            oxttl::TurtleParseError::Io(e) => Self::Io(e),
        }
    }
}
// Wraps an RDF/XML syntax error into the format-agnostic error type.
impl From<oxrdfxml::RdfXmlSyntaxError> for RdfSyntaxError {
    #[inline]
    fn from(error: oxrdfxml::RdfXmlSyntaxError) -> Self {
        Self(SyntaxErrorKind::RdfXml(error))
    }
}
// Splits an RDF/XML parse error into the matching `Io`/`Syntax` variant.
impl From<oxrdfxml::RdfXmlParseError> for RdfParseError {
    #[inline]
    fn from(error: oxrdfxml::RdfXmlParseError) -> Self {
        match error {
            oxrdfxml::RdfXmlParseError::Syntax(e) => Self::Syntax(e.into()),
            oxrdfxml::RdfXmlParseError::Io(e) => Self::Io(e),
        }
    }
}
// Lossy conversion to `io::Error`: I/O errors pass through, syntax errors
// are converted via `RdfSyntaxError`'s own `Into<io::Error>`.
impl From<RdfParseError> for io::Error {
    #[inline]
    fn from(error: RdfParseError) -> Self {
        match error {
            RdfParseError::Io(error) => error,
            RdfParseError::Syntax(error) => error.into(),
        }
    }
}
/// An error in the syntax of the parsed file.
// Public newtype over the private kind enum, for API stability.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct RdfSyntaxError(#[from] SyntaxErrorKind);
/// An error in the syntax of the parsed file.
// One variant per underlying parser family, plus a free-form message.
#[derive(Debug, thiserror::Error)]
enum SyntaxErrorKind {
    #[error(transparent)]
    Turtle(#[from] oxttl::TurtleSyntaxError),
    #[error(transparent)]
    RdfXml(#[from] oxrdfxml::RdfXmlSyntaxError),
    #[error("{0}")]
    Msg(&'static str),
}
impl RdfSyntaxError {
    /// The location of the error inside of the file, when known.
    ///
    /// Only the Turtle-family parsers report positions; RDF/XML and
    /// message-only errors return `None`.
    #[inline]
    pub fn location(&self) -> Option<Range<TextPosition>> {
        match &self.0 {
            SyntaxErrorKind::Turtle(e) => {
                let location = e.location();
                let start = TextPosition {
                    line: location.start.line,
                    column: location.start.column,
                    offset: location.start.offset,
                };
                let end = TextPosition {
                    line: location.end.line,
                    column: location.end.column,
                    offset: location.end.offset,
                };
                Some(start..end)
            }
            SyntaxErrorKind::RdfXml(_) | SyntaxErrorKind::Msg(_) => None,
        }
    }
}
// Converts to `io::Error`: parser errors delegate to their own conversion,
// plain messages become `InvalidData`.
impl From<RdfSyntaxError> for io::Error {
    #[inline]
    fn from(error: RdfSyntaxError) -> Self {
        match error.0 {
            SyntaxErrorKind::Turtle(error) => error.into(),
            SyntaxErrorKind::RdfXml(error) => error.into(),
            SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
        }
    }
}
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// Line number, starting from 0.
    pub line: u64,
    /// Column number in code points, starting from 0.
    pub column: u64,
    /// Byte offset from the start of the file, starting from 0.
    pub offset: u64,
}

@ -0,0 +1,216 @@
use std::fmt;
/// RDF serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD might be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum RdfFormat {
    /// [N3](https://w3c.github.io/N3/spec/)
    N3,
    /// [N-Quads](https://www.w3.org/TR/n-quads/)
    NQuads,
    /// [N-Triples](https://www.w3.org/TR/n-triples/)
    NTriples,
    /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
    RdfXml,
    /// [TriG](https://www.w3.org/TR/trig/)
    TriG,
    /// [Turtle](https://www.w3.org/TR/turtle/)
    Turtle,
}
impl RdfFormat {
    /// The canonical IRI of this format according to the
    /// [Unique URIs for file formats registry](https://www.w3.org/ns/formats/),
    /// e.g. `http://www.w3.org/ns/formats/N-Triples` for N-Triples.
    #[inline]
    pub const fn iri(self) -> &'static str {
        match self {
            Self::N3 => "http://www.w3.org/ns/formats/N3",
            Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
            Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
            Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
            Self::TriG => "http://www.w3.org/ns/formats/TriG",
            Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
        }
    }
    /// The canonical [IANA media type](https://tools.ietf.org/html/rfc2046) of this format,
    /// e.g. `application/n-triples` for N-Triples.
    #[inline]
    pub const fn media_type(self) -> &'static str {
        match self {
            Self::N3 => "text/n3",
            Self::NQuads => "application/n-quads",
            Self::NTriples => "application/n-triples",
            Self::RdfXml => "application/rdf+xml",
            Self::TriG => "application/trig",
            Self::Turtle => "text/turtle",
        }
    }
    /// The [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension
    /// of this format, e.g. `nt` for N-Triples.
    #[inline]
    pub const fn file_extension(self) -> &'static str {
        match self {
            Self::N3 => "n3",
            Self::NQuads => "nq",
            Self::NTriples => "nt",
            Self::RdfXml => "rdf",
            Self::TriG => "trig",
            Self::Turtle => "ttl",
        }
    }
    /// The human-readable name of this format, e.g. `N-Triples`.
    #[inline]
    pub const fn name(self) -> &'static str {
        match self {
            Self::N3 => "N3",
            Self::NQuads => "N-Quads",
            Self::NTriples => "N-Triples",
            Self::RdfXml => "RDF/XML",
            Self::TriG => "TriG",
            Self::Turtle => "Turtle",
        }
    }
    /// Checks if the format supports [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset)
    /// (i.e. named graphs) and not only [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph).
    ///
    /// Only N-Quads and TriG do.
    #[inline]
    pub const fn supports_datasets(self) -> bool {
        matches!(self, Self::NQuads | Self::TriG)
    }
    /// Checks if the format supports [RDF-star quoted triples](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#dfn-quoted).
    ///
    /// The Turtle family (N-Triples, N-Quads, Turtle, TriG) does; RDF/XML and N3 do not.
    #[inline]
    #[cfg(feature = "rdf-star")]
    pub const fn supports_rdf_star(self) -> bool {
        matches!(
            self,
            Self::NTriples | Self::NQuads | Self::Turtle | Self::TriG
        )
    }
    /// Looks for a known format from a media type, case-insensitively.
    ///
    /// Media type parameters (e.g. `; charset=utf-8`), an `x-` subtype prefix
    /// and a few aliases ("application/xml" for RDF/XML, "text/plain" for
    /// N-Triples...) are also handled.
    #[inline]
    pub fn from_media_type(media_type: &str) -> Option<Self> {
        const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [
            ("n-quads", RdfFormat::NQuads),
            ("n-triples", RdfFormat::NTriples),
            ("n3", RdfFormat::N3),
            ("nquads", RdfFormat::NQuads),
            ("ntriples", RdfFormat::NTriples),
            ("plain", RdfFormat::NTriples),
            ("rdf+xml", RdfFormat::RdfXml),
            ("trig", RdfFormat::TriG),
            ("turtle", RdfFormat::Turtle),
            ("xml", RdfFormat::RdfXml),
        ];
        // Keep only the "type/subtype" part, dropping any parameters.
        let essence = media_type.split(';').next().unwrap_or(media_type);
        let (main_type, subtype) = essence.split_once('/')?;
        let main_type = main_type.trim();
        if !main_type.eq_ignore_ascii_case("application") && !main_type.eq_ignore_ascii_case("text")
        {
            return None;
        }
        let subtype = subtype.trim();
        let subtype = subtype.strip_prefix("x-").unwrap_or(subtype);
        MEDIA_SUBTYPES
            .iter()
            .find(|(candidate, _)| candidate.eq_ignore_ascii_case(subtype))
            .map(|&(_, format)| format)
    }
    /// Looks for a known format from a file extension, case-insensitively.
    ///
    /// A few aliases ("xml" for RDF/XML, "txt" for N-Triples) are also accepted.
    #[inline]
    pub fn from_extension(extension: &str) -> Option<Self> {
        const MEDIA_TYPES: [(&str, RdfFormat); 8] = [
            ("n3", RdfFormat::N3),
            ("nq", RdfFormat::NQuads),
            ("nt", RdfFormat::NTriples),
            ("rdf", RdfFormat::RdfXml),
            ("trig", RdfFormat::TriG),
            ("ttl", RdfFormat::Turtle),
            ("txt", RdfFormat::NTriples),
            ("xml", RdfFormat::RdfXml),
        ];
        MEDIA_TYPES
            .iter()
            .find(|(candidate, _)| candidate.eq_ignore_ascii_case(extension))
            .map(|&(_, format)| format)
    }
}
impl fmt::Display for RdfFormat {
    /// Writes the format [`name`](Self::name).
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.name())
    }
}

@ -0,0 +1,9 @@
mod error;
mod format;
mod parser;
mod serializer;
pub use error::{RdfParseError, RdfSyntaxError, TextPosition};
pub use format::RdfFormat;
pub use parser::{FromReadQuadReader, RdfParser};
pub use serializer::{RdfSerializer, ToWriteQuadWriter};

@ -0,0 +1,795 @@
//! Utilities to read RDF graphs and datasets.
use crate::oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple};
pub use crate::oxrdfio::error::RdfParseError;
use crate::oxrdfio::format::RdfFormat;
use crate::oxrdfxml::{FromReadRdfXmlReader, RdfXmlParser};
use crate::oxttl::n3::{FromReadN3Reader, N3Parser, N3PrefixesIter, N3Quad, N3Term};
use crate::oxttl::nquads::{FromReadNQuadsReader, NQuadsParser};
use crate::oxttl::ntriples::{FromReadNTriplesReader, NTriplesParser};
use crate::oxttl::trig::{FromReadTriGReader, TriGParser, TriGPrefixesIter};
use crate::oxttl::turtle::{FromReadTurtleReader, TurtleParser, TurtlePrefixesIter};
use std::collections::HashMap;
use std::io::Read;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;
/// Parsers for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// Note the useful options:
/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct RdfParser {
    // Format-specific parser implementation.
    inner: RdfParserKind,
    // Graph name substituted for the default graph in returned quads.
    default_graph: GraphName,
    // When true, encountering a named graph is a parse error.
    without_named_graphs: bool,
    // When true, blank node ids are replaced with freshly generated ones.
    rename_blank_nodes: bool,
}
/// The underlying concrete parser, one variant per supported syntax.
enum RdfParserKind {
    N3(N3Parser),
    NQuads(NQuadsParser),
    NTriples(NTriplesParser),
    RdfXml(RdfXmlParser),
    TriG(TriGParser),
    Turtle(TurtleParser),
}
impl RdfParser {
    /// Builds a parser for the given format.
    #[inline]
    pub fn from_format(format: RdfFormat) -> Self {
        Self {
            inner: match format {
                RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()),
                // For the line-based and Turtle-family formats, quoted triples
                // (RDF-star) are only enabled when the `rdf-star` feature is on.
                RdfFormat::NQuads => RdfParserKind::NQuads({
                    #[cfg(feature = "rdf-star")]
                    {
                        NQuadsParser::new().with_quoted_triples()
                    }
                    #[cfg(not(feature = "rdf-star"))]
                    {
                        NQuadsParser::new()
                    }
                }),
                RdfFormat::NTriples => RdfParserKind::NTriples({
                    #[cfg(feature = "rdf-star")]
                    {
                        NTriplesParser::new().with_quoted_triples()
                    }
                    #[cfg(not(feature = "rdf-star"))]
                    {
                        NTriplesParser::new()
                    }
                }),
                RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()),
                RdfFormat::TriG => RdfParserKind::TriG({
                    #[cfg(feature = "rdf-star")]
                    {
                        TriGParser::new().with_quoted_triples()
                    }
                    #[cfg(not(feature = "rdf-star"))]
                    {
                        TriGParser::new()
                    }
                }),
                RdfFormat::Turtle => RdfParserKind::Turtle({
                    #[cfg(feature = "rdf-star")]
                    {
                        TurtleParser::new().with_quoted_triples()
                    }
                    #[cfg(not(feature = "rdf-star"))]
                    {
                        TurtleParser::new()
                    }
                }),
            },
            default_graph: GraphName::DefaultGraph,
            without_named_graphs: false,
            rename_blank_nodes: false,
        }
    }
    /// The format the parser uses.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// assert_eq!(
    ///     RdfParser::from_format(RdfFormat::Turtle).format(),
    ///     RdfFormat::Turtle
    /// );
    /// ```
    pub fn format(&self) -> RdfFormat {
        match &self.inner {
            RdfParserKind::N3(_) => RdfFormat::N3,
            RdfParserKind::NQuads(_) => RdfFormat::NQuads,
            RdfParserKind::NTriples(_) => RdfFormat::NTriples,
            RdfParserKind::RdfXml(_) => RdfFormat::RdfXml,
            RdfParserKind::TriG(_) => RdfFormat::TriG,
            RdfParserKind::Turtle(_) => RdfFormat::Turtle,
        }
    }
    /// Provides an IRI that could be used to resolve the file relative IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = "</s> </p> </o> .";
    ///
    /// let parser = RdfParser::from_format(RdfFormat::Turtle).with_base_iri("http://example.com")?;
    /// let quads = parser
    ///     .parse_read(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    ///
    /// assert_eq!(quads.len(), 1);
    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.inner = match self.inner {
            // The base IRI is not applied to N3, N-Triples or N-Quads:
            // these parsers are kept unchanged.
            RdfParserKind::N3(p) => RdfParserKind::N3(p),
            RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
            RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
            RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
            RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
            RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
        };
        Ok(self)
    }
    /// Provides the name graph name that should replace the default graph in the returned quads.
    ///
    /// ```
    /// use oxrdf::NamedNode;
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
    ///
    /// let parser = RdfParser::from_format(RdfFormat::Turtle)
    ///     .with_default_graph(NamedNode::new("http://example.com/g")?);
    /// let quads = parser
    ///     .parse_read(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    ///
    /// assert_eq!(quads.len(), 1);
    /// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
        self.default_graph = default_graph.into();
        self
    }
    /// Sets that the parser must fail if parsing a named graph.
    ///
    /// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
    ///
    /// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
    /// assert!(parser.parse_read(file.as_bytes()).next().unwrap().is_err());
    /// ```
    #[inline]
    pub fn without_named_graphs(mut self) -> Self {
        self.without_named_graphs = true;
        self
    }
    /// Renames the blank nodes ids from the ones set in the serialization to random ids.
    ///
    /// This allows to avoid id conflicts when merging graphs together.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = "_:a <http://example.com/p> <http://example.com/o> .";
    ///
    /// let result1 = RdfParser::from_format(RdfFormat::NQuads)
    ///     .rename_blank_nodes()
    ///     .parse_read(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    /// let result2 = RdfParser::from_format(RdfFormat::NQuads)
    ///     .rename_blank_nodes()
    ///     .parse_read(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    /// assert_ne!(result1, result2);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn rename_blank_nodes(mut self) -> Self {
        self.rename_blank_nodes = true;
        self
    }
    /// Assumes the file is valid to make parsing faster.
    ///
    /// It will skip some validations.
    ///
    /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
    #[inline]
    pub fn unchecked(mut self) -> Self {
        self.inner = match self.inner {
            RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()),
            RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()),
            RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()),
            RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()),
            RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()),
            RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()),
        };
        self
    }
    /// Parses from a [`Read`] implementation and returns an iterator of quads.
    ///
    /// Reads are buffered.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
    ///
    /// let parser = RdfParser::from_format(RdfFormat::NTriples);
    /// let quads = parser
    ///     .parse_read(file.as_bytes())
    ///     .collect::<Result<Vec<_>, _>>()?;
    ///
    /// assert_eq!(quads.len(), 1);
    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
    /// # std::io::Result::Ok(())
    /// ```
    pub fn parse_read<R: Read>(self, reader: R) -> FromReadQuadReader<R> {
        FromReadQuadReader {
            parser: match self.inner {
                RdfParserKind::N3(p) => FromReadQuadReaderKind::N3(p.parse_read(reader)),
                RdfParserKind::NQuads(p) => FromReadQuadReaderKind::NQuads(p.parse_read(reader)),
                RdfParserKind::NTriples(p) => {
                    FromReadQuadReaderKind::NTriples(p.parse_read(reader))
                }
                RdfParserKind::RdfXml(p) => FromReadQuadReaderKind::RdfXml(p.parse_read(reader)),
                RdfParserKind::TriG(p) => FromReadQuadReaderKind::TriG(p.parse_read(reader)),
                RdfParserKind::Turtle(p) => FromReadQuadReaderKind::Turtle(p.parse_read(reader)),
            },
            // Post-processing shared by all formats: default graph substitution,
            // named-graph rejection and blank node renaming.
            mapper: QuadMapper {
                default_graph: self.default_graph.clone(),
                without_named_graphs: self.without_named_graphs,
                blank_node_map: self.rename_blank_nodes.then(HashMap::new),
            },
        }
    }
    /// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
    ///
    /// Reads are buffered.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
    ///
    /// let parser = RdfParser::from_format(RdfFormat::NTriples);
    /// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
    /// if let Some(quad) = reader.next().await {
    ///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
    /// }
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
        self,
        reader: R,
    ) -> FromTokioAsyncReadQuadReader<R> {
        FromTokioAsyncReadQuadReader {
            parser: match self.inner {
                RdfParserKind::N3(p) => {
                    FromTokioAsyncReadQuadReaderKind::N3(p.parse_tokio_async_read(reader))
                }
                RdfParserKind::NQuads(p) => {
                    FromTokioAsyncReadQuadReaderKind::NQuads(p.parse_tokio_async_read(reader))
                }
                RdfParserKind::NTriples(p) => {
                    FromTokioAsyncReadQuadReaderKind::NTriples(p.parse_tokio_async_read(reader))
                }
                RdfParserKind::RdfXml(p) => {
                    FromTokioAsyncReadQuadReaderKind::RdfXml(p.parse_tokio_async_read(reader))
                }
                RdfParserKind::TriG(p) => {
                    FromTokioAsyncReadQuadReaderKind::TriG(p.parse_tokio_async_read(reader))
                }
                RdfParserKind::Turtle(p) => {
                    FromTokioAsyncReadQuadReaderKind::Turtle(p.parse_tokio_async_read(reader))
                }
            },
            // Same quad post-processing as the synchronous `parse_read`.
            mapper: QuadMapper {
                default_graph: self.default_graph.clone(),
                without_named_graphs: self.without_named_graphs,
                blank_node_map: self.rename_blank_nodes.then(HashMap::new),
            },
        }
    }
}
impl From<RdfFormat> for RdfParser {
fn from(format: RdfFormat) -> Self {
Self::from_format(format)
}
}
/// Parses a RDF file from a [`Read`] implementation. Can be built using [`RdfParser::parse_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let quads = parser
/// .parse_read(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct FromReadQuadReader<R: Read> {
    // Format-specific parser state driving the underlying reader.
    parser: FromReadQuadReaderKind<R>,
    // Shared quad post-processing (default graph, named-graph checks, blank nodes).
    mapper: QuadMapper,
}
/// The underlying format-specific reader, one variant per supported syntax.
enum FromReadQuadReaderKind<R: Read> {
    N3(FromReadN3Reader<R>),
    NQuads(FromReadNQuadsReader<R>),
    NTriples(FromReadNTriplesReader<R>),
    RdfXml(FromReadRdfXmlReader<R>),
    TriG(FromReadTriGReader<R>),
    Turtle(FromReadTurtleReader<R>),
}
impl<R: Read> Iterator for FromReadQuadReader<R> {
    type Item = Result<Quad, RdfParseError>;

    /// Pulls the next event from the format-specific reader and runs it
    /// through the quad mapper; triple-only formats are lifted into quads.
    fn next(&mut self) -> Option<Self::Item> {
        // Borrow the two fields separately so the mapper is usable inside each arm.
        let mapper = &mut self.mapper;
        let item = match &mut self.parser {
            FromReadQuadReaderKind::N3(inner) => match inner.next()? {
                Ok(quad) => mapper.map_n3_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromReadQuadReaderKind::NQuads(inner) => match inner.next()? {
                Ok(quad) => mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromReadQuadReaderKind::NTriples(inner) => match inner.next()? {
                Ok(triple) => Ok(mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromReadQuadReaderKind::RdfXml(inner) => match inner.next()? {
                Ok(triple) => Ok(mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromReadQuadReaderKind::TriG(inner) => match inner.next()? {
                Ok(quad) => mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromReadQuadReaderKind::Turtle(inner) => match inner.next()? {
                Ok(triple) => Ok(mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
        };
        Some(item)
    }
}
impl<R: Read> FromReadQuadReader<R> {
    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is return if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
    /// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
    ///
    /// reader.next().unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        PrefixesIter {
            // Only the Turtle-family parsers (N3, TriG, Turtle) track prefixes.
            inner: match &self.parser {
                FromReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
                FromReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
                FromReadQuadReaderKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
                FromReadQuadReaderKind::NQuads(_)
                | FromReadQuadReaderKind::NTriples(_)
                | FromReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
            },
        }
    }
    /// The base IRI considered at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader = RdfParser::from_format(RdfFormat::Turtle).parse_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
    ///
    /// reader.next().unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        // Only the Turtle-family parsers track a base IRI.
        match &self.parser {
            FromReadQuadReaderKind::N3(p) => p.base_iri(),
            FromReadQuadReaderKind::TriG(p) => p.base_iri(),
            FromReadQuadReaderKind::Turtle(p) => p.base_iri(),
            FromReadQuadReaderKind::NQuads(_)
            | FromReadQuadReaderKind::NTriples(_)
            | FromReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
        }
    }
}
/// Parses a RDF file from a Tokio [`AsyncRead`] implementation. Can be built using [`RdfParser::parse_tokio_async_read`].
///
/// Reads are buffered.
///
/// ```
/// use oxrdfio::{RdfFormat, RdfParser};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = RdfParser::from_format(RdfFormat::NTriples);
/// let mut reader = parser.parse_tokio_async_read(file.as_bytes());
/// if let Some(quad) = reader.next().await {
/// assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
/// }
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadQuadReader<R: AsyncRead + Unpin> {
    // Format-specific async parser state driving the underlying reader.
    parser: FromTokioAsyncReadQuadReaderKind<R>,
    // Shared quad post-processing (default graph, named-graph checks, blank nodes).
    mapper: QuadMapper,
}
/// The underlying format-specific async reader, one variant per supported syntax.
#[cfg(feature = "async-tokio")]
enum FromTokioAsyncReadQuadReaderKind<R: AsyncRead + Unpin> {
    N3(FromTokioAsyncReadN3Reader<R>),
    NQuads(FromTokioAsyncReadNQuadsReader<R>),
    NTriples(FromTokioAsyncReadNTriplesReader<R>),
    RdfXml(FromTokioAsyncReadRdfXmlReader<R>),
    TriG(FromTokioAsyncReadTriGReader<R>),
    Turtle(FromTokioAsyncReadTurtleReader<R>),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadQuadReader<R> {
    /// Reads the next quad, or returns `None` at the end of the file.
    pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
        Some(match &mut self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_n3_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::NQuads(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            // Triple-only formats are lifted into quads in the default graph.
            FromTokioAsyncReadQuadReaderKind::NTriples(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::RdfXml(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::TriG(parser) => match parser.next().await? {
                Ok(quad) => self.mapper.map_quad(quad),
                Err(e) => Err(e.into()),
            },
            FromTokioAsyncReadQuadReaderKind::Turtle(parser) => match parser.next().await? {
                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
                Err(e) => Err(e.into()),
            },
        })
    }
    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// An empty iterator is return if the format does not support prefixes.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     reader.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> PrefixesIter<'_> {
        PrefixesIter {
            // Only the Turtle-family parsers (N3, TriG, Turtle) track prefixes.
            inner: match &self.parser {
                FromTokioAsyncReadQuadReaderKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
                FromTokioAsyncReadQuadReaderKind::Turtle(p) => {
                    PrefixesIterKind::Turtle(p.prefixes())
                }
                FromTokioAsyncReadQuadReaderKind::NQuads(_)
                | FromTokioAsyncReadQuadReaderKind::NTriples(_)
                | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => PrefixesIterKind::None, /* TODO: implement for RDF/XML */
            },
        }
    }
    /// The base IRI considered at the current step of the parsing.
    ///
    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfParser};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut reader =
    ///     RdfParser::from_format(RdfFormat::Turtle).parse_tokio_async_read(file.as_slice());
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        // Only the Turtle-family parsers track a base IRI.
        match &self.parser {
            FromTokioAsyncReadQuadReaderKind::N3(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::TriG(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::Turtle(p) => p.base_iri(),
            FromTokioAsyncReadQuadReaderKind::NQuads(_)
            | FromTokioAsyncReadQuadReaderKind::NTriples(_)
            | FromTokioAsyncReadQuadReaderKind::RdfXml(_) => None, // TODO: implement for RDF/XML
        }
    }
}
/// Iterator on the file prefixes.
///
/// See [`FromReadQuadReader::prefixes`].
pub struct PrefixesIter<'a> {
    // Format-specific prefix iterator; `None` for formats without prefixes.
    inner: PrefixesIterKind<'a>,
}
/// The underlying prefix iterator; `None` stands for prefix-less formats.
enum PrefixesIterKind<'a> {
    Turtle(TurtlePrefixesIter<'a>),
    TriG(TriGPrefixesIter<'a>),
    N3(N3PrefixesIter<'a>),
    None,
}
impl<'a> Iterator for PrefixesIter<'a> {
    type Item = (&'a str, &'a str);

    /// Delegates to the format-specific prefix iterator.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.inner {
            PrefixesIterKind::Turtle(turtle) => turtle.next(),
            PrefixesIterKind::TriG(trig) => trig.next(),
            PrefixesIterKind::N3(n3) => n3.next(),
            PrefixesIterKind::None => None,
        }
    }

    /// Forwards the inner iterator's size hint; the prefix-less case is exactly empty.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match &self.inner {
            PrefixesIterKind::Turtle(turtle) => turtle.size_hint(),
            PrefixesIterKind::TriG(trig) => trig.size_hint(),
            PrefixesIterKind::N3(n3) => n3.size_hint(),
            PrefixesIterKind::None => (0, Some(0)),
        }
    }
}
/// Post-processes parsed quads: default-graph substitution, named-graph
/// rejection and blank node renaming.
struct QuadMapper {
    // Graph name substituted for the default graph.
    default_graph: GraphName,
    // When true, any named (or blank-node-named) graph is rejected.
    without_named_graphs: bool,
    // `Some` when blank nodes must be renamed; maps original ids to fresh nodes.
    blank_node_map: Option<HashMap<BlankNode, BlankNode>>,
}
impl QuadMapper {
    /// Returns the replacement for a blank node when renaming is enabled,
    /// creating and memoizing a fresh node on first sight; otherwise returns
    /// the node unchanged.
    fn map_blank_node(&mut self, node: BlankNode) -> BlankNode {
        if let Some(blank_node_map) = &mut self.blank_node_map {
            blank_node_map
                .entry(node)
                .or_insert_with(BlankNode::default)
                .clone()
        } else {
            node
        }
    }
    /// Maps a subject, renaming blank nodes (and, with `rdf-star`, recursing
    /// into quoted triples).
    fn map_subject(&mut self, node: Subject) -> Subject {
        match node {
            Subject::NamedNode(node) => node.into(),
            Subject::BlankNode(node) => self.map_blank_node(node).into(),
            #[cfg(feature = "rdf-star")]
            Subject::Triple(triple) => self.map_triple(*triple).into(),
        }
    }
    /// Maps an object term, renaming blank nodes (and, with `rdf-star`,
    /// recursing into quoted triples).
    fn map_term(&mut self, node: Term) -> Term {
        match node {
            Term::NamedNode(node) => node.into(),
            Term::BlankNode(node) => self.map_blank_node(node).into(),
            Term::Literal(literal) => literal.into(),
            #[cfg(feature = "rdf-star")]
            Term::Triple(triple) => self.map_triple(*triple).into(),
        }
    }
    /// Maps a triple's subject and object; predicates are always IRIs and
    /// pass through unchanged.
    fn map_triple(&mut self, triple: Triple) -> Triple {
        Triple {
            subject: self.map_subject(triple.subject),
            predicate: triple.predicate,
            object: self.map_term(triple.object),
        }
    }
    /// Maps a graph name, rejecting named graphs when `without_named_graphs`
    /// is set and substituting the configured default graph.
    fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfParseError> {
        match graph_name {
            GraphName::NamedNode(node) => {
                if self.without_named_graphs {
                    Err(RdfParseError::msg("Named graphs are not allowed"))
                } else {
                    Ok(node.into())
                }
            }
            // Blank-node graph names also count as named graphs.
            GraphName::BlankNode(node) => {
                if self.without_named_graphs {
                    Err(RdfParseError::msg("Named graphs are not allowed"))
                } else {
                    Ok(self.map_blank_node(node).into())
                }
            }
            GraphName::DefaultGraph => Ok(self.default_graph.clone()),
        }
    }
    /// Maps a full quad through the subject/term/graph-name mappers.
    fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfParseError> {
        Ok(Quad {
            subject: self.map_subject(quad.subject),
            predicate: quad.predicate,
            object: self.map_term(quad.object),
            graph_name: self.map_graph_name(quad.graph_name)?,
        })
    }
    /// Lifts a triple into a quad in the configured default graph.
    fn map_triple_to_quad(&mut self, triple: Triple) -> Quad {
        self.map_triple(triple).in_graph(self.default_graph.clone())
    }
    /// Converts an N3 quad into a plain RDF quad, rejecting N3-only terms
    /// (variables everywhere, literals in subject/predicate position, blank
    /// nodes and quoted triples in predicate position).
    fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfParseError> {
        Ok(Quad {
            subject: match quad.subject {
                N3Term::NamedNode(s) => Ok(s.into()),
                N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()),
                N3Term::Literal(_) => Err(RdfParseError::msg(
                    "literals are not allowed in regular RDF subjects",
                )),
                #[cfg(feature = "rdf-star")]
                N3Term::Triple(s) => Ok(self.map_triple(*s).into()),
                N3Term::Variable(_) => Err(RdfParseError::msg(
                    "variables are not allowed in regular RDF subjects",
                )),
            }?,
            predicate: match quad.predicate {
                N3Term::NamedNode(p) => Ok(p),
                N3Term::BlankNode(_) => Err(RdfParseError::msg(
                    "blank nodes are not allowed in regular RDF predicates",
                )),
                N3Term::Literal(_) => Err(RdfParseError::msg(
                    "literals are not allowed in regular RDF predicates",
                )),
                #[cfg(feature = "rdf-star")]
                N3Term::Triple(_) => Err(RdfParseError::msg(
                    "quoted triples are not allowed in regular RDF predicates",
                )),
                N3Term::Variable(_) => Err(RdfParseError::msg(
                    "variables are not allowed in regular RDF predicates",
                )),
            }?,
            object: match quad.object {
                N3Term::NamedNode(o) => Ok(o.into()),
                N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()),
                N3Term::Literal(o) => Ok(o.into()),
                #[cfg(feature = "rdf-star")]
                N3Term::Triple(o) => Ok(self.map_triple(*o).into()),
                N3Term::Variable(_) => Err(RdfParseError::msg(
                    "variables are not allowed in regular RDF objects",
                )),
            }?,
            graph_name: self.map_graph_name(quad.graph_name)?,
        })
    }
}

@ -0,0 +1,412 @@
//! Utilities to write RDF graphs and datasets.
use crate::oxrdf::{GraphNameRef, IriParseError, QuadRef, TripleRef};
use crate::oxrdfio::format::RdfFormat;
#[cfg(feature = "async-tokio")]
use crate::oxrdfxml::ToTokioAsyncWriteRdfXmlWriter;
use crate::oxrdfxml::{RdfXmlSerializer, ToWriteRdfXmlWriter};
#[cfg(feature = "async-tokio")]
use crate::oxttl::nquads::ToTokioAsyncWriteNQuadsWriter;
use crate::oxttl::nquads::{NQuadsSerializer, ToWriteNQuadsWriter};
#[cfg(feature = "async-tokio")]
use crate::oxttl::ntriples::ToTokioAsyncWriteNTriplesWriter;
use crate::oxttl::ntriples::{NTriplesSerializer, ToWriteNTriplesWriter};
#[cfg(feature = "async-tokio")]
use crate::oxttl::trig::ToTokioAsyncWriteTriGWriter;
use crate::oxttl::trig::{ToWriteTriGWriter, TriGSerializer};
#[cfg(feature = "async-tokio")]
use crate::oxttl::turtle::ToTokioAsyncWriteTurtleWriter;
use crate::oxttl::turtle::{ToWriteTurtleWriter, TurtleSerializer};
use std::io::{self, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite;
/// A serializer for RDF serialization formats.
///
/// It currently supports the following formats:
/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
/// * [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into()
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct RdfSerializer {
    // Format-specific serializer implementation.
    inner: RdfSerializerKind,
}
/// The underlying concrete serializer; note there is no N3 variant
/// (N3 output falls back to the Turtle serializer, see `from_format`).
enum RdfSerializerKind {
    NQuads(NQuadsSerializer),
    NTriples(NTriplesSerializer),
    RdfXml(RdfXmlSerializer),
    TriG(TriGSerializer),
    Turtle(TurtleSerializer),
}
impl RdfSerializer {
    /// Builds a serializer for the given format
    #[inline]
    pub fn from_format(format: RdfFormat) -> Self {
        Self {
            inner: match format {
                RdfFormat::NQuads => RdfSerializerKind::NQuads(NQuadsSerializer::new()),
                RdfFormat::NTriples => RdfSerializerKind::NTriples(NTriplesSerializer::new()),
                RdfFormat::RdfXml => RdfSerializerKind::RdfXml(RdfXmlSerializer::new()),
                RdfFormat::TriG => RdfSerializerKind::TriG(TriGSerializer::new()),
                // There is no dedicated N3 serializer: N3 output is produced
                // by the Turtle serializer.
                RdfFormat::Turtle | RdfFormat::N3 => {
                    RdfSerializerKind::Turtle(TurtleSerializer::new())
                }
            },
        }
    }
    /// The format the serializer serializes to.
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    ///
    /// assert_eq!(
    ///     RdfSerializer::from_format(RdfFormat::Turtle).format(),
    ///     RdfFormat::Turtle
    /// );
    /// ```
    pub fn format(&self) -> RdfFormat {
        // Note: a serializer built from `RdfFormat::N3` reports `Turtle` here,
        // since N3 is serialized via the Turtle serializer (see `from_format`).
        match &self.inner {
            RdfSerializerKind::NQuads(_) => RdfFormat::NQuads,
            RdfSerializerKind::NTriples(_) => RdfFormat::NTriples,
            RdfSerializerKind::RdfXml(_) => RdfFormat::RdfXml,
            RdfSerializerKind::TriG(_) => RdfFormat::TriG,
            RdfSerializerKind::Turtle(_) => RdfFormat::Turtle,
        }
    }
    /// If the format supports it, sets a prefix.
    ///
    /// ```
    /// use oxrdf::vocab::rdf;
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    ///
    /// let mut writer = RdfSerializer::from_format(RdfFormat::Turtle)
    ///     .with_prefix("schema", "http://schema.org/")?
    ///     .serialize_to_write(Vec::new());
    /// writer.write_triple(TripleRef {
    ///     subject: NamedNodeRef::new("http://example.com/s")?.into(),
    ///     predicate: rdf::TYPE.into(),
    ///     object: NamedNodeRef::new("http://schema.org/Person")?.into(),
    /// })?;
    /// assert_eq!(
    ///     writer.finish()?,
    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com/s> a schema:Person .\n"
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.inner = match self.inner {
            // N-Quads and N-Triples have no prefix mechanism: the prefix is ignored.
            RdfSerializerKind::NQuads(s) => RdfSerializerKind::NQuads(s),
            RdfSerializerKind::NTriples(s) => RdfSerializerKind::NTriples(s),
            RdfSerializerKind::RdfXml(s) => {
                RdfSerializerKind::RdfXml(s.with_prefix(prefix_name, prefix_iri)?)
            }
            RdfSerializerKind::TriG(s) => {
                RdfSerializerKind::TriG(s.with_prefix(prefix_name, prefix_iri)?)
            }
            RdfSerializerKind::Turtle(s) => {
                RdfSerializerKind::Turtle(s.with_prefix(prefix_name, prefix_iri)?)
            }
        };
        Ok(self)
    }
    /// Writes to a [`Write`] implementation.
    ///
    /// <div class="warning">
    ///
    /// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
    ///
    /// <div class="warning">
    ///
    /// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    /// use oxrdf::{Quad, NamedNode};
    ///
    /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
    /// writer.write_quad(&Quad {
    ///    subject: NamedNode::new("http://example.com/s")?.into(),
    ///    predicate: NamedNode::new("http://example.com/p")?,
    ///    object: NamedNode::new("http://example.com/o")?.into(),
    ///    graph_name: NamedNode::new("http://example.com/g")?.into()
    /// })?;
    /// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteQuadWriter<W> {
        ToWriteQuadWriter {
            formatter: match self.inner {
                RdfSerializerKind::NQuads(s) => {
                    ToWriteQuadWriterKind::NQuads(s.serialize_to_write(write))
                }
                RdfSerializerKind::NTriples(s) => {
                    ToWriteQuadWriterKind::NTriples(s.serialize_to_write(write))
                }
                RdfSerializerKind::RdfXml(s) => {
                    ToWriteQuadWriterKind::RdfXml(s.serialize_to_write(write))
                }
                RdfSerializerKind::TriG(s) => {
                    ToWriteQuadWriterKind::TriG(s.serialize_to_write(write))
                }
                RdfSerializerKind::Turtle(s) => {
                    ToWriteQuadWriterKind::Turtle(s.serialize_to_write(write))
                }
            },
        }
    }
    /// Writes to a Tokio [`AsyncWrite`] implementation.
    ///
    /// <div class="warning">
    ///
    /// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
    ///
    /// <div class="warning">
    ///
    /// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
    ///
    /// ```
    /// use oxrdfio::{RdfFormat, RdfSerializer};
    /// use oxrdf::{Quad, NamedNode};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> std::io::Result<()> {
    /// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
    /// writer.write_quad(&Quad {
    ///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
    ///     predicate: NamedNode::new_unchecked("http://example.com/p"),
    ///     object: NamedNode::new_unchecked("http://example.com/o").into(),
    ///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
    /// }).await?;
    /// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteQuadWriter<W> {
        ToTokioAsyncWriteQuadWriter {
            formatter: match self.inner {
                RdfSerializerKind::NQuads(s) => {
                    ToTokioAsyncWriteQuadWriterKind::NQuads(s.serialize_to_tokio_async_write(write))
                }
                RdfSerializerKind::NTriples(s) => ToTokioAsyncWriteQuadWriterKind::NTriples(
                    s.serialize_to_tokio_async_write(write),
                ),
                RdfSerializerKind::RdfXml(s) => {
                    ToTokioAsyncWriteQuadWriterKind::RdfXml(s.serialize_to_tokio_async_write(write))
                }
                RdfSerializerKind::TriG(s) => {
                    ToTokioAsyncWriteQuadWriterKind::TriG(s.serialize_to_tokio_async_write(write))
                }
                RdfSerializerKind::Turtle(s) => {
                    ToTokioAsyncWriteQuadWriterKind::Turtle(s.serialize_to_tokio_async_write(write))
                }
            },
        }
    }
}
impl From<RdfFormat> for RdfSerializer {
    /// Builds a serializer for the given format; equivalent to [`RdfSerializer::from_format`].
    #[inline]
    fn from(format: RdfFormat) -> Self {
        Self::from_format(format)
    }
}
/// Writes quads or triples to a [`Write`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_write(Vec::new());
/// writer.write_quad(&Quad {
///     subject: NamedNode::new("http://example.com/s")?.into(),
///     predicate: NamedNode::new("http://example.com/p")?,
///     object: NamedNode::new("http://example.com/o")?.into(),
///     graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// assert_eq!(writer.finish()?, b"<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteQuadWriter<W: Write> {
    // Format-specific writer the public methods dispatch to.
    formatter: ToWriteQuadWriterKind<W>,
}

// One variant per supported serialization format.
enum ToWriteQuadWriterKind<W: Write> {
    NQuads(ToWriteNQuadsWriter<W>),
    NTriples(ToWriteNTriplesWriter<W>),
    RdfXml(ToWriteRdfXmlWriter<W>),
    TriG(ToWriteTriGWriter<W>),
    Turtle(ToWriteTurtleWriter<W>),
}
impl<W: Write> ToWriteQuadWriter<W> {
/// Writes a [`QuadRef`]
pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
match &mut self.formatter {
ToWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad),
ToWriteQuadWriterKind::NTriples(writer) => writer.write_triple(to_triple(quad)?),
ToWriteQuadWriterKind::RdfXml(writer) => writer.write_triple(to_triple(quad)?),
ToWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad),
ToWriteQuadWriterKind::Turtle(writer) => writer.write_triple(to_triple(quad)?),
}
}
/// Writes a [`TripleRef`]
pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
}
/// Writes the last bytes of the file
///
/// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](io::BufWriter).
pub fn finish(self) -> io::Result<W> {
Ok(match self.formatter {
ToWriteQuadWriterKind::NQuads(writer) => writer.finish(),
ToWriteQuadWriterKind::NTriples(writer) => writer.finish(),
ToWriteQuadWriterKind::RdfXml(writer) => writer.finish()?,
ToWriteQuadWriterKind::TriG(writer) => writer.finish()?,
ToWriteQuadWriterKind::Turtle(writer) => writer.finish()?,
})
}
}
/// Writes quads or triples to a Tokio [`AsyncWrite`] implementation.
///
/// Can be built using [`RdfSerializer::serialize_to_tokio_async_write`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteQuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// ```
/// use oxrdfio::{RdfFormat, RdfSerializer};
/// use oxrdf::{Quad, NamedNode};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let mut writer = RdfSerializer::from_format(RdfFormat::NQuads).serialize_to_tokio_async_write(Vec::new());
/// writer.write_quad(&Quad {
///     subject: NamedNode::new_unchecked("http://example.com/s").into(),
///     predicate: NamedNode::new_unchecked("http://example.com/p"),
///     object: NamedNode::new_unchecked("http://example.com/o").into(),
///     graph_name: NamedNode::new_unchecked("http://example.com/g").into()
/// }).await?;
/// assert_eq!(writer.finish().await?, "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n");
/// # Ok(())
/// # }
/// ```
#[must_use]
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteQuadWriter<W: AsyncWrite + Unpin> {
    // Format-specific async writer the public methods dispatch to.
    formatter: ToTokioAsyncWriteQuadWriterKind<W>,
}

// One variant per supported serialization format (async counterparts).
#[cfg(feature = "async-tokio")]
enum ToTokioAsyncWriteQuadWriterKind<W: AsyncWrite + Unpin> {
    NQuads(ToTokioAsyncWriteNQuadsWriter<W>),
    NTriples(ToTokioAsyncWriteNTriplesWriter<W>),
    RdfXml(ToTokioAsyncWriteRdfXmlWriter<W>),
    TriG(ToTokioAsyncWriteTriGWriter<W>),
    Turtle(ToTokioAsyncWriteTurtleWriter<W>),
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteQuadWriter<W> {
    /// Writes a [`QuadRef`]
    ///
    /// Triple-only formats (N-Triples, RDF/XML, Turtle) reject quads that are
    /// not in the default graph (see [`to_triple`]).
    pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        match &mut self.formatter {
            ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
            ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
            ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.write_quad(quad).await,
            ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => {
                writer.write_triple(to_triple(quad)?).await
            }
        }
    }

    /// Writes a [`TripleRef`]
    pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // A triple is serialized as a quad in the default graph.
        self.write_quad(triple.into().in_graph(GraphNameRef::DefaultGraph))
            .await
    }

    /// Writes the last bytes of the file
    ///
    /// Note that this function does not flush the writer. You need to do that if you are using a [`BufWriter`](tokio::io::BufWriter).
    pub async fn finish(self) -> io::Result<W> {
        Ok(match self.formatter {
            // Line-oriented formats cannot fail when finishing; structured ones can.
            ToTokioAsyncWriteQuadWriterKind::NQuads(writer) => writer.finish(),
            ToTokioAsyncWriteQuadWriterKind::NTriples(writer) => writer.finish(),
            ToTokioAsyncWriteQuadWriterKind::RdfXml(writer) => writer.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::TriG(writer) => writer.finish().await?,
            ToTokioAsyncWriteQuadWriterKind::Turtle(writer) => writer.finish().await?,
        })
    }
}
/// Converts a quad into a triple, failing if it is not in the default graph.
///
/// Used by triple-only formats (N-Triples, RDF/XML, Turtle) that cannot
/// represent named graphs.
fn to_triple<'a>(quad: impl Into<QuadRef<'a>>) -> io::Result<TripleRef<'a>> {
    let quad = quad.into();
    // Guard clause: only default-graph quads have a triple representation.
    if !quad.graph_name.is_default_graph() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "Only quads in the default graph can be serialized to a RDF graph format",
        ));
    }
    Ok(quad.into())
}

@ -0,0 +1,56 @@
OxRDF/XML
=========
[![Latest Version](https://img.shields.io/crates/v/oxrdfxml.svg)](https://crates.io/crates/oxrdfxml)
[![Released API docs](https://docs.rs/oxrdfxml/badge.svg)](https://docs.rs/oxrdfxml)
[![Crates.io downloads](https://img.shields.io/crates/d/oxrdfxml)](https://crates.io/crates/oxrdfxml)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
OxRdfXml is a parser and serializer for [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
The entry points of this library are the two [`RdfXmlParser`] and [`RdfXmlSerializer`] structs.
Usage example counting the number of people in an RDF/XML file:
```rust
use oxrdf::{NamedNodeRef, vocab::rdf};
use oxrdfxml::RdfXmlParser;
fn main() {
let file = br#"<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
<rdf:Description rdf:about="http://example.com/foo">
<rdf:type rdf:resource="http://schema.org/Person" />
<schema:name>Foo</schema:name>
</rdf:Description>
<schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
</rdf:RDF>"#;
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
let mut count = 0;
for triple in RdfXmlParser::new().parse_read(file.as_ref()) {
let triple = triple.unwrap();
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
count += 1;
}
}
assert_eq!(2, count);
}
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,89 @@
use oxilangtag::LanguageTagParseError;
use oxiri::IriParseError;
use std::io;
use std::sync::Arc;
/// Error returned during RDF/XML parsing.
#[derive(Debug, thiserror::Error)]
pub enum RdfXmlParseError {
    /// I/O error during parsing (file not found...).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An error in the file syntax.
    #[error(transparent)]
    Syntax(#[from] RdfXmlSyntaxError),
}

impl From<RdfXmlParseError> for io::Error {
    /// Flattens into an [`io::Error`]: I/O errors pass through untouched,
    /// syntax errors are converted via `From<RdfXmlSyntaxError>`.
    #[inline]
    fn from(error: RdfXmlParseError) -> Self {
        match error {
            RdfXmlParseError::Io(error) => error,
            RdfXmlParseError::Syntax(error) => error.into(),
        }
    }
}

impl From<quick_xml::Error> for RdfXmlParseError {
    /// Classifies a `quick_xml` error: I/O failures become [`Self::Io`],
    /// everything else is treated as a syntax error.
    #[inline]
    fn from(error: quick_xml::Error) -> Self {
        match error {
            quick_xml::Error::Io(error) => {
                // quick_xml stores the io::Error in an Arc; recover sole
                // ownership when possible, otherwise rebuild an equivalent error.
                Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e)))
            }
            _ => Self::Syntax(RdfXmlSyntaxError(SyntaxErrorKind::Xml(error))),
        }
    }
}
/// An error in the syntax of the parsed file.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct RdfXmlSyntaxError(#[from] pub(crate) SyntaxErrorKind);

/// Internal classification of RDF/XML syntax errors.
#[derive(Debug, thiserror::Error)]
pub enum SyntaxErrorKind {
    /// XML-level error reported by `quick_xml`.
    #[error(transparent)]
    Xml(#[from] quick_xml::Error),
    /// An IRI in the document failed to parse.
    #[error("error while parsing IRI '{iri}': {error}")]
    InvalidIri {
        iri: String,
        #[source]
        error: IriParseError,
    },
    /// A language tag in the document failed to parse.
    #[error("error while parsing language tag '{tag}': {error}")]
    InvalidLanguageTag {
        tag: String,
        #[source]
        error: LanguageTagParseError,
    },
    /// Free-form error message.
    #[error("{0}")]
    Msg(String),
}

impl RdfXmlSyntaxError {
    /// Builds an error from a printable error message.
    #[inline]
    pub(crate) fn msg(msg: impl Into<String>) -> Self {
        Self(SyntaxErrorKind::Msg(msg.into()))
    }
}
impl From<RdfXmlSyntaxError> for io::Error {
    /// Maps a syntax error onto the closest [`io::ErrorKind`]:
    /// underlying I/O errors are unwrapped, unexpected EOF keeps its kind,
    /// everything else becomes `InvalidData`.
    #[inline]
    fn from(error: RdfXmlSyntaxError) -> Self {
        match error.0 {
            SyntaxErrorKind::Xml(error) => match error {
                quick_xml::Error::Io(error) => {
                    // Recover the Arc-wrapped io::Error if we hold the only reference.
                    Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e))
                }
                quick_xml::Error::UnexpectedEof(error) => {
                    Self::new(io::ErrorKind::UnexpectedEof, error)
                }
                _ => Self::new(io::ErrorKind::InvalidData, error),
            },
            SyntaxErrorKind::Msg(msg) => Self::new(io::ErrorKind::InvalidData, msg),
            // InvalidIri / InvalidLanguageTag: report the whole error as InvalidData.
            _ => Self::new(io::ErrorKind::InvalidData, error),
        }
    }
}

@ -0,0 +1,8 @@
mod error;
mod parser;
mod serializer;
mod utils;
pub use error::{RdfXmlParseError, RdfXmlSyntaxError};
pub use parser::{FromReadRdfXmlReader, RdfXmlParser};
pub use serializer::{RdfXmlSerializer, ToWriteRdfXmlWriter};

File diff suppressed because it is too large Load Diff

@ -0,0 +1,461 @@
use crate::oxrdf::vocab::rdf;
use crate::oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef};
use crate::oxrdfxml::utils::*;
use oxiri::{Iri, IriParseError};
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::Writer;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::io;
use std::io::Write;
use std::sync::Arc;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite;
/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) serializer.
///
/// ```
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// ))?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
#[must_use]
pub struct RdfXmlSerializer {
    // Maps namespace IRI -> prefix name (note the inverted direction:
    // keyed by IRI so QName shortening is a direct lookup).
    prefixes: BTreeMap<String, String>,
}
impl RdfXmlSerializer {
    /// Builds a new [`RdfXmlSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self {
            prefixes: BTreeMap::new(),
        }
    }

    /// Registers a namespace prefix to use during serialization.
    ///
    /// The prefix map is keyed by the (validated) namespace IRI and stores the
    /// prefix name as value, matching the IRI -> prefix lookup performed when
    /// shortening IRIs into QNames.
    ///
    /// Returns an error if `prefix_iri` is not a valid IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.prefixes.insert(
            Iri::parse(prefix_iri.into())?.into_inner(),
            prefix_name.into(),
        );
        Ok(self)
    }

    /// Writes a RDF/XML file to a [`Write`] implementation.
    ///
    /// This writer does unbuffered writes.
    ///
    /// ```
    /// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
    /// use oxrdfxml::RdfXmlSerializer;
    ///
    /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// ))?;
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://schema.org/name")?,
    ///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
    /// ))?;
    /// assert_eq!(
    ///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
    ///     writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    // NOTE: the former `#[allow(clippy::unused_self)]` was stale: `self` is
    // consumed by `self.inner_writer()`, so the lint cannot fire.
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteRdfXmlWriter<W> {
        ToWriteRdfXmlWriter {
            // One-tab indentation, matching the documented example output.
            writer: Writer::new_with_indent(write, b'\t', 1),
            inner: self.inner_writer(),
        }
    }

    /// Writes a RDF/XML file to a [`AsyncWrite`] implementation.
    ///
    /// This writer does unbuffered writes.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
    /// use oxrdfxml::RdfXmlSerializer;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    ///     NamedNodeRef::new("http://schema.org/Person")?,
    /// )).await?;
    /// writer.write_triple(TripleRef::new(
    ///     NamedNodeRef::new("http://example.com#me")?,
    ///     NamedNodeRef::new("http://schema.org/name")?,
    ///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
    /// )).await?;
    /// assert_eq!(
    ///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
    ///     writer.finish().await?.as_slice()
    /// );
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteRdfXmlWriter<W> {
        ToTokioAsyncWriteRdfXmlWriter {
            writer: Writer::new_with_indent(write, b'\t', 1),
            inner: self.inner_writer(),
        }
    }

    /// Builds the shared (sync/async agnostic) serialization state.
    fn inner_writer(mut self) -> InnerRdfXmlWriter {
        // The rdf: prefix is always needed for rdf:RDF / rdf:Description / rdf:about.
        self.prefixes.insert(
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(),
            "rdf".into(),
        );
        InnerRdfXmlWriter {
            current_subject: None,
            current_resource_tag: None,
            prefixes: self.prefixes,
        }
    }
}
/// Writes a RDF/XML file to a [`Write`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{LiteralRef, NamedNodeRef, TripleRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// ))?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteRdfXmlWriter<W: Write> {
    // quick_xml event writer targeting the user-supplied sink.
    writer: Writer<W>,
    // Shared serialization state (current subject, prefixes...).
    inner: InnerRdfXmlWriter,
}
impl<W: Write> ToWriteRdfXmlWriter<W> {
    /// Writes an extra triple.
    ///
    /// Events for the triple are first buffered, then flushed to the
    /// underlying writer. Fails if the triple cannot be represented in
    /// RDF/XML or on I/O error.
    // NOTE: the former `#[allow(clippy::match_wildcard_for_single_variants,
    // unreachable_patterns)]` was stale: this body contains no `match`.
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut buffer = Vec::new();
        self.inner.write_triple(t, &mut buffer)?;
        self.flush_buffer(&mut buffer)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(mut self) -> io::Result<W> {
        let mut buffer = Vec::new();
        // Emit closing tags (and the document header if nothing was written).
        self.inner.finish(&mut buffer);
        self.flush_buffer(&mut buffer)?;
        Ok(self.writer.into_inner())
    }

    /// Drains the buffered XML events into the underlying writer.
    fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(..) {
            self.writer.write_event(event).map_err(map_err)?;
        }
        Ok(())
    }
}
/// Writes a RDF/XML file to a [`AsyncWrite`] implementation. Can be built using [`RdfXmlSerializer::serialize_to_tokio_async_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef, LiteralRef};
/// use oxrdfxml::RdfXmlSerializer;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut writer = RdfXmlSerializer::new().with_prefix("schema", "http://schema.org/")?.serialize_to_tokio_async_write(Vec::new());
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// )).await?;
/// writer.write_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     NamedNodeRef::new("http://schema.org/name")?,
///     LiteralRef::new_language_tagged_literal_unchecked("Foo Bar", "en"),
/// )).await?;
/// assert_eq!(
///     b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:schema=\"http://schema.org/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n\t<schema:Person rdf:about=\"http://example.com#me\">\n\t\t<schema:name xml:lang=\"en\">Foo Bar</schema:name>\n\t</schema:Person>\n</rdf:RDF>",
///     writer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteRdfXmlWriter<W: AsyncWrite + Unpin> {
    // quick_xml event writer targeting the user-supplied async sink.
    writer: Writer<W>,
    // Shared serialization state (current subject, prefixes...).
    inner: InnerRdfXmlWriter,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteRdfXmlWriter<W> {
    /// Writes an extra triple.
    ///
    /// Events for the triple are first buffered, then flushed to the
    /// underlying async writer.
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut buffer = Vec::new();
        self.inner.write_triple(t, &mut buffer)?;
        self.flush_buffer(&mut buffer).await
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    pub async fn finish(mut self) -> io::Result<W> {
        let mut buffer = Vec::new();
        // Emit closing tags (and the document header if nothing was written).
        self.inner.finish(&mut buffer);
        self.flush_buffer(&mut buffer).await?;
        Ok(self.writer.into_inner())
    }

    /// Drains the buffered XML events into the underlying async writer.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(0..) {
            self.writer
                .write_event_async(event)
                .await
                .map_err(map_err)?;
        }
        Ok(())
    }
}
/// Sync/async-agnostic RDF/XML serialization state: turns triples into
/// buffered XML [`Event`]s that the outer writers flush to their sinks.
pub struct InnerRdfXmlWriter {
    // Subject of the XML resource element currently open, if any.
    current_subject: Option<Subject>,
    // QName of the currently open resource element when an rdf:type was folded
    // into the element name; `None` means a plain rdf:Description is open.
    current_resource_tag: Option<String>,
    // Maps namespace IRI -> prefix name (keyed by IRI for QName shortening).
    prefixes: BTreeMap<String, String>,
}
impl InnerRdfXmlWriter {
    /// Serializes one triple into XML events appended to `output`.
    ///
    /// Consecutive triples sharing a subject are grouped under a single XML
    /// resource element; the previous element is closed whenever the subject
    /// changes. Returns an error for subjects/objects RDF/XML cannot express
    /// (triple terms).
    #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
    fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        output: &mut Vec<Event<'a>>,
    ) -> io::Result<()> {
        // First triple: emit the XML declaration and the opening rdf:RDF tag.
        if self.current_subject.is_none() {
            self.write_start(output);
        }
        let triple = t.into();
        // We open a new rdf:Description if useful
        if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) {
            if self.current_subject.is_some() {
                // Close the previous subject's element before opening a new one.
                output.push(Event::End(
                    self.current_resource_tag
                        .take()
                        .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
                ));
            }
            self.current_subject = Some(triple.subject.into_owned());

            // An rdf:type triple with a named object is folded into the element
            // name (e.g. <schema:Person>) instead of an explicit property.
            let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE {
                if let TermRef::NamedNode(t) = triple.object {
                    let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t);
                    let mut description_open = BytesStart::new(prop_qname.clone());
                    if let Some(prop_xmlns) = prop_xmlns {
                        description_open.push_attribute(prop_xmlns);
                    }
                    // Remember the tag so the matching End event can be emitted later.
                    self.current_resource_tag = Some(prop_qname.into_owned());
                    (description_open, true)
                } else {
                    (BytesStart::new("rdf:Description"), false)
                }
            } else {
                (BytesStart::new("rdf:Description"), false)
            };
            // Identify the subject: rdf:about for IRIs, rdf:nodeID for blank nodes.
            match triple.subject {
                SubjectRef::NamedNode(node) => {
                    description_open.push_attribute(("rdf:about", node.as_str()))
                }
                SubjectRef::BlankNode(node) => {
                    description_open.push_attribute(("rdf:nodeID", node.as_str()))
                }
                _ => {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidInput,
                        "RDF/XML only supports named or blank subject",
                    ))
                }
            }
            output.push(Event::Start(description_open));
            if with_type_tag {
                return Ok(()); // No need for a value
            }
        }

        // Serialize the predicate as a property element.
        let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate);
        let mut property_open = BytesStart::new(prop_qname.clone());
        if let Some(prop_xmlns) = prop_xmlns {
            property_open.push_attribute(prop_xmlns);
        }
        // Non-literal objects are expressed as attributes on an empty element;
        // literal objects become text content of a start/end element pair.
        let content = match triple.object {
            TermRef::NamedNode(node) => {
                property_open.push_attribute(("rdf:resource", node.as_str()));
                None
            }
            TermRef::BlankNode(node) => {
                property_open.push_attribute(("rdf:nodeID", node.as_str()));
                None
            }
            TermRef::Literal(literal) => {
                if let Some(language) = literal.language() {
                    property_open.push_attribute(("xml:lang", language));
                } else if !literal.is_plain() {
                    property_open.push_attribute(("rdf:datatype", literal.datatype().as_str()));
                }
                Some(literal.value())
            }
            _ => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "RDF/XML only supports named, blank or literal object",
                ))
            }
        };
        if let Some(content) = content {
            output.push(Event::Start(property_open));
            output.push(Event::Text(BytesText::new(content)));
            output.push(Event::End(BytesEnd::new(prop_qname)));
        } else {
            output.push(Event::Empty(property_open));
        }
        Ok(())
    }

    /// Emits the XML declaration and the opening rdf:RDF element with all
    /// registered namespace declarations.
    fn write_start(&self, output: &mut Vec<Event<'_>>) {
        output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)));
        let mut rdf_open = BytesStart::new("rdf:RDF");
        // prefixes maps IRI -> name, hence the (value, name) destructuring.
        for (prefix_value, prefix_name) in &self.prefixes {
            rdf_open.push_attribute((
                format!("xmlns:{prefix_name}").as_str(),
                prefix_value.as_str(),
            ));
        }
        output.push(Event::Start(rdf_open))
    }

    /// Closes any open resource element and the rdf:RDF root.
    ///
    /// If nothing was ever written, still emits the document header so the
    /// output is a well-formed (empty) RDF/XML document.
    fn finish(&mut self, output: &mut Vec<Event<'static>>) {
        if self.current_subject.is_some() {
            output.push(Event::End(
                self.current_resource_tag
                    .take()
                    .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
            ));
        } else {
            self.write_start(output);
        }
        output.push(Event::End(BytesEnd::new("rdf:RDF")));
    }

    /// Shortens an IRI into a QName, returning the QName plus an optional
    /// extra xmlns attribute to declare when no registered prefix matches.
    fn uri_to_qname_and_xmlns<'a>(
        &self,
        uri: NamedNodeRef<'a>,
    ) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) {
        let (prop_prefix, prop_value) = split_iri(uri.as_str());
        if let Some(prop_prefix) = self.prefixes.get(prop_prefix) {
            // Known namespace: reuse the registered prefix (empty = default namespace).
            (
                if prop_prefix.is_empty() {
                    Cow::Borrowed(prop_value)
                } else {
                    Cow::Owned(format!("{prop_prefix}:{prop_value}"))
                },
                None,
            )
        } else if prop_prefix == "http://www.w3.org/2000/xmlns/" {
            (Cow::Owned(format!("xmlns:{prop_value}")), None)
        } else if prop_value.is_empty() {
            // No local part: declare an ad-hoc `p` prefix for the whole IRI.
            (Cow::Borrowed("p:"), Some(("xmlns:p", prop_prefix)))
        } else {
            // Unknown namespace: declare it as the element's default namespace.
            (Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix)))
        }
    }
}
fn map_err(error: quick_xml::Error) -> io::Error {
if let quick_xml::Error::Io(error) = error {
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error))
} else {
io::Error::new(io::ErrorKind::Other, error)
}
}
/// Splits an IRI into a (namespace, local name) pair for QName construction.
///
/// The local name is the trailing run of XML name characters that begins with
/// a name-start character (excluding ':'); everything before it is the
/// namespace. If no such local part exists, the whole IRI is the namespace
/// and the local name is empty. See the tests below for examples.
fn split_iri(iri: &str) -> (&str, &str) {
    // Index of the last character that cannot appear in a name (or is ':').
    if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') {
        // First valid name-start character after that point marks the local name.
        if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':')
        {
            (
                &iri[..position_base + position_add],
                &iri[position_base + position_add..],
            )
        } else {
            (iri, "")
        }
    } else {
        (iri, "")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks namespace/local-name splitting on '/', '#' and ':' separators.
    #[test]
    fn test_split_iri() {
        assert_eq!(
            split_iri("http://schema.org/Person"),
            ("http://schema.org/", "Person")
        );
        assert_eq!(split_iri("http://schema.org/"), ("http://schema.org/", ""));
        assert_eq!(
            split_iri("http://schema.org#foo"),
            ("http://schema.org#", "foo")
        );
        assert_eq!(split_iri("urn:isbn:foo"), ("urn:isbn:", "foo"));
    }
}

@ -0,0 +1,26 @@
/// Returns whether `c` may start an XML name (XML 1.0 `NameStartChar`).
pub fn is_name_start_char(c: char) -> bool {
    // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    match c {
        ':' | '_' => true,
        'A'..='Z' | 'a'..='z' => true,
        '\u{00C0}'..='\u{00D6}'
        | '\u{00D8}'..='\u{00F6}'
        | '\u{00F8}'..='\u{02FF}'
        | '\u{0370}'..='\u{037D}'
        | '\u{037F}'..='\u{1FFF}'
        | '\u{200C}'..='\u{200D}'
        | '\u{2070}'..='\u{218F}'
        | '\u{2C00}'..='\u{2FEF}'
        | '\u{3001}'..='\u{D7FF}'
        | '\u{F900}'..='\u{FDCF}'
        | '\u{FDF0}'..='\u{FFFD}'
        | '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}
/// Returns whether `c` may appear inside an XML name (XML 1.0 `NameChar`).
pub fn is_name_char(c: char) -> bool {
    // NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
    if is_name_start_char(c) {
        return true;
    }
    matches!(
        c,
        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'
    )
}

@ -0,0 +1,65 @@
oxsdatatypes
============
[![Latest Version](https://img.shields.io/crates/v/oxsdatatypes.svg)](https://crates.io/crates/oxsdatatypes)
[![Released API docs](https://docs.rs/oxsdatatypes/badge.svg)](https://docs.rs/oxsdatatypes)
[![Crates.io downloads](https://img.shields.io/crates/d/oxsdatatypes)](https://crates.io/crates/oxsdatatypes)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
oxsdatatypes is an implementation of some [XML Schema Definition Language Datatypes](https://www.w3.org/TR/xmlschema11-2/).
Its main aim is to ease the implementation of SPARQL and XPath.
Usage example:
```rust
use std::str::FromStr;
use oxsdatatypes::Decimal;
assert!(Decimal::from_str("22.2").unwrap() > Decimal::from_str("21").unwrap());
```
Each datatype is represented by a Rust struct.
Each datatype provides:
* `FromStr` implementation to parse a datatype string serialization following its [lexical mapping](https://www.w3.org/TR/xmlschema11-2/#dt-lexical-mapping).
* `Display` implementation to serialize a datatype following its [canonical mapping](https://www.w3.org/TR/xmlschema11-2/#dt-canonical-mapping).
* `is_identical_with` method following its [identity relation](https://www.w3.org/TR/xmlschema11-2/#identity).
* `PartialEq`, and `Eq` if possible, implementations following its [equality relation](https://www.w3.org/TR/xmlschema11-2/#equality).
* `PartialOrd`, and `Ord` if possible, implementations following its [order relation](https://www.w3.org/TR/xmlschema11-2/#order).
* `From` and `TryFrom` implementations to implement [XPath casting](https://www.w3.org/TR/xpath-functions-31/#casting).
* Various methods implementing [XPath functions](https://www.w3.org/TR/xpath-functions-31/).
* `from_be_bytes` and `to_be_bytes` methods for serialization.
### `DateTime::now` behavior
The `DateTime::now()` function needs special OS support.
Currently:
- If the `custom-now` feature is enabled, a function computing `now` must be set:
```rust
use oxsdatatypes::Duration;
#[no_mangle]
fn custom_ox_now() -> Duration {
unimplemented!("now implementation")
}
```
- For `wasm32-unknown-unknown` if the `js` feature is enabled the `Date.now()` ECMAScript API is used.
- For all other targets `SystemTime::now()` is used.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,134 @@
use crate::oxsdatatypes::{Decimal, Double, Float, Integer};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::{FromStr, ParseBoolError};
/// [XML Schema `boolean` datatype](https://www.w3.org/TR/xmlschema11-2/#boolean)
///
/// Uses internally a [`bool`].
#[derive(
    Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
)]
#[repr(transparent)]
pub struct Boolean {
    value: bool,
}

impl Boolean {
    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
    #[inline]
    #[must_use]
    pub fn is_identical_with(self, other: Self) -> bool {
        self == other
    }
}

impl From<bool> for Boolean {
    #[inline]
    fn from(value: bool) -> Self {
        Self { value }
    }
}

// The numeric conversions below implement XPath casting: any non-zero
// (and, for floats, non-NaN) value maps to true.

impl From<Integer> for Boolean {
    #[inline]
    fn from(value: Integer) -> Self {
        (value != Integer::from(0)).into()
    }
}

impl From<Decimal> for Boolean {
    #[inline]
    fn from(value: Decimal) -> Self {
        (value != Decimal::from(0)).into()
    }
}

impl From<Float> for Boolean {
    #[inline]
    fn from(value: Float) -> Self {
        // NaN compares unequal to 0 but still casts to false.
        (value != Float::from(0.) && !value.is_nan()).into()
    }
}

impl From<Double> for Boolean {
    #[inline]
    fn from(value: Double) -> Self {
        // NaN compares unequal to 0 but still casts to false.
        (value != Double::from(0.) && !value.is_nan()).into()
    }
}

impl From<Boolean> for bool {
    #[inline]
    fn from(value: Boolean) -> Self {
        value.value
    }
}
impl FromStr for Boolean {
    type Err = ParseBoolError;

    /// Parses the XSD lexical forms `"true"`, `"false"`, `"1"` and `"0"`.
    #[inline]
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        // "1"/"0" are XSD-specific; "true"/"false" (and the error for anything
        // else) are delegated to the standard bool parser.
        let value = match input {
            "1" => true,
            "0" => false,
            other => bool::from_str(other)?,
        };
        Ok(value.into())
    }
}
impl fmt::Display for Boolean {
    /// Serializes following the canonical mapping: "true" or "false".
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.value.fmt(f)
    }
}

#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    /// All four lexical forms parse and round-trip to the canonical form.
    #[test]
    fn from_str() -> Result<(), ParseBoolError> {
        assert_eq!(Boolean::from_str("true")?.to_string(), "true");
        assert_eq!(Boolean::from_str("1")?.to_string(), "true");
        assert_eq!(Boolean::from_str("false")?.to_string(), "false");
        assert_eq!(Boolean::from_str("0")?.to_string(), "false");
        Ok(())
    }

    /// Any non-zero integer casts to true.
    #[test]
    fn from_integer() {
        assert_eq!(Boolean::from(false), Integer::from(0).into());
        assert_eq!(Boolean::from(true), Integer::from(1).into());
        assert_eq!(Boolean::from(true), Integer::from(2).into());
    }

    /// Any non-zero decimal casts to true.
    #[test]
    fn from_decimal() {
        assert_eq!(Boolean::from(false), Decimal::from(0).into());
        assert_eq!(Boolean::from(true), Decimal::from(1).into());
        assert_eq!(Boolean::from(true), Decimal::from(2).into());
    }

    /// Zero and NaN cast to false; all other floats to true.
    #[test]
    fn from_float() {
        assert_eq!(Boolean::from(false), Float::from(0.).into());
        assert_eq!(Boolean::from(true), Float::from(1.).into());
        assert_eq!(Boolean::from(true), Float::from(2.).into());
        assert_eq!(Boolean::from(false), Float::from(f32::NAN).into());
        assert_eq!(Boolean::from(true), Float::from(f32::INFINITY).into());
    }

    /// Zero and NaN cast to false; all other doubles to true.
    #[test]
    fn from_double() {
        assert_eq!(Boolean::from(false), Double::from(0.).into());
        assert_eq!(Boolean::from(true), Double::from(1.).into());
        assert_eq!(Boolean::from(true), Double::from(2.).into());
        assert_eq!(Boolean::from(false), Double::from(f64::NAN).into());
        assert_eq!(Boolean::from(true), Double::from(f64::INFINITY).into());
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,326 @@
use crate::oxsdatatypes::{Boolean, Float, Integer};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use std::num::ParseFloatError;
use std::ops::{Add, Div, Mul, Neg, Sub};
use std::str::FromStr;
/// [XML Schema `double` datatype](https://www.w3.org/TR/xmlschema11-2/#double)
///
/// Uses internally a [`f64`].
///
/// <div class="warning">Serialization does not follow the canonical mapping.</div>
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
#[repr(transparent)]
pub struct Double {
    // The wrapped primitive; `repr(transparent)` guarantees the same layout as `f64`.
    value: f64,
}
impl Double {
    pub const INFINITY: Self = Self {
        value: f64::INFINITY,
    };
    pub const MAX: Self = Self { value: f64::MAX };
    pub const MIN: Self = Self { value: f64::MIN };
    pub const NAN: Self = Self { value: f64::NAN };
    pub const NEG_INFINITY: Self = Self {
        value: f64::NEG_INFINITY,
    };

    /// Builds a `Double` from its big-endian byte representation.
    #[inline]
    #[must_use]
    pub fn from_be_bytes(bytes: [u8; 8]) -> Self {
        f64::from_be_bytes(bytes).into()
    }

    /// Returns the big-endian byte representation of this value.
    #[inline]
    #[must_use]
    pub fn to_be_bytes(self) -> [u8; 8] {
        f64::from(self).to_be_bytes()
    }

    /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
    #[inline]
    #[must_use]
    pub fn abs(self) -> Self {
        Self {
            value: self.value.abs(),
        }
    }

    /// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling)
    #[inline]
    #[must_use]
    pub fn ceil(self) -> Self {
        Self {
            value: self.value.ceil(),
        }
    }

    /// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor)
    #[inline]
    #[must_use]
    pub fn floor(self) -> Self {
        Self {
            value: self.value.floor(),
        }
    }

    /// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
    #[inline]
    #[must_use]
    pub fn round(self) -> Self {
        Self {
            value: self.value.round(),
        }
    }

    /// Returns `true` if this value is NaN.
    #[inline]
    #[must_use]
    pub fn is_nan(self) -> bool {
        f64::is_nan(self.value)
    }

    /// Returns `true` if this value is neither infinite nor NaN.
    #[inline]
    #[must_use]
    pub fn is_finite(self) -> bool {
        f64::is_finite(self.value)
    }

    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
    ///
    /// Compares bit patterns, so `NaN` is identical with itself and `-0` differs from `+0`.
    #[inline]
    #[must_use]
    pub fn is_identical_with(self, other: Self) -> bool {
        other.value.to_bits() == self.value.to_bits()
    }
}
impl From<Double> for f64 {
#[inline]
fn from(value: Double) -> Self {
value.value
}
}
impl From<f64> for Double {
#[inline]
fn from(value: f64) -> Self {
Self { value }
}
}
impl From<i8> for Double {
#[inline]
fn from(value: i8) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i16> for Double {
#[inline]
fn from(value: i16) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i32> for Double {
#[inline]
fn from(value: i32) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u8> for Double {
#[inline]
fn from(value: u8) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u16> for Double {
#[inline]
fn from(value: u16) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u32> for Double {
#[inline]
fn from(value: u32) -> Self {
Self {
value: value.into(),
}
}
}
impl From<Float> for Double {
#[inline]
fn from(value: Float) -> Self {
Self {
value: value.into(),
}
}
}
impl From<Boolean> for Double {
#[inline]
fn from(value: Boolean) -> Self {
f64::from(bool::from(value)).into()
}
}
impl From<Integer> for Double {
#[inline]
#[allow(clippy::cast_precision_loss)]
fn from(value: Integer) -> Self {
(i64::from(value) as f64).into()
}
}
impl FromStr for Double {
    type Err = ParseFloatError;

    /// Parses using [`f64::from_str`] and wraps the result.
    #[inline]
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        f64::from_str(input).map(Self::from)
    }
}
impl fmt::Display for Double {
    /// Prints infinities as the XSD lexical forms `INF`/`-INF`;
    /// all other values (including NaN) use the standard [`f64`] formatter.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.value.is_infinite() {
            f.write_str(if self.value.is_sign_positive() {
                "INF"
            } else {
                "-INF"
            })
        } else {
            fmt::Display::fmt(&self.value, f)
        }
    }
}
impl PartialOrd for Double {
    // Standard IEEE 754 partial order: comparisons with NaN yield `None`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        f64::partial_cmp(&self.value, &other.value)
    }
}

impl Neg for Double {
    type Output = Self;
    #[inline]
    fn neg(self) -> Self {
        Self { value: -self.value }
    }
}

impl Add for Double {
    type Output = Self;
    #[inline]
    fn add(self, rhs: Self) -> Self {
        Self {
            value: self.value + rhs.value,
        }
    }
}

impl Sub for Double {
    type Output = Self;
    #[inline]
    fn sub(self, rhs: Self) -> Self {
        Self {
            value: self.value - rhs.value,
        }
    }
}

impl Mul for Double {
    type Output = Self;
    #[inline]
    fn mul(self, rhs: Self) -> Self {
        Self {
            value: self.value * rhs.value,
        }
    }
}

impl Div for Double {
    type Output = Self;
    #[inline]
    fn div(self, rhs: Self) -> Self {
        Self {
            value: self.value / rhs.value,
        }
    }
}
// Unit tests: equality/ordering semantics around NaN and signed zero, bit-level
// identity, and round-tripping through the lexical representation.
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    #[test]
    fn eq() {
        assert_eq!(Double::from(0_f64), Double::from(0_f64));
        assert_ne!(Double::NAN, Double::NAN);
        assert_eq!(Double::from(-0.), Double::from(0.));
    }
    #[test]
    fn cmp() {
        assert_eq!(
            Double::from(0.).partial_cmp(&Double::from(0.)),
            Some(Ordering::Equal)
        );
        assert_eq!(
            Double::INFINITY.partial_cmp(&Double::MAX),
            Some(Ordering::Greater)
        );
        assert_eq!(
            Double::NEG_INFINITY.partial_cmp(&Double::MIN),
            Some(Ordering::Less)
        );
        assert_eq!(Double::NAN.partial_cmp(&Double::from(0.)), None);
        assert_eq!(Double::NAN.partial_cmp(&Double::NAN), None);
        assert_eq!(
            Double::from(0.).partial_cmp(&Double::from(-0.)),
            Some(Ordering::Equal)
        );
    }
    #[test]
    fn is_identical_with() {
        assert!(Double::from(0.).is_identical_with(Double::from(0.)));
        assert!(Double::NAN.is_identical_with(Double::NAN));
        assert!(!Double::from(-0.).is_identical_with(Double::from(0.)));
    }
    #[test]
    fn from_str() -> Result<(), ParseFloatError> {
        assert_eq!(Double::from_str("NaN")?.to_string(), "NaN");
        assert_eq!(Double::from_str("INF")?.to_string(), "INF");
        assert_eq!(Double::from_str("+INF")?.to_string(), "INF");
        assert_eq!(Double::from_str("-INF")?.to_string(), "-INF");
        assert_eq!(Double::from_str("0.0E0")?.to_string(), "0");
        assert_eq!(Double::from_str("-0.0E0")?.to_string(), "-0");
        assert_eq!(Double::from_str("0.1e1")?.to_string(), "1");
        assert_eq!(Double::from_str("-0.1e1")?.to_string(), "-1");
        assert_eq!(Double::from_str("1.e1")?.to_string(), "10");
        assert_eq!(Double::from_str("-1.e1")?.to_string(), "-10");
        assert_eq!(Double::from_str("1")?.to_string(), "1");
        assert_eq!(Double::from_str("-1")?.to_string(), "-1");
        assert_eq!(Double::from_str("1.")?.to_string(), "1");
        assert_eq!(Double::from_str("-1.")?.to_string(), "-1");
        assert_eq!(
            Double::from_str(&f64::MIN.to_string()).unwrap(),
            Double::MIN
        );
        assert_eq!(
            Double::from_str(&f64::MAX.to_string()).unwrap(),
            Double::MAX
        );
        Ok(())
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,310 @@
use crate::oxsdatatypes::{Boolean, Double, Integer};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use std::num::ParseFloatError;
use std::ops::{Add, Div, Mul, Neg, Sub};
use std::str::FromStr;
/// [XML Schema `float` datatype](https://www.w3.org/TR/xmlschema11-2/#float)
///
/// Uses internally a [`f32`].
///
/// <div class="warning">Serialization does not follow the canonical mapping.</div>
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
#[repr(transparent)]
pub struct Float {
    // The wrapped primitive; `repr(transparent)` guarantees the same layout as `f32`.
    value: f32,
}
impl Float {
    pub const INFINITY: Self = Self {
        value: f32::INFINITY,
    };
    pub const MAX: Self = Self { value: f32::MAX };
    pub const MIN: Self = Self { value: f32::MIN };
    pub const NAN: Self = Self { value: f32::NAN };
    pub const NEG_INFINITY: Self = Self {
        value: f32::NEG_INFINITY,
    };

    /// Builds a `Float` from its big-endian byte representation.
    #[inline]
    #[must_use]
    pub fn from_be_bytes(bytes: [u8; 4]) -> Self {
        f32::from_be_bytes(bytes).into()
    }

    /// Returns the big-endian byte representation of this value.
    #[inline]
    #[must_use]
    pub fn to_be_bytes(self) -> [u8; 4] {
        f32::from(self).to_be_bytes()
    }

    /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
    #[inline]
    #[must_use]
    pub fn abs(self) -> Self {
        Self {
            value: self.value.abs(),
        }
    }

    /// [fn:ceiling](https://www.w3.org/TR/xpath-functions-31/#func-ceiling)
    #[inline]
    #[must_use]
    pub fn ceil(self) -> Self {
        Self {
            value: self.value.ceil(),
        }
    }

    /// [fn:floor](https://www.w3.org/TR/xpath-functions-31/#func-floor)
    #[inline]
    #[must_use]
    pub fn floor(self) -> Self {
        Self {
            value: self.value.floor(),
        }
    }

    /// [fn:round](https://www.w3.org/TR/xpath-functions-31/#func-round)
    #[inline]
    #[must_use]
    pub fn round(self) -> Self {
        Self {
            value: self.value.round(),
        }
    }

    /// Returns `true` if this value is NaN.
    #[inline]
    #[must_use]
    pub fn is_nan(self) -> bool {
        f32::is_nan(self.value)
    }

    /// Returns `true` if this value is neither infinite nor NaN.
    #[inline]
    #[must_use]
    pub fn is_finite(self) -> bool {
        f32::is_finite(self.value)
    }

    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
    ///
    /// Compares bit patterns, so `NaN` is identical with itself and `-0` differs from `+0`.
    #[inline]
    #[must_use]
    pub fn is_identical_with(self, other: Self) -> bool {
        other.value.to_bits() == self.value.to_bits()
    }
}
impl From<Float> for f32 {
#[inline]
fn from(value: Float) -> Self {
value.value
}
}
impl From<Float> for f64 {
#[inline]
fn from(value: Float) -> Self {
value.value.into()
}
}
impl From<f32> for Float {
#[inline]
fn from(value: f32) -> Self {
Self { value }
}
}
impl From<i8> for Float {
#[inline]
fn from(value: i8) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i16> for Float {
#[inline]
fn from(value: i16) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u8> for Float {
#[inline]
fn from(value: u8) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u16> for Float {
#[inline]
fn from(value: u16) -> Self {
Self {
value: value.into(),
}
}
}
impl From<Boolean> for Float {
#[inline]
fn from(value: Boolean) -> Self {
f32::from(bool::from(value)).into()
}
}
impl From<Integer> for Float {
#[inline]
#[allow(clippy::cast_precision_loss)]
fn from(value: Integer) -> Self {
(i64::from(value) as f32).into()
}
}
impl From<Double> for Float {
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn from(value: Double) -> Self {
Self {
value: f64::from(value) as f32,
}
}
}
impl FromStr for Float {
    type Err = ParseFloatError;

    /// Parses using [`f32::from_str`] and wraps the result.
    #[inline]
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        f32::from_str(input).map(Self::from)
    }
}
impl fmt::Display for Float {
    /// Prints infinities as the XSD lexical forms `INF`/`-INF`;
    /// all other values (including NaN) use the standard [`f32`] formatter.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.value.is_infinite() {
            f.write_str(if self.value.is_sign_positive() {
                "INF"
            } else {
                "-INF"
            })
        } else {
            fmt::Display::fmt(&self.value, f)
        }
    }
}
impl PartialOrd for Float {
    // Standard IEEE 754 partial order: comparisons with NaN yield `None`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        f32::partial_cmp(&self.value, &other.value)
    }
}

impl Neg for Float {
    type Output = Self;
    #[inline]
    fn neg(self) -> Self {
        Self { value: -self.value }
    }
}

impl Add for Float {
    type Output = Self;
    #[inline]
    fn add(self, rhs: Self) -> Self {
        Self {
            value: self.value + rhs.value,
        }
    }
}

impl Sub for Float {
    type Output = Self;
    #[inline]
    fn sub(self, rhs: Self) -> Self {
        Self {
            value: self.value - rhs.value,
        }
    }
}

impl Mul for Float {
    type Output = Self;
    #[inline]
    fn mul(self, rhs: Self) -> Self {
        Self {
            value: self.value * rhs.value,
        }
    }
}

impl Div for Float {
    type Output = Self;
    #[inline]
    fn div(self, rhs: Self) -> Self {
        Self {
            value: self.value / rhs.value,
        }
    }
}
// Unit tests: equality/ordering semantics around NaN and signed zero, bit-level
// identity, and round-tripping through the lexical representation.
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    #[test]
    fn eq() {
        assert_eq!(Float::from(0.), Float::from(0.));
        assert_ne!(Float::NAN, Float::NAN);
        assert_eq!(Float::from(-0.), Float::from(0.));
    }
    #[test]
    fn cmp() {
        assert_eq!(
            Float::from(0.).partial_cmp(&Float::from(0.)),
            Some(Ordering::Equal)
        );
        assert_eq!(
            Float::INFINITY.partial_cmp(&Float::MAX),
            Some(Ordering::Greater)
        );
        assert_eq!(
            Float::NEG_INFINITY.partial_cmp(&Float::MIN),
            Some(Ordering::Less)
        );
        assert_eq!(Float::NAN.partial_cmp(&Float::from(0.)), None);
        assert_eq!(Float::NAN.partial_cmp(&Float::NAN), None);
        assert_eq!(
            Float::from(0.).partial_cmp(&Float::from(-0.)),
            Some(Ordering::Equal)
        );
    }
    #[test]
    fn is_identical_with() {
        assert!(Float::from(0.).is_identical_with(Float::from(0.)));
        assert!(Float::NAN.is_identical_with(Float::NAN));
        assert!(!Float::from(-0.).is_identical_with(Float::from(0.)));
    }
    #[test]
    fn from_str() -> Result<(), ParseFloatError> {
        assert_eq!(Float::from_str("NaN")?.to_string(), "NaN");
        assert_eq!(Float::from_str("INF")?.to_string(), "INF");
        assert_eq!(Float::from_str("+INF")?.to_string(), "INF");
        assert_eq!(Float::from_str("-INF")?.to_string(), "-INF");
        assert_eq!(Float::from_str("0.0E0")?.to_string(), "0");
        assert_eq!(Float::from_str("-0.0E0")?.to_string(), "-0");
        assert_eq!(Float::from_str("0.1e1")?.to_string(), "1");
        assert_eq!(Float::from_str("-0.1e1")?.to_string(), "-1");
        assert_eq!(Float::from_str("1.e1")?.to_string(), "10");
        assert_eq!(Float::from_str("-1.e1")?.to_string(), "-10");
        assert_eq!(Float::from_str("1")?.to_string(), "1");
        assert_eq!(Float::from_str("-1")?.to_string(), "-1");
        assert_eq!(Float::from_str("1.")?.to_string(), "1");
        assert_eq!(Float::from_str("-1.")?.to_string(), "-1");
        assert_eq!(Float::from_str(&f32::MIN.to_string())?, Float::MIN);
        assert_eq!(Float::from_str(&f32::MAX.to_string())?, Float::MAX);
        Ok(())
    }
}

@ -0,0 +1,400 @@
use crate::oxsdatatypes::{Boolean, Decimal, Double, Float};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::num::ParseIntError;
use std::str::FromStr;
/// [XML Schema `integer` datatype](https://www.w3.org/TR/xmlschema11-2/#integer)
///
/// Uses internally a [`i64`].
#[derive(
    Debug, Clone, Copy, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
)]
#[repr(transparent)]
pub struct Integer {
    // The wrapped primitive; `repr(transparent)` guarantees the same layout as `i64`.
    value: i64,
}
impl Integer {
    pub const MAX: Self = Self { value: i64::MAX };
    pub const MIN: Self = Self { value: i64::MIN };

    /// Builds an `Integer` from its big-endian byte representation.
    #[inline]
    #[must_use]
    pub fn from_be_bytes(bytes: [u8; 8]) -> Self {
        i64::from_be_bytes(bytes).into()
    }

    /// Returns the big-endian byte representation of this value.
    #[inline]
    #[must_use]
    pub fn to_be_bytes(self) -> [u8; 8] {
        i64::from(self).to_be_bytes()
    }

    /// [op:numeric-add](https://www.w3.org/TR/xpath-functions-31/#func-numeric-add)
    ///
    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_add(self, rhs: impl Into<Self>) -> Option<Self> {
        self.value
            .checked_add(rhs.into().value)
            .map(|value| Self { value })
    }

    /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions-31/#func-numeric-subtract)
    ///
    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_sub(self, rhs: impl Into<Self>) -> Option<Self> {
        self.value
            .checked_sub(rhs.into().value)
            .map(|value| Self { value })
    }

    /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions-31/#func-numeric-multiply)
    ///
    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_mul(self, rhs: impl Into<Self>) -> Option<Self> {
        self.value
            .checked_mul(rhs.into().value)
            .map(|value| Self { value })
    }

    /// [op:numeric-integer-divide](https://www.w3.org/TR/xpath-functions-31/#func-numeric-integer-divide)
    ///
    /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_div(self, rhs: impl Into<Self>) -> Option<Self> {
        self.value
            .checked_div(rhs.into().value)
            .map(|value| Self { value })
    }

    /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions-31/#func-numeric-mod)
    ///
    /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_rem(self, rhs: impl Into<Self>) -> Option<Self> {
        self.value
            .checked_rem(rhs.into().value)
            .map(|value| Self { value })
    }

    /// Euclidean remainder
    ///
    /// Returns `None` in case of division by 0 ([FOAR0001](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0001)) or overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_rem_euclid(self, rhs: impl Into<Self>) -> Option<Self> {
        self.value
            .checked_rem_euclid(rhs.into().value)
            .map(|value| Self { value })
    }

    /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions-31/#func-numeric-unary-minus)
    ///
    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_neg(self) -> Option<Self> {
        // Overflows only for `i64::MIN`.
        self.value.checked_neg().map(|value| Self { value })
    }

    /// [fn:abs](https://www.w3.org/TR/xpath-functions-31/#func-abs)
    ///
    /// Returns `None` in case of overflow ([FOAR0002](https://www.w3.org/TR/xpath-functions-31/#ERRFOAR0002)).
    #[inline]
    #[must_use]
    pub fn checked_abs(self) -> Option<Self> {
        // Overflows only for `i64::MIN`.
        self.value.checked_abs().map(|value| Self { value })
    }

    /// Returns `true` if this value is strictly below zero.
    #[inline]
    #[must_use]
    pub const fn is_negative(self) -> bool {
        self.value < 0
    }

    /// Returns `true` if this value is strictly above zero.
    #[inline]
    #[must_use]
    pub const fn is_positive(self) -> bool {
        self.value > 0
    }

    /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity).
    ///
    /// For `xsd:integer` identity coincides with equality.
    #[inline]
    #[must_use]
    pub fn is_identical_with(self, other: Self) -> bool {
        self.value == other.value
    }
}
impl From<bool> for Integer {
#[inline]
fn from(value: bool) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i8> for Integer {
#[inline]
fn from(value: i8) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i16> for Integer {
#[inline]
fn from(value: i16) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i32> for Integer {
#[inline]
fn from(value: i32) -> Self {
Self {
value: value.into(),
}
}
}
impl From<i64> for Integer {
#[inline]
fn from(value: i64) -> Self {
Self { value }
}
}
impl From<u8> for Integer {
#[inline]
fn from(value: u8) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u16> for Integer {
#[inline]
fn from(value: u16) -> Self {
Self {
value: value.into(),
}
}
}
impl From<u32> for Integer {
#[inline]
fn from(value: u32) -> Self {
Self {
value: value.into(),
}
}
}
impl From<Boolean> for Integer {
#[inline]
fn from(value: Boolean) -> Self {
bool::from(value).into()
}
}
impl From<Integer> for i64 {
#[inline]
fn from(value: Integer) -> Self {
value.value
}
}
impl FromStr for Integer {
    type Err = ParseIntError;

    /// Parses using [`i64::from_str`] and wraps the result.
    #[inline]
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        i64::from_str(input).map(Self::from)
    }
}

impl fmt::Display for Integer {
    /// Forwards to the inner [`i64`] formatter, so formatter flags are preserved.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.value, f)
    }
}
impl TryFrom<Float> for Integer {
    type Error = TooLargeForIntegerError;

    /// Routes the conversion through [`Decimal`]; any failure in either step
    /// is reported as [`TooLargeForIntegerError`].
    #[inline]
    fn try_from(value: Float) -> Result<Self, Self::Error> {
        let decimal = Decimal::try_from(value).map_err(|_| TooLargeForIntegerError)?;
        Self::try_from(decimal)
    }
}

impl TryFrom<Double> for Integer {
    type Error = TooLargeForIntegerError;

    /// Routes the conversion through [`Decimal`]; any failure in either step
    /// is reported as [`TooLargeForIntegerError`].
    #[inline]
    fn try_from(value: Double) -> Result<Self, Self::Error> {
        let decimal = Decimal::try_from(value).map_err(|_| TooLargeForIntegerError)?;
        Self::try_from(decimal)
    }
}
/// The input is too large to fit into an [`Integer`].
///
/// Matches XPath [`FOCA0003` error](https://www.w3.org/TR/xpath-functions-31/#ERRFOCA0003).
// Zero-sized error type; `thiserror` derives the `Display` and `Error` impls.
#[derive(Debug, Clone, Copy, thiserror::Error)]
#[error("Value too large for xsd:integer internal representation")]
pub struct TooLargeForIntegerError;
// Unit tests: lexical parsing, lossy conversions from the floating-point types
// (which may approximate large values), and checked arithmetic edge cases.
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    #[test]
    fn from_str() -> Result<(), ParseIntError> {
        assert_eq!(Integer::from_str("0")?.to_string(), "0");
        assert_eq!(Integer::from_str("-0")?.to_string(), "0");
        assert_eq!(Integer::from_str("123")?.to_string(), "123");
        assert_eq!(Integer::from_str("-123")?.to_string(), "-123");
        Integer::from_str("123456789123456789123456789123456789123456789").unwrap_err();
        Ok(())
    }
    #[test]
    fn from_float() -> Result<(), ParseIntError> {
        assert_eq!(
            Integer::try_from(Float::from(0.)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Float::from(-0.)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Float::from(-123.1)).ok(),
            Some(Integer::from_str("-123")?)
        );
        Integer::try_from(Float::from(f32::NAN)).unwrap_err();
        Integer::try_from(Float::from(f32::INFINITY)).unwrap_err();
        Integer::try_from(Float::from(f32::NEG_INFINITY)).unwrap_err();
        Integer::try_from(Float::from(f32::MIN)).unwrap_err();
        Integer::try_from(Float::from(f32::MAX)).unwrap_err();
        // f32 has ~24 bits of mantissa, so the conversion is only approximate.
        assert!(
            Integer::try_from(Float::from(1_672_507_300_000.))
                .unwrap()
                .checked_sub(Integer::from_str("1672507300000")?)
                .unwrap()
                .checked_abs()
                .unwrap()
                < Integer::from(1_000_000)
        );
        Ok(())
    }
    #[test]
    fn from_double() -> Result<(), ParseIntError> {
        assert_eq!(
            Integer::try_from(Double::from(0.0)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Double::from(-0.0)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Double::from(-123.1)).ok(),
            Some(Integer::from_str("-123")?)
        );
        // f64 has more precision than f32, hence the tighter tolerance.
        assert!(
            Integer::try_from(Double::from(1_672_507_300_000.))
                .unwrap()
                .checked_sub(Integer::from_str("1672507300000").unwrap())
                .unwrap()
                .checked_abs()
                .unwrap()
                < Integer::from(10)
        );
        Integer::try_from(Double::from(f64::NAN)).unwrap_err();
        Integer::try_from(Double::from(f64::INFINITY)).unwrap_err();
        Integer::try_from(Double::from(f64::NEG_INFINITY)).unwrap_err();
        Integer::try_from(Double::from(f64::MIN)).unwrap_err();
        Integer::try_from(Double::from(f64::MAX)).unwrap_err();
        Ok(())
    }
    #[test]
    fn from_decimal() -> Result<(), ParseIntError> {
        assert_eq!(
            Integer::try_from(Decimal::from(0)).ok(),
            Some(Integer::from_str("0")?)
        );
        assert_eq!(
            Integer::try_from(Decimal::from_str("-123.1").unwrap()).ok(),
            Some(Integer::from_str("-123")?)
        );
        Integer::try_from(Decimal::MIN).unwrap_err();
        Integer::try_from(Decimal::MAX).unwrap_err();
        Ok(())
    }
    #[test]
    fn add() {
        assert_eq!(
            Integer::MIN.checked_add(1),
            Some(Integer::from(i64::MIN + 1))
        );
        assert_eq!(Integer::MAX.checked_add(1), None);
    }
    #[test]
    fn sub() {
        assert_eq!(Integer::MIN.checked_sub(1), None);
        assert_eq!(
            Integer::MAX.checked_sub(1),
            Some(Integer::from(i64::MAX - 1))
        );
    }
    #[test]
    fn mul() {
        assert_eq!(Integer::MIN.checked_mul(2), None);
        assert_eq!(Integer::MAX.checked_mul(2), None);
    }
    #[test]
    fn div() {
        assert_eq!(Integer::from(1).checked_div(0), None);
    }
    #[test]
    fn rem() {
        assert_eq!(Integer::from(10).checked_rem(3), Some(Integer::from(1)));
        assert_eq!(Integer::from(6).checked_rem(-2), Some(Integer::from(0)));
        assert_eq!(Integer::from(1).checked_rem(0), None);
    }
}

@ -0,0 +1,21 @@
// Implementations of the XML Schema datatypes: one private module per datatype
// family, with the public types re-exported flat from this module.
mod boolean;
mod date_time;
mod decimal;
mod double;
mod duration;
mod float;
mod integer;
// Re-export the value types together with their parse/overflow error types.
pub use self::boolean::Boolean;
pub use self::date_time::{
    Date, DateTime, DateTimeOverflowError, GDay, GMonth, GMonthDay, GYear, GYearMonth,
    InvalidTimezoneError, ParseDateTimeError, Time, TimezoneOffset,
};
pub use self::decimal::{Decimal, ParseDecimalError, TooLargeForDecimalError};
pub use self::double::Double;
pub use self::duration::{
    DayTimeDuration, Duration, DurationOverflowError, OppositeSignInDurationComponentsError,
    ParseDurationError, YearMonthDuration,
};
pub use self::float::Float;
pub use self::integer::{Integer, TooLargeForIntegerError};

@ -0,0 +1,54 @@
OxTTL
=====
[![Latest Version](https://img.shields.io/crates/v/oxttl.svg)](https://crates.io/crates/oxttl)
[![Released API docs](https://docs.rs/oxttl/badge.svg)](https://docs.rs/oxttl)
[![Crates.io downloads](https://img.shields.io/crates/d/oxttl)](https://crates.io/crates/oxttl)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxttl is a set of parsers and serializers for [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [N3](https://w3c.github.io/N3/spec/).
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is also available behind the `rdf-star` feature for all languages but N3 ([Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star), [TriG-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#trig-star), [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) and [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star)).
It is designed as a low level parser compatible with both synchronous and asynchronous I/O.
Usage example counting the number of people in a Turtle file:
```rust
use oxrdf::{NamedNodeRef, vocab::rdf};
use oxttl::TurtleParser;
let file = b"@base <http://example.com/> .
@prefix schema: <http://schema.org/> .
<foo> a schema:Person ;
schema:name \"Foo\" .
<bar> a schema:Person ;
schema:name \"Bar\" .";
let schema_person = NamedNodeRef::new("http://schema.org/Person").unwrap();
let mut count = 0;
for triple in TurtleParser::new().parse_read(file.as_ref()) {
let triple = triple.unwrap();
if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
count += 1;
}
}
assert_eq!(2, count);
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,977 @@
use crate::oxrdf::NamedNode;
use crate::oxttl::toolkit::{TokenRecognizer, TokenRecognizerError};
use memchr::{memchr, memchr2};
use oxilangtag::LanguageTag;
use oxiri::Iri;
use std::borrow::Cow;
use std::cmp::min;
use std::collections::HashMap;
use std::ops::Range;
use std::str;
/// A lexical token produced by the N3/Turtle/N-Triples lexer.
///
/// Borrowed variants reference the input buffer where possible; `IriRef` and
/// `String` own their text because escape resolution may rewrite it.
#[derive(Debug, PartialEq, Eq)]
pub enum N3Token<'a> {
    IriRef(String),
    PrefixedName {
        prefix: &'a str,
        local: Cow<'a, str>,
        // Set when the expanded IRI might not be valid and needs a later check.
        might_be_invalid_iri: bool,
    },
    Variable(Cow<'a, str>),
    BlankNodeLabel(&'a str),
    String(String),
    Integer(&'a str),
    Decimal(&'a str),
    Double(&'a str),
    LangTag(&'a str),
    Punctuation(&'a str),
    PlainKeyword(&'a str),
}
/// Which concrete syntax the lexer accepts; stricter modes reject
/// tokens specific to richer languages (e.g. `<=`/`<-` only exist in N3).
#[derive(Eq, PartialEq)]
pub enum N3LexerMode {
    NTriples,
    Turtle,
    N3,
}
/// Lexer configuration; a base IRI, when set, is used to resolve relative IRI references.
#[derive(Default)]
pub struct N3LexerOptions {
    pub base_iri: Option<Iri<String>>,
}
/// Tokenizer state: the accepted syntax and whether IRI validation is skipped.
pub struct N3Lexer {
    mode: N3LexerMode,
    // When true, IRIs are resolved/accepted without validation (see `parse_iri`).
    unchecked: bool,
}
// TODO: there are a lot of 'None' (missing data) returned even if the stream is ending!!!
// TODO: simplify by not giving is_end and fail with an "unexpected eof" is none is returned when is_end=true?
impl TokenRecognizer for N3Lexer {
    type Token<'a> = N3Token<'a>;
    type Options = N3LexerOptions;
    // Tries to recognize one token at the start of `data`. Returns
    // `Some((consumed_bytes, token_or_error))`, or `None` when more input is
    // needed (unless `is_ending` says the stream is complete).
    fn recognize_next_token<'a>(
        &mut self,
        data: &'a [u8],
        is_ending: bool,
        options: &N3LexerOptions,
    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
        // Dispatch on the first unread byte.
        match *data.first()? {
            b'<' => match *data.get(1)? {
                b'<' => Some((2, Ok(N3Token::Punctuation("<<")))),
                // In N3, `<=` is an operator unless the text actually lexes as an IRI.
                b'=' if self.mode == N3LexerMode::N3 => {
                    if let Some((consumed, result)) = self.recognize_iri(data, options) {
                        Some(if let Ok(result) = result {
                            (consumed, Ok(result))
                        } else {
                            (2, Ok(N3Token::Punctuation("<=")))
                        })
                    } else if is_ending {
                        Some((2, Ok(N3Token::Punctuation("<="))))
                    } else {
                        None
                    }
                }
                // Same ambiguity resolution for `<-`.
                b'-' if self.mode == N3LexerMode::N3 => {
                    if let Some((consumed, result)) = self.recognize_iri(data, options) {
                        Some(if let Ok(result) = result {
                            (consumed, Ok(result))
                        } else {
                            (2, Ok(N3Token::Punctuation("<-")))
                        })
                    } else if is_ending {
                        Some((2, Ok(N3Token::Punctuation("<-"))))
                    } else {
                        None
                    }
                }
                _ => self.recognize_iri(data, options),
            },
            b'>' => {
                if *data.get(1)? == b'>' {
                    Some((2, Ok(N3Token::Punctuation(">>"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation(">"))))
                }
            }
            // `_:` starts a blank node label; a bare `_` is an error.
            b'_' => match data.get(1)? {
                b':' => Self::recognize_blank_node_label(data),
                c => Some((
                    1,
                    Err((0, format!("Unexpected character '{}'", char::from(*c))).into()),
                )),
            },
            // `"""` long strings are not part of N-Triples.
            b'"' => {
                if self.mode != N3LexerMode::NTriples
                    && *data.get(1)? == b'"'
                    && *data.get(2)? == b'"'
                {
                    Self::recognize_long_string(data, b'"')
                } else {
                    Self::recognize_string(data, b'"')
                }
            }
            b'\'' if self.mode != N3LexerMode::NTriples => {
                if *data.get(1)? == b'\'' && *data.get(2)? == b'\'' {
                    Self::recognize_long_string(data, b'\'')
                } else {
                    Self::recognize_string(data, b'\'')
                }
            }
            b'@' => self.recognize_lang_tag(data),
            // `.` is either the start of a decimal (`.5`) or the statement terminator.
            b'.' => match data.get(1) {
                Some(b'0'..=b'9') => Self::recognize_number(data),
                Some(_) => Some((1, Ok(N3Token::Punctuation(".")))),
                None => is_ending.then_some((1, Ok(N3Token::Punctuation(".")))),
            },
            b'^' => {
                if *data.get(1)? == b'^' {
                    Some((2, Ok(N3Token::Punctuation("^^"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("^"))))
                }
            }
            b'(' => Some((1, Ok(N3Token::Punctuation("(")))),
            b')' => Some((1, Ok(N3Token::Punctuation(")")))),
            b'[' => Some((1, Ok(N3Token::Punctuation("[")))),
            b']' => Some((1, Ok(N3Token::Punctuation("]")))),
            // `{|` opens an RDF-star annotation block.
            b'{' => {
                if *data.get(1)? == b'|' {
                    Some((2, Ok(N3Token::Punctuation("{|"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("{"))))
                }
            }
            b'}' => Some((1, Ok(N3Token::Punctuation("}")))),
            b',' => Some((1, Ok(N3Token::Punctuation(",")))),
            b';' => Some((1, Ok(N3Token::Punctuation(";")))),
            b'!' => Some((1, Ok(N3Token::Punctuation("!")))),
            b'|' => {
                if *data.get(1)? == b'}' {
                    Some((2, Ok(N3Token::Punctuation("|}"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("|"))))
                }
            }
            b'=' => {
                if *data.get(1)? == b'>' {
                    Some((2, Ok(N3Token::Punctuation("=>"))))
                } else {
                    Some((1, Ok(N3Token::Punctuation("="))))
                }
            }
            b'0'..=b'9' | b'+' | b'-' => Self::recognize_number(data),
            b'?' => self.recognize_variable(data, is_ending),
            // Anything else is tried as a prefixed name or a bare keyword.
            _ => self.recognize_pname_or_keyword(data, is_ending),
        }
    }
}
impl N3Lexer {
    /// Creates a lexer for the given syntax `mode`; when `unchecked` is true,
    /// IRIs are resolved without validation (see `parse_iri`).
    pub fn new(mode: N3LexerMode, unchecked: bool) -> Self {
        Self { mode, unchecked }
    }
    // Lexes an IRI reference starting at the `<` at data[0], unescaping `\`
    // sequences into an owned buffer. Returns `None` if the closing `>` has not
    // arrived yet.
    fn recognize_iri(
        &self,
        data: &[u8],
        options: &N3LexerOptions,
    ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
        // [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */
        let mut string = Vec::new();
        let mut i = 1;
        loop {
            // Scan to the next `>` (end) or `\` (escape); `None` means incomplete input.
            let end = memchr2(b'>', b'\\', &data[i..])?;
            string.extend_from_slice(&data[i..i + end]);
            i += end;
            match data[i] {
                b'>' => {
                    #[allow(clippy::range_plus_one)]
                    return Some((i + 1, self.parse_iri(string, 0..i + 1, options)));
                }
                b'\\' => {
                    let (additional, c) = Self::recognize_escape(&data[i..], i, false)?;
                    i += additional + 1;
                    match c {
                        Ok(c) => {
                            // Append the unescaped char as UTF-8.
                            let mut buf = [0; 4];
                            string.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
                        }
                        Err(e) => return Some((i, Err(e))),
                    }
                }
                // memchr2 only stops on `>` or `\`.
                _ => unreachable!(),
            }
        }
    }
    // Turns the unescaped IRI bytes into an `N3Token::IriRef`, resolving
    // against the configured base IRI if any. In `unchecked` mode validation
    // is skipped; otherwise parse/resolution errors are reported at `position`.
    fn parse_iri(
        &self,
        iri: Vec<u8>,
        position: Range<usize>,
        options: &N3LexerOptions,
    ) -> Result<N3Token<'static>, TokenRecognizerError> {
        let iri = string_from_utf8(iri, position.clone())?;
        Ok(N3Token::IriRef(
            if let Some(base_iri) = options.base_iri.as_ref() {
                if self.unchecked {
                    base_iri.resolve_unchecked(&iri)
                } else {
                    base_iri
                        .resolve(&iri)
                        .map_err(|e| (position, e.to_string()))?
                }
                .into_inner()
            } else if self.unchecked {
                // No base and no checking: pass the text through as-is.
                iri
            } else {
                Iri::parse(iri)
                    .map_err(|e| (position, e.to_string()))?
                    .into_inner()
            },
        ))
    }
    // Lexes either a prefixed name (`prefix:local`) or, when no ':' is found,
    // a bare keyword (e.g. `a`, `true`). Trailing '.' characters are given
    // back to the stream since they terminate statements.
    fn recognize_pname_or_keyword<'a>(
        &self,
        data: &'a [u8],
        is_ending: bool,
    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
        // [139s] PNAME_NS ::= PN_PREFIX? ':'
        // [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
        // [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
        let mut i = 0;
        loop {
            if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
                match r {
                    Ok((c, consumed)) => {
                        if c == ':' {
                            // Found the namespace separator: switch to PN_LOCAL below.
                            i += consumed;
                            break;
                        } else if i == 0 {
                            if !Self::is_possible_pn_chars_base(c) {
                                return Some((
                                    consumed,
                                    Err((
                                        0..consumed,
                                        format!(
                                            "'{c}' is not allowed at the beginning of a prefix name"
                                        ),
                                    )
                                        .into()),
                                ));
                            }
                            i += consumed;
                        } else if Self::is_possible_pn_chars(c) || c == '.' {
                            i += consumed;
                        } else {
                            // Not a name char: what we read so far is a keyword.
                            // Back off trailing dots — they end statements.
                            while data[..i].ends_with(b".") {
                                i -= 1;
                            }
                            return Some((
                                i,
                                str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword),
                            ));
                        }
                    }
                    Err(e) => return Some((e.location.end, Err(e))),
                }
            } else if is_ending {
                // Input ends inside the name: emit what we have as a keyword.
                while data[..i].ends_with(b".") {
                    i -= 1;
                }
                return Some(if i == 0 {
                    (
                        1,
                        Err((0..1, format!("Unexpected byte {}", data[0])).into()),
                    )
                } else {
                    (
                        i,
                        str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword),
                    )
                });
            } else {
                // Need more input to finish the name.
                return None;
            }
        }
        // `i - 1` excludes the ':' from the prefix text.
        let pn_prefix = match str_from_utf8(&data[..i - 1], 0..i - 1) {
            Ok(pn_prefix) => pn_prefix,
            Err(e) => return Some((i, Err(e))),
        };
        if pn_prefix.ends_with('.') {
            return Some((
                i,
                Err((
                    0..i,
                    format!(
                        "'{pn_prefix}' is not a valid prefix: prefixes are not allowed to end with '.'"),
                )
                    .into()),
            ));
        }
        let (consumed, pn_local_result) =
            self.recognize_optional_pn_local(&data[i..], is_ending)?;
        Some((
            consumed + i,
            pn_local_result.map(|(local, might_be_invalid_iri)| N3Token::PrefixedName {
                prefix: pn_prefix,
                local,
                might_be_invalid_iri,
            }),
        ))
    }
fn recognize_variable<'a>(
&self,
data: &'a [u8],
is_ending: bool,
) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
// [36] QUICK_VAR_NAME ::= "?" PN_LOCAL
let (consumed, result) = self.recognize_optional_pn_local(&data[1..], is_ending)?;
Some((
consumed + 1,
result.and_then(|(name, _)| {
if name.is_empty() {
Err((0..consumed, "A variable name is not allowed to be empty").into())
} else {
Ok(N3Token::Variable(name))
}
}),
))
}
    /// Tokenizes an optional PN_LOCAL (the part after ':' in a prefixed name,
    /// or after '?' in a variable).
    ///
    /// Returns the consumed length and `(local_part, might_be_invalid_iri)`.
    /// The boolean is `true` when the local part contains characters that
    /// require re-validating the expanded IRI. Trailing unescaped dots are
    /// excluded from the result (they terminate statements). Returns `None`
    /// when more input is required.
    fn recognize_optional_pn_local<'a>(
        &self,
        data: &'a [u8],
        is_ending: bool,
    ) -> Option<(usize, Result<(Cow<'a, str>, bool), TokenRecognizerError>)> {
        // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
        let mut i = 0;
        let mut buffer = None; // Buffer if there are some escaped characters
        let mut position_that_is_already_in_buffer = 0;
        let mut might_be_invalid_iri = false;
        let mut ends_with_unescaped_dot = 0;
        loop {
            if let Some(r) = Self::recognize_unicode_char(&data[i..], i) {
                match r {
                    Ok((c, consumed)) => {
                        if c == '%' {
                            // PLX percent escape: '%' must be followed by two hex digits.
                            i += 1;
                            let a = char::from(*data.get(i)?);
                            i += 1;
                            let b = char::from(*data.get(i)?);
                            if !a.is_ascii_hexdigit() || !b.is_ascii_hexdigit() {
                                return Some((i + 1, Err((
                                    i - 2..=i, format!("escapes in IRIs should be % followed by two hexadecimal characters, found '%{a}{b}'")
                                ).into())));
                            }
                            i += 1;
                            ends_with_unescaped_dot = 0;
                        } else if c == '\\' {
                            // PN_LOCAL_ESC backslash escape: the unescaped text
                            // is accumulated in the owned `buffer`.
                            i += 1;
                            let a = char::from(*data.get(i)?);
                            if self.unchecked
                                || matches!(
                                    a,
                                    '_' | '~'
                                        | '.'
                                        | '-'
                                        | '!'
                                        | '$'
                                        | '&'
                                        | '\''
                                        | '('
                                        | ')'
                                        | '*'
                                        | '+'
                                        | ','
                                        | ';'
                                        | '='
                                )
                            {
                                // ok to escape
                            } else if matches!(a, '/' | '?' | '#' | '@' | '%') {
                                // ok to escape but requires IRI validation
                                might_be_invalid_iri = true;
                            } else {
                                return Some((i + 1, Err((
                                    i..=i, format!("The character that are allowed to be escaped in IRIs are _~.-!$&'()*+,;=/?#@%, found '{a}'")
                                ).into())));
                            }
                            let buffer = buffer.get_or_insert_with(String::new);
                            // We add the missing bytes
                            if i - position_that_is_already_in_buffer > 1 {
                                buffer.push_str(
                                    match str_from_utf8(
                                        &data[position_that_is_already_in_buffer..i - 1],
                                        position_that_is_already_in_buffer..i - 1,
                                    ) {
                                        Ok(data) => data,
                                        Err(e) => return Some((i, Err(e))),
                                    },
                                )
                            }
                            buffer.push(a);
                            i += 1;
                            position_that_is_already_in_buffer = i;
                            ends_with_unescaped_dot = 0;
                        } else if i == 0 {
                            // First character: decide whether a PN_LOCAL starts here at all.
                            if !(Self::is_possible_pn_chars_u(c) || c == ':' || c.is_ascii_digit())
                            {
                                return Some((0, Ok((Cow::Borrowed(""), false))));
                            }
                            if !self.unchecked {
                                might_be_invalid_iri |=
                                    Self::is_possible_pn_chars_base_but_not_valid_iri(c)
                                        || c == ':';
                            }
                            i += consumed;
                        } else if Self::is_possible_pn_chars(c) || c == ':' {
                            if !self.unchecked {
                                might_be_invalid_iri |=
                                    Self::is_possible_pn_chars_base_but_not_valid_iri(c)
                                        || c == ':';
                            }
                            i += consumed;
                            ends_with_unescaped_dot = 0;
                        } else if c == '.' {
                            // Dots are only kept if followed by more name characters.
                            i += consumed;
                            ends_with_unescaped_dot += 1;
                        } else {
                            // End of the local part: flush and trim trailing dots.
                            let buffer = if let Some(mut buffer) = buffer {
                                buffer.push_str(
                                    match str_from_utf8(
                                        &data[position_that_is_already_in_buffer..i],
                                        position_that_is_already_in_buffer..i,
                                    ) {
                                        Ok(data) => data,
                                        Err(e) => return Some((i, Err(e))),
                                    },
                                );
                                // We do not include the last dots
                                for _ in 0..ends_with_unescaped_dot {
                                    buffer.pop();
                                }
                                i -= ends_with_unescaped_dot;
                                Cow::Owned(buffer)
                            } else {
                                let mut data = match str_from_utf8(&data[..i], 0..i) {
                                    Ok(data) => data,
                                    Err(e) => return Some((i, Err(e))),
                                };
                                // We do not include the last dots
                                data = &data[..data.len() - ends_with_unescaped_dot];
                                i -= ends_with_unescaped_dot;
                                Cow::Borrowed(data)
                            };
                            return Some((i, Ok((buffer, might_be_invalid_iri))));
                        }
                    }
                    Err(e) => return Some((e.location.end, Err(e))),
                }
            } else if is_ending {
                // End of input: flush, trimming any trailing dots.
                let buffer = if let Some(mut buffer) = buffer {
                    // We do not include the last dot
                    while buffer.ends_with('.') {
                        buffer.pop();
                        i -= 1;
                    }
                    Cow::Owned(buffer)
                } else {
                    let mut data = match str_from_utf8(&data[..i], 0..i) {
                        Ok(data) => data,
                        Err(e) => return Some((i, Err(e))),
                    };
                    // We do not include the last dot
                    while let Some(d) = data.strip_suffix('.') {
                        data = d;
                        i -= 1;
                    }
                    Cow::Borrowed(data)
                };
                return Some((i, Ok((buffer, might_be_invalid_iri))));
            } else {
                return None;
            }
        }
    }
    /// Tokenizes a blank node label (`_:label`); the caller has already seen
    /// the `_:` prefix, so scanning starts at byte offset 2.
    ///
    /// A trailing dot is excluded from the label (it terminates statements).
    /// Returns `None` when more input is required.
    fn recognize_blank_node_label(
        data: &[u8],
    ) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
        // [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
        let mut i = 2;
        loop {
            match Self::recognize_unicode_char(&data[i..], i)? {
                Ok((c, consumed)) => {
                    if (i == 2 && (Self::is_possible_pn_chars_u(c) || c.is_ascii_digit()))
                        || (i > 2 && Self::is_possible_pn_chars(c))
                    {
                        // Ok
                    } else if i > 2 && c == '.' {
                        // A dot is only part of the label if followed by more
                        // label characters; two dots in a row end the label.
                        if data[i - 1] == b'.' {
                            i -= 1;
                            return Some((
                                i,
                                str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel),
                            ));
                        }
                    } else if i == 0 {
                        // NOTE(review): `i` starts at 2 and never decreases below 1
                        // before returning, so this branch looks unreachable — confirm.
                        return Some((
                            i,
                            Err((0..i, "A blank node ID should not be empty").into()),
                        ));
                    } else if data[i - 1] == b'.' {
                        // Drop a single trailing dot from the label.
                        i -= 1;
                        return Some((
                            i,
                            str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel),
                        ));
                    } else {
                        return Some((
                            i,
                            str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel),
                        ));
                    }
                    i += consumed;
                }
                Err(e) => return Some((e.location.end, Err(e))),
            }
        }
    }
    /// Tokenizes a language tag after the `@` of a literal (e.g. `@en-US`).
    ///
    /// Stops at the first character that cannot continue the tag. Returns
    /// `None` when the buffer ends while the tag could still continue.
    fn recognize_lang_tag<'a>(
        &self,
        data: &'a [u8],
    ) -> Option<(usize, Result<N3Token<'a>, TokenRecognizerError>)> {
        // [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
        let mut is_last_block_empty = true;
        for (i, c) in data[1..].iter().enumerate() {
            if c.is_ascii_alphabetic() {
                is_last_block_empty = false;
            } else if i == 0 {
                return Some((
                    1,
                    Err((1..2, "A language code should always start with a letter").into()),
                ));
            } else if is_last_block_empty {
                // A '-' (or start) not followed by a letter: the tag ended before it.
                // NOTE(review): the reported span `1..i - 1` is one byte narrower
                // than the consumed slice `data[1..i]` — confirm intended.
                return Some((i, self.parse_lang_tag(&data[1..i], 1..i - 1)));
            } else if *c == b'-' {
                is_last_block_empty = true;
            } else {
                return Some((i + 1, self.parse_lang_tag(&data[1..=i], 1..i)));
            }
        }
        None
    }
fn parse_lang_tag<'a>(
&self,
lang_tag: &'a [u8],
position: Range<usize>,
) -> Result<N3Token<'a>, TokenRecognizerError> {
let lang_tag = str_from_utf8(lang_tag, position.clone())?;
Ok(N3Token::LangTag(if self.unchecked {
lang_tag
} else {
LanguageTag::parse(lang_tag)
.map_err(|e| (position.clone(), e.to_string()))?
.into_inner()
}))
}
    /// Tokenizes a single-line string literal delimited by `delimiter`
    /// (`"` or `'`), unescaping ECHAR and UCHAR escapes.
    ///
    /// Returns `None` when the closing delimiter has not been buffered yet.
    fn recognize_string(
        data: &[u8],
        delimiter: u8,
    ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
        // [22] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
        // [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
        let mut string = String::new();
        let mut i = 1; // skip the opening delimiter
        loop {
            // Jump to the next closing delimiter or escape start.
            let end = memchr2(delimiter, b'\\', &data[i..])?;
            match str_from_utf8(&data[i..i + end], i..i + end) {
                // NOTE(review): this error is reported at offset `end`, not
                // `i + end` like the other branches — confirm intended.
                Ok(s) => string.push_str(s),
                Err(e) => return Some((end, Err(e))),
            };
            i += end;
            match data[i] {
                c if c == delimiter => {
                    return Some((i + 1, Ok(N3Token::String(string))));
                }
                b'\\' => {
                    let (additional, c) = Self::recognize_escape(&data[i..], i, true)?;
                    i += additional + 1;
                    match c {
                        Ok(c) => {
                            string.push(c);
                        }
                        Err(e) => {
                            // We read until the end of string char
                            let end = memchr(delimiter, &data[i..])?;
                            return Some((i + end + 1, Err(e)));
                        }
                    }
                }
                _ => unreachable!(), // memchr2 only stops on delimiter or '\\'
            }
        }
    }
    /// Tokenizes a triple-quoted (long) string literal (`"""…"""` or
    /// `'''…'''`), unescaping ECHAR and UCHAR escapes.
    ///
    /// Returns `None` when the closing triple delimiter has not been buffered.
    fn recognize_long_string(
        data: &[u8],
        delimiter: u8,
    ) -> Option<(usize, Result<N3Token<'static>, TokenRecognizerError>)> {
        // [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''"
        // [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""'
        let mut string = String::new();
        let mut i = 3; // skip the three opening delimiter characters
        loop {
            let end = memchr2(delimiter, b'\\', &data[i..])?;
            match str_from_utf8(&data[i..i + end], i..i + end) {
                Ok(s) => string.push_str(s),
                Err(e) => return Some((end, Err(e))),
            };
            i += end;
            match data[i] {
                c if c == delimiter => {
                    // The string only ends on three consecutive delimiters;
                    // a lone delimiter is part of the content.
                    if *data.get(i + 1)? == delimiter && *data.get(i + 2)? == delimiter {
                        return Some((i + 3, Ok(N3Token::String(string))));
                    }
                    i += 1;
                    string.push(char::from(delimiter));
                }
                b'\\' => {
                    let (additional, c) = Self::recognize_escape(&data[i..], i, true)?;
                    i += additional + 1;
                    match c {
                        Ok(c) => {
                            string.push(c);
                        }
                        Err(e) => return Some((i, Err(e))),
                    }
                }
                _ => unreachable!(), // memchr2 only stops on delimiter or '\\'
            }
        }
    }
    /// Tokenizes an INTEGER, DECIMAL or DOUBLE literal, deciding the type from
    /// the presence of a fractional part and/or an exponent.
    ///
    /// Returns `None` when more input is required to decide where the number ends.
    fn recognize_number(data: &[u8]) -> Option<(usize, Result<N3Token<'_>, TokenRecognizerError>)> {
        // [19] INTEGER ::= [+-]? [0-9]+
        // [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
        // [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
        // [154s] EXPONENT ::= [eE] [+-]? [0-9]+
        let mut i = 0;
        let c = *data.first()?;
        if matches!(c, b'+' | b'-') {
            i += 1;
        }
        // We read the digits before .
        let mut count_before: usize = 0;
        loop {
            let c = *data.get(i)?;
            if c.is_ascii_digit() {
                i += 1;
                count_before += 1;
            } else {
                break;
            }
        }
        // We read the digits after .
        #[allow(clippy::if_then_some_else_none)]
        let count_after = if *data.get(i)? == b'.' {
            i += 1;
            let mut count_after = 0;
            loop {
                let c = *data.get(i)?;
                if c.is_ascii_digit() {
                    i += 1;
                    count_after += 1;
                } else {
                    break;
                }
            }
            Some(count_after)
        } else {
            None
        };
        // End
        let c = *data.get(i)?;
        if matches!(c, b'e' | b'E') {
            // An exponent makes the literal a DOUBLE.
            i += 1;
            let c = *data.get(i)?;
            if matches!(c, b'+' | b'-') {
                i += 1;
            }
            let mut found = false;
            loop {
                let c = *data.get(i)?;
                if c.is_ascii_digit() {
                    i += 1;
                    found = true;
                } else {
                    break;
                }
            }
            Some((
                i,
                if !found {
                    Err((0..i, "A double exponent cannot be empty").into())
                } else if count_before == 0 && count_after.unwrap_or(0) == 0 {
                    Err((0..i, "A double should not be empty").into())
                } else {
                    str_from_utf8(&data[..i], 0..i).map(N3Token::Double)
                },
            ))
        } else if let Some(count_after) = count_after {
            if count_after == 0 {
                // We do not consume the '.' after all
                // (e.g. "42." is the integer 42 followed by a statement dot).
                i -= 1;
                Some((
                    i,
                    if count_before == 0 {
                        Err((0..i, "An integer should not be empty").into())
                    } else {
                        str_from_utf8(&data[..i], 0..i).map(N3Token::Integer)
                    },
                ))
            } else {
                Some((i, str_from_utf8(&data[..i], 0..i).map(N3Token::Decimal)))
            }
        } else {
            Some((
                i,
                if count_before == 0 {
                    Err((0..i, "An integer should not be empty").into())
                } else {
                    str_from_utf8(&data[..i], 0..i).map(N3Token::Integer)
                },
            ))
        }
    }
fn recognize_escape(
data: &[u8],
position: usize,
with_echar: bool,
) -> Option<(usize, Result<char, TokenRecognizerError>)> {
// [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX
// [159s] ECHAR ::= '\' [tbnrf"'\]
match *data.get(1)? {
b'u' => match Self::recognize_hex_char(&data[2..], 4, 'u', position) {
Ok(c) => Some((5, Ok(c?))),
Err(e) => Some((5, Err(e))),
},
b'U' => match Self::recognize_hex_char(&data[2..], 8, 'u', position) {
Ok(c) => Some((9, Ok(c?))),
Err(e) => Some((9, Err(e))),
},
b't' if with_echar => Some((1, Ok('\t'))),
b'b' if with_echar => Some((1, Ok('\x08'))),
b'n' if with_echar => Some((1, Ok('\n'))),
b'r' if with_echar => Some((1, Ok('\r'))),
b'f' if with_echar => Some((1, Ok('\x0C'))),
b'"' if with_echar => Some((1, Ok('"'))),
b'\'' if with_echar => Some((1, Ok('\''))),
b'\\' if with_echar => Some((1, Ok('\\'))),
c => Some((
1,
Err((
position..position + 2,
format!("Unexpected escape character '\\{}'", char::from(c)),
)
.into()),
)), // TODO: read until end of string
}
}
    /// Decodes the `len` hexadecimal digits of a `\u`/`\U` escape into a
    /// character.
    ///
    /// Returns `Ok(None)` when fewer than `len` bytes are buffered (more input
    /// needed). `position` is the offset of the backslash; error spans cover
    /// the whole escape sequence (`len + 2` bytes).
    fn recognize_hex_char(
        data: &[u8],
        len: usize,
        escape_char: char,
        position: usize,
    ) -> Result<Option<char>, TokenRecognizerError> {
        if data.len() < len {
            return Ok(None);
        }
        let val = str_from_utf8(&data[..len], position..position + len + 2)?;
        let codepoint = u32::from_str_radix(val, 16).map_err(|e| {
            (
                position..position + len + 2,
                format!(
                    "The escape sequence '\\{escape_char}{val}' is not a valid hexadecimal string: {e}"
                ),
            )
        })?;
        // Reject code points outside the Unicode scalar value range
        // (surrogates, > U+10FFFF).
        let c = char::from_u32(codepoint).ok_or_else(|| {
            (
                position..position + len +2,
                format!(
                    "The escape sequence '\\{escape_char}{val}' is encoding {codepoint:X} that is not a valid unicode character",
                ),
            )
        })?;
        Ok(Some(c))
    }
    /// Incrementally decodes one UTF-8 encoded character from `data`.
    ///
    /// Returns `None` when the buffer ends inside a multi-byte sequence,
    /// `Some(Ok((char, byte_len)))` on success, and an error for invalid
    /// sequences. The per-lead-byte `lower_boundary`/`upper_boundary`
    /// adjustments reject overlong encodings (0xE0/0xF0), surrogates (0xED)
    /// and code points above U+10FFFF (0xF4).
    fn recognize_unicode_char(
        data: &[u8],
        position: usize,
    ) -> Option<Result<(char, usize), TokenRecognizerError>> {
        let mut code_point: u32;
        let bytes_needed: usize;
        let mut lower_boundary = 0x80;
        let mut upper_boundary = 0xBF;
        let byte = *data.first()?;
        match byte {
            0x00..=0x7F => return Some(Ok((char::from(byte), 1))), // ASCII fast path
            0xC2..=0xDF => {
                bytes_needed = 1;
                code_point = u32::from(byte) & 0x1F;
            }
            0xE0..=0xEF => {
                if byte == 0xE0 {
                    lower_boundary = 0xA0;
                }
                if byte == 0xED {
                    upper_boundary = 0x9F;
                }
                bytes_needed = 2;
                code_point = u32::from(byte) & 0xF;
            }
            0xF0..=0xF4 => {
                if byte == 0xF0 {
                    lower_boundary = 0x90;
                }
                if byte == 0xF4 {
                    upper_boundary = 0x8F;
                }
                bytes_needed = 3;
                code_point = u32::from(byte) & 0x7;
            }
            _ => {
                // Continuation byte or invalid lead byte (0x80-0xC1, 0xF5-0xFF).
                return Some(Err((
                    position..=position,
                    "Invalid UTF-8 character encoding",
                )
                .into()))
            }
        }
        for i in 1..=bytes_needed {
            let byte = *data.get(i)?;
            if byte < lower_boundary || upper_boundary < byte {
                return Some(Err((
                    position..=position + i,
                    "Invalid UTF-8 character encoding",
                )
                .into()));
            }
            // After the first continuation byte the generic 0x80..=0xBF range applies.
            lower_boundary = 0x80;
            upper_boundary = 0xBF;
            code_point = (code_point << 6) | (u32::from(byte) & 0x3F);
        }
        Some(
            char::from_u32(code_point)
                .map(|c| (c, bytes_needed + 1))
                .ok_or_else(|| {
                    (
                        position..=position + bytes_needed,
                        format!("The codepoint {code_point:X} is not a valid unicode character"),
                    )
                    .into()
                }),
        )
    }
// [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
fn is_possible_pn_chars_base(c: char) -> bool {
matches!(c,
'A'..='Z'
| 'a'..='z'
| '\u{00C0}'..='\u{00D6}'
| '\u{00D8}'..='\u{00F6}'
| '\u{00F8}'..='\u{02FF}'
| '\u{0370}'..='\u{037D}'
| '\u{037F}'..='\u{1FFF}'
| '\u{200C}'..='\u{200D}'
| '\u{2070}'..='\u{218F}'
| '\u{2C00}'..='\u{2FEF}'
| '\u{3001}'..='\u{D7FF}'
| '\u{F900}'..='\u{FDCF}'
| '\u{FDF0}'..='\u{FFFD}'
| '\u{10000}'..='\u{EFFFF}')
}
// [158s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | ':'
pub(super) fn is_possible_pn_chars_u(c: char) -> bool {
Self::is_possible_pn_chars_base(c) || c == '_'
}
// [160s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
pub(crate) fn is_possible_pn_chars(c: char) -> bool {
Self::is_possible_pn_chars_u(c)
|| matches!(c,
'-' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}')
}
fn is_possible_pn_chars_base_but_not_valid_iri(c: char) -> bool {
matches!(c, '\u{FFF0}'..='\u{FFFD}')
|| u32::from(c) % u32::from('\u{FFFE}') == 0
|| u32::from(c) % u32::from('\u{FFFF}') == 0
}
}
/// Expands a prefixed name (`prefix:local`) into a full [`NamedNode`] using
/// the in-scope prefix declarations.
///
/// The expanded IRI is re-validated when the local part contains characters
/// that might break IRI syntax or when the prefix IRI has an empty path (the
/// local part could then end up inside the authority component).
pub fn resolve_local_name(
    prefix: &str,
    local: &str,
    might_be_invalid_iri: bool,
    prefixes: &HashMap<String, Iri<String>>,
) -> Result<NamedNode, String> {
    let start = match prefixes.get(prefix) {
        Some(start) => start,
        None => return Err(format!("The prefix {prefix}: has not been declared")),
    };
    let iri = format!("{start}{local}");
    if might_be_invalid_iri || start.path().is_empty() {
        // We validate again. We always validate if the local part might be the IRI authority.
        if let Err(e) = Iri::parse(iri.as_str()) {
            return Err(format!(
                "The prefixed name {prefix}:{local} builds IRI {iri} that is invalid: {e}"
            ));
        }
    }
    Ok(NamedNode::new_unchecked(iri))
}
/// Decodes `data` as UTF-8, mapping failures to a lexer error located at the
/// first invalid byte (the reported span is capped at 4 bytes past it).
fn str_from_utf8(data: &[u8], range: Range<usize>) -> Result<&str, TokenRecognizerError> {
    match str::from_utf8(data) {
        Ok(s) => Ok(s),
        Err(e) => {
            let start = range.start + e.valid_up_to();
            let end = min(range.end, start + 4);
            Err((start..end, format!("Invalid UTF-8: {e}")).into())
        }
    }
}
/// Owned variant of [`str_from_utf8`]: consumes the byte buffer and returns a
/// `String`, mapping UTF-8 failures to a located lexer error.
fn string_from_utf8(data: Vec<u8>, range: Range<usize>) -> Result<String, TokenRecognizerError> {
    match String::from_utf8(data) {
        Ok(s) => Ok(s),
        Err(e) => {
            let start = range.start + e.utf8_error().valid_up_to();
            let end = min(range.end, start + 4);
            Err((start..end, format!("Invalid UTF-8: {e}")).into())
        }
    }
}

@ -0,0 +1,314 @@
//! Shared parser implementation for N-Triples and N-Quads.
#[cfg(feature = "rdf-star")]
use crate::oxrdf::Triple;
use crate::oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term};
use crate::oxttl::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
use crate::oxttl::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError};
use crate::oxttl::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
/// Rule recognizer shared by the N-Triples and N-Quads parsers.
///
/// The term vectors act as stacks so that (with the `rdf-star` feature)
/// quoted triples can nest: the top of each stack is the term of the
/// innermost triple currently being parsed.
pub struct NQuadsRecognizer {
    stack: Vec<NQuadsState>,
    subjects: Vec<Subject>,
    predicates: Vec<NamedNode>,
    objects: Vec<Term>,
}
/// Configuration shared by all states of [`NQuadsRecognizer`].
pub struct NQuadsRecognizerContext {
    /// `true` for N-Quads (a graph name may follow the object), `false` for N-Triples.
    with_graph_name: bool,
    /// Accept RDF-star quoted triples (`<< ... >>`).
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
    lexer_options: N3LexerOptions,
}
/// Parser states for the line-oriented N-Triples/N-Quads grammar.
enum NQuadsState {
    ExpectSubject,
    ExpectPredicate,
    ExpectedObject,
    /// After an object: either a graph name (N-Quads) or `>>` closing a quoted triple.
    ExpectPossibleGraphOrEndOfQuotedTriple,
    ExpectDot,
    /// A plain string was read; `@lang`, `^^`, a graph name or `.` may follow.
    ExpectLiteralAnnotationOrGraphNameOrDot {
        value: String,
    },
    /// `^^` was read; the datatype IRI must follow.
    ExpectLiteralDatatype {
        value: String,
    },
    #[cfg(feature = "rdf-star")]
    AfterQuotedSubject,
    #[cfg(feature = "rdf-star")]
    AfterQuotedObject,
}
impl RuleRecognizer for NQuadsRecognizer {
type TokenRecognizer = N3Lexer;
type Output = Quad;
type Context = NQuadsRecognizerContext;
fn error_recovery_state(mut self) -> Self {
self.stack.clear();
self.subjects.clear();
self.predicates.clear();
self.objects.clear();
self
}
    /// Consumes one token and advances the state machine.
    ///
    /// Completed quads are appended to `results`; recoverable syntax problems
    /// are appended to `errors` and parsing resumes at the next statement.
    /// The placeholder "TOKEN" in the error messages is substituted by the
    /// toolkit when reporting.
    fn recognize_next(
        mut self,
        token: N3Token<'_>,
        context: &mut NQuadsRecognizerContext,
        results: &mut Vec<Quad>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self {
        if let Some(state) = self.stack.pop() {
            match state {
                NQuadsState::ExpectSubject => match token {
                    N3Token::IriRef(s) => {
                        self.subjects
                            .push(NamedNode::new_unchecked(s).into());
                        self.stack.push(NQuadsState::ExpectPredicate);
                        self
                    }
                    N3Token::BlankNodeLabel(s) => {
                        self.subjects.push(BlankNode::new_unchecked(s).into());
                        self.stack.push(NQuadsState::ExpectPredicate);
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        // Start of a quoted triple used as subject.
                        self.stack.push(NQuadsState::AfterQuotedSubject);
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    _ => self.error(
                        errors,
                        "The subject of a triple should be an IRI or a blank node, TOKEN found",
                    ),
                },
                NQuadsState::ExpectPredicate => match token {
                    N3Token::IriRef(p) => {
                        self.predicates
                            .push(NamedNode::new_unchecked(p));
                        self.stack.push(NQuadsState::ExpectedObject);
                        self
                    }
                    _ => self.error(
                        errors,
                        "The predicate of a triple should be an IRI, TOKEN found",
                    ),
                },
                NQuadsState::ExpectedObject => match token {
                    N3Token::IriRef(o) => {
                        self.objects
                            .push(NamedNode::new_unchecked(o).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::BlankNodeLabel(o) => {
                        self.objects.push(BlankNode::new_unchecked(o).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::String(value) => {
                        // Hold the raw string: a language tag or datatype may follow.
                        self.stack
                            .push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value });
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        // Start of a quoted triple used as object.
                        self.stack.push(NQuadsState::AfterQuotedObject);
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    _ => self.error(
                        errors,
                        "The object of a triple should be an IRI, a blank node or a literal, TOKEN found",
                    ),
                },
                NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
                    N3Token::LangTag(lang_tag) => {
                        self.objects.push(
                            Literal::new_language_tagged_literal_unchecked(
                                value,
                                lang_tag.to_ascii_lowercase(),
                            )
                            .into(),
                        );
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::Punctuation("^^") => {
                        self.stack
                            .push(NQuadsState::ExpectLiteralDatatype { value });
                        self
                    }
                    _ => {
                        // No annotation: it is a plain literal; re-dispatch the token.
                        self.objects.push(Literal::new_simple_literal(value).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self.recognize_next(token, context, results, errors)
                    }
                },
                NQuadsState::ExpectLiteralDatatype { value } => match token {
                    N3Token::IriRef(d) => {
                        self.objects.push(
                            Literal::new_typed_literal(
                                value,
                                NamedNode::new_unchecked(d)
                            )
                            .into(),
                        );
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    _ => self.error(errors, "A literal datatype must be an IRI, found TOKEN"),
                },
                NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
                    // An empty stack means we are at the statement top level;
                    // otherwise we are inside a quoted triple and expect '>>'.
                    if self.stack.is_empty() {
                        match token {
                            N3Token::IriRef(g) if context.with_graph_name => {
                                self.emit_quad(
                                    results,
                                    NamedNode::new_unchecked(g).into(),
                                );
                                self.stack.push(NQuadsState::ExpectDot);
                                self
                            }
                            N3Token::BlankNodeLabel(g) if context.with_graph_name => {
                                self.emit_quad(results, BlankNode::new_unchecked(g).into());
                                self.stack.push(NQuadsState::ExpectDot);
                                self
                            }
                            _ => {
                                // No graph name: emit into the default graph and re-dispatch.
                                self.emit_quad(results, GraphName::DefaultGraph);
                                self.stack.push(NQuadsState::ExpectDot);
                                self.recognize_next(token, context, results, errors)
                            }
                        }
                    } else if token == N3Token::Punctuation(">>") {
                        self
                    } else {
                        self.error(errors, "Expecting the end of a quoted triple '>>'")
                    }
                }
                NQuadsState::ExpectDot => if let N3Token::Punctuation(".") = token {
                    self.stack.push(NQuadsState::ExpectSubject);
                    self
                } else {
                    // Missing dot: report it but keep parsing from this token.
                    errors.push("Quads should be followed by a dot".into());
                    self.stack.push(NQuadsState::ExpectSubject);
                    self.recognize_next(token, context, results, errors)
                },
                #[cfg(feature = "rdf-star")]
                NQuadsState::AfterQuotedSubject => {
                    // Fold the completed quoted triple into a subject term.
                    let triple = Triple {
                        subject: self.subjects.pop().unwrap(),
                        predicate: self.predicates.pop().unwrap(),
                        object: self.objects.pop().unwrap(),
                    };
                    self.subjects.push(triple.into());
                    self.stack.push(NQuadsState::ExpectPredicate);
                    self.recognize_next(token,context, results, errors)
                }
                #[cfg(feature = "rdf-star")]
                NQuadsState::AfterQuotedObject => {
                    // Fold the completed quoted triple into an object term.
                    let triple = Triple {
                        subject: self.subjects.pop().unwrap(),
                        predicate: self.predicates.pop().unwrap(),
                        object: self.objects.pop().unwrap(),
                    };
                    self.objects.push(triple.into());
                    self.stack
                        .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                    self.recognize_next(token, context, results, errors)
                }
            }
        } else if token == N3Token::Punctuation(".") {
            // Empty stack (e.g. after error recovery): a '.' restarts a statement.
            self.stack.push(NQuadsState::ExpectSubject);
            self
        } else {
            // Skip tokens until the next statement boundary.
            self
        }
    }
    /// Called at end of input: flushes a final quad that is missing its
    /// trailing dot and reports incomplete statements.
    fn recognize_end(
        mut self,
        _context: &mut NQuadsRecognizerContext,
        results: &mut Vec<Quad>,
        errors: &mut Vec<RuleRecognizerError>,
    ) {
        match &*self.stack {
            // Clean end: nothing pending.
            [NQuadsState::ExpectSubject] | [] => (),
            [NQuadsState::ExpectDot] => errors.push("Triples should be followed by a dot".into()),
            [NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => {
                // A complete triple without a dot: emit it anyway, then complain.
                self.emit_quad(results, GraphName::DefaultGraph);
                errors.push("Triples should be followed by a dot".into())
            }
            [NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }] => {
                // A trailing plain literal object: emit it as a simple literal.
                self.objects.push(Literal::new_simple_literal(value).into());
                self.emit_quad(results, GraphName::DefaultGraph);
                errors.push("Triples should be followed by a dot".into())
            }
            _ => errors.push("Unexpected end".into()), // TODO
        }
    }
    // The line-based formats use no prefixes or base IRI, so the stored
    // default lexer options are always sufficient.
    fn lexer_options(context: &NQuadsRecognizerContext) -> &N3LexerOptions {
        &context.lexer_options
    }
}
impl NQuadsRecognizer {
    /// Builds a toolkit [`Parser`] around this recognizer.
    ///
    /// `with_graph_name` selects N-Quads (`true`) vs N-Triples (`false`);
    /// `unchecked` skips expensive lexer validations. `#` starts comments.
    pub fn new_parser(
        with_graph_name: bool,
        #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
        unchecked: bool,
    ) -> Parser<Self> {
        Parser::new(
            Lexer::new(
                N3Lexer::new(N3LexerMode::NTriples, unchecked),
                MIN_BUFFER_SIZE,
                MAX_BUFFER_SIZE,
                true,
                Some(b"#"),
            ),
            Self {
                stack: vec![NQuadsState::ExpectSubject],
                subjects: Vec::new(),
                predicates: Vec::new(),
                objects: Vec::new(),
            },
            NQuadsRecognizerContext {
                with_graph_name,
                #[cfg(feature = "rdf-star")]
                with_quoted_triples,
                lexer_options: N3LexerOptions::default(),
            },
        )
    }
    /// Records `msg` as an error and resets all parsing state, so parsing can
    /// resume at the next statement.
    #[must_use]
    fn error(
        mut self,
        errors: &mut Vec<RuleRecognizerError>,
        msg: impl Into<RuleRecognizerError>,
    ) -> Self {
        errors.push(msg.into());
        self.stack.clear();
        self.subjects.clear();
        self.predicates.clear();
        self.objects.clear();
        self
    }
    /// Pops the completed subject/predicate/object and appends the resulting
    /// quad (in `graph_name`) to `results`.
    fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) {
        results.push(Quad {
            subject: self.subjects.pop().unwrap(),
            predicate: self.predicates.pop().unwrap(),
            object: self.objects.pop().unwrap(),
            graph_name,
        })
    }
}

@ -0,0 +1,19 @@
// Streaming parsers/serializers for the Turtle family of formats
// (N-Triples, N-Quads, Turtle, TriG, N3), built on a shared lexer and
// incremental parsing toolkit.
mod lexer;
mod line_formats;
pub mod n3;
pub mod nquads;
pub mod ntriples;
mod terse;
mod toolkit;
pub mod trig;
pub mod turtle;
pub use crate::oxttl::n3::N3Parser;
pub use crate::oxttl::nquads::{NQuadsParser, NQuadsSerializer};
pub use crate::oxttl::ntriples::{NTriplesParser, NTriplesSerializer};
pub use crate::oxttl::toolkit::{TextPosition, TurtleParseError, TurtleSyntaxError};
pub use crate::oxttl::trig::{TriGParser, TriGSerializer};
pub use crate::oxttl::turtle::{TurtleParser, TurtleSerializer};
// Parser read buffers start at 4 KiB and may grow up to 16 MiB.
pub(crate) const MIN_BUFFER_SIZE: usize = 4096;
pub(crate) const MAX_BUFFER_SIZE: usize = 4096 * 4096;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,564 @@
//! A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser implemented by [`NQuadsParser`]
//! and a serializer implemented by [`NQuadsSerializer`].
use crate::oxrdf::{Quad, QuadRef};
use crate::oxttl::line_formats::NQuadsRecognizer;
#[cfg(feature = "async-tokio")]
use crate::oxttl::toolkit::FromTokioAsyncReadIterator;
use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError};
use std::io::{self, Read, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
/// A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser.
///
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star) is available behind the `rdf-star` feature and the [`NQuadsParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNodeRef, vocab::rdf};
/// use oxttl::NQuadsParser;
///
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
///
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in NQuadsParser::new().parse_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
#[must_use]
pub struct NQuadsParser {
    // Skip lexer validations (IRIs, language tags) for speed;
    // invalid input may then produce broken RDF.
    unchecked: bool,
    // Accept RDF-star quoted triples (`<< ... >>`) as subject/object.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
impl NQuadsParser {
/// Builds a new [`NQuadsParser`].
    #[inline]
    pub fn new() -> Self {
        // Default configuration: checked parsing, no quoted triples.
        Self::default()
    }
/// Assumes the file is valid to make parsing faster.
///
/// It will skip some validations.
///
/// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
    #[inline]
    pub fn unchecked(mut self) -> Self {
        // Builder-style toggle: consumes and returns self.
        self.unchecked = true;
        self
    }
/// Enables [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star).
#[cfg(feature = "rdf-star")]
#[inline]
pub fn with_quoted_triples(mut self) -> Self {
self.with_quoted_triples = true;
self
}
/// Parses a N-Quads file from a [`Read`] implementation.
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNodeRef, vocab::rdf};
/// use oxttl::NQuadsParser;
///
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
///
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for quad in NQuadsParser::new().parse_read(file.as_ref()) {
/// let quad = quad?;
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
    pub fn parse_read<R: Read>(self, read: R) -> FromReadNQuadsReader<R> {
        // Wraps the low-level parser into a pull-based Iterator over quads.
        FromReadNQuadsReader {
            inner: self.parse().parser.parse_read(read),
        }
    }
/// Parses a N-Quads file from a [`AsyncRead`] implementation.
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNodeRef, vocab::rdf};
/// use oxttl::NQuadsParser;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
/// let file = br#"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/foo> <http://schema.org/name> "Foo" .
/// <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
/// <http://example.com/bar> <http://schema.org/name> "Bar" ."#;
///
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
/// let mut count = 0;
/// let mut parser = NQuadsParser::new().parse_tokio_async_read(file.as_ref());
/// while let Some(triple) = parser.next().await {
/// let triple = triple?;
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
self,
read: R,
) -> FromTokioAsyncReadNQuadsReader<R> {
FromTokioAsyncReadNQuadsReader {
inner: self.parse().parser.parse_tokio_async_read(read),
}
}
/// Allows to parse a N-Quads file by using a low-level API.
///
/// Count the number of people:
/// ```
/// use oxrdf::{NamedNodeRef, vocab::rdf};
/// use oxttl::NQuadsParser;
///
/// let file: [&[u8]; 4] = [
/// b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/foo> <http://schema.org/name> \"Foo\" .\n",
/// b"<http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .\n",
/// b"<http://example.com/bar> <http://schema.org/name> \"Bar\" .\n"
/// ];
///
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = NQuadsParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many quads from the parser as possible
/// while let Some(quad) = parser.read_next() {
/// let quad = quad?;
/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
    #[allow(clippy::unused_self)]
    pub fn parse(self) -> LowLevelNQuadsReader {
        // N-Quads always allows a graph name slot, hence the leading `true`.
        LowLevelNQuadsReader {
            parser: NQuadsRecognizer::new_parser(
                true,
                #[cfg(feature = "rdf-star")]
                self.with_quoted_triples,
                self.unchecked,
            ),
        }
    }
}
/// Streaming N-Quads reader over a [`Read`] implementation.
/// Can be built using [`NQuadsParser::parse_read`].
///
/// Iterating yields one [`Quad`] (or a [`TurtleParseError`]) at a time.
#[must_use]
pub struct FromReadNQuadsReader<R: Read> {
    inner: FromReadIterator<R, NQuadsRecognizer>,
}

impl<R: Read> Iterator for FromReadNQuadsReader<R> {
    type Item = Result<Quad, TurtleParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        // Simply forward to the underlying recognizer-driven iterator.
        self.inner.next()
    }
}
/// Asynchronous N-Quads reader over an [`AsyncRead`] implementation.
/// Can be built using [`NQuadsParser::parse_tokio_async_read`].
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadNQuadsReader<R: AsyncRead + Unpin> {
    inner: FromTokioAsyncReadIterator<R, NQuadsRecognizer>,
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadNQuadsReader<R> {
    /// Reads the next quad, or returns `None` once the file is fully parsed.
    pub async fn next(&mut self) -> Option<Result<Quad, TurtleParseError>> {
        // The recognizer already yields `Quad`s, so no conversion is needed here.
        self.inner.next().await
    }
}
/// Low-level, push-based N-Quads parser. Can be built using [`NQuadsParser::parse`].
///
/// Bytes are fed in with [`extend_from_slice`](Self::extend_from_slice), the end of
/// input is signaled with [`end`](Self::end) and parsed quads are pulled out with
/// [`read_next`](Self::read_next).
pub struct LowLevelNQuadsReader {
    parser: Parser<NQuadsRecognizer>,
}

impl LowLevelNQuadsReader {
    /// Feeds more bytes to the parser.
    ///
    /// Should be called when [`read_next`](Self::read_next) returns [`None`] and
    /// there is still unread data.
    pub fn extend_from_slice(&mut self, bytes: &[u8]) {
        self.parser.extend_from_slice(bytes)
    }

    /// Tells the parser that the input is complete.
    ///
    /// This triggers the parsing of the final bytes and might lead
    /// [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns whether parsing is finished, i.e. [`end`](Self::end) has been called
    /// and [`read_next`](Self::read_next) will always return `None` from now on.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempts to parse a new quad from the data provided so far.
    ///
    /// Returns [`None`] when parsing is finished or more data is required; in the
    /// latter case, feed more bytes with [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Quad, TurtleSyntaxError>> {
        self.parser.read_next()
    }
}
/// A [N-Quads](https://www.w3.org/TR/n-quads/) serializer.
///
/// Support for [N-Quads-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-quads-star)
/// is available behind the `rdf-star` feature.
#[derive(Default)]
#[must_use]
pub struct NQuadsSerializer;

impl NQuadsSerializer {
    /// Builds a new [`NQuadsSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self
    }

    /// Writes a N-Quads file to a [`Write`] implementation.
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteNQuadsWriter<W> {
        let writer = self.serialize();
        ToWriteNQuadsWriter { write, writer }
    }

    /// Writes a N-Quads file to an [`AsyncWrite`] implementation.
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteNQuadsWriter<W> {
        ToTokioAsyncWriteNQuadsWriter {
            write,
            writer: self.serialize(),
            // Scratch space reused for each serialized quad before the async flush.
            buffer: Vec::new(),
        }
    }

    /// Builds a low-level N-Quads writer that serializes to any [`Write`] sink.
    #[allow(clippy::unused_self)]
    pub fn serialize(self) -> LowLevelNQuadsWriter {
        LowLevelNQuadsWriter
    }
}
/// Writes N-Quads to a [`Write`] implementation.
/// Can be built using [`NQuadsSerializer::serialize_to_write`].
#[must_use]
pub struct ToWriteNQuadsWriter<W: Write> {
    write: W,
    writer: LowLevelNQuadsWriter,
}

impl<W: Write> ToWriteNQuadsWriter<W> {
    /// Writes an extra quad.
    pub fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        self.writer.write_quad(quad, &mut self.write)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> W {
        self.write
    }
}
/// Writes N-Quads to an [`AsyncWrite`] implementation.
/// Can be built using [`NQuadsSerializer::serialize_to_tokio_async_write`].
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteNQuadsWriter<W: AsyncWrite + Unpin> {
    write: W,
    writer: LowLevelNQuadsWriter,
    buffer: Vec<u8>,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteNQuadsWriter<W> {
    /// Writes an extra quad.
    pub async fn write_quad<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
        // Serialize into the internal buffer first, then flush it to the async sink.
        self.writer.write_quad(quad, &mut self.buffer)?;
        self.write.write_all(&self.buffer).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    pub fn finish(self) -> W {
        self.write
    }
}
/// Low-level N-Quads writer. Can be built using [`NQuadsSerializer::serialize`].
pub struct LowLevelNQuadsWriter;

impl LowLevelNQuadsWriter {
    /// Writes an extra quad to the given [`Write`] implementation.
    #[allow(clippy::unused_self)]
    pub fn write_quad<'a>(
        &mut self,
        q: impl Into<QuadRef<'a>>,
        mut write: impl Write,
    ) -> io::Result<()> {
        // A quad serializes as its Display form followed by " .\n".
        let quad = q.into();
        writeln!(write, "{} .", quad)
    }
}

@ -0,0 +1,580 @@
//! A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser implemented by [`NTriplesParser`]
//! and a serializer implemented by [`NTriplesSerializer`].
use crate::oxrdf::{Triple, TripleRef};
use crate::oxttl::line_formats::NQuadsRecognizer;
#[cfg(feature = "async-tokio")]
use crate::oxttl::toolkit::FromTokioAsyncReadIterator;
use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError};
use std::io::{self, Read, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
/// A [N-Triples](https://www.w3.org/TR/n-triples/) streaming parser.
///
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star)
/// is available behind the `rdf-star` feature and the [`NTriplesParser::with_quoted_triples`] option.
#[derive(Default)]
#[must_use]
pub struct NTriplesParser {
    unchecked: bool,
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}

impl NTriplesParser {
    /// Builds a new [`NTriplesParser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Assumes the file is valid in order to make parsing faster: some validations are skipped.
    ///
    /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
    #[inline]
    pub fn unchecked(mut self) -> Self {
        self.unchecked = true;
        self
    }

    /// Enables [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star) support.
    #[cfg(feature = "rdf-star")]
    #[inline]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }

    /// Parses a N-Triples file from a [`Read`] implementation,
    /// returning an iterator of [`Triple`]s.
    pub fn parse_read<R: Read>(self, read: R) -> FromReadNTriplesReader<R> {
        let inner = self.parse().parser.parse_read(read);
        FromReadNTriplesReader { inner }
    }

    /// Parses a N-Triples file from an [`AsyncRead`] implementation.
    #[cfg(feature = "async-tokio")]
    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
        self,
        read: R,
    ) -> FromTokioAsyncReadNTriplesReader<R> {
        let inner = self.parse().parser.parse_tokio_async_read(read);
        FromTokioAsyncReadNTriplesReader { inner }
    }

    /// Creates a low-level, push-based N-Triples parser.
    ///
    /// Feed bytes in with [`extend_from_slice`](LowLevelNTriplesReader::extend_from_slice),
    /// signal the end of input with [`end`](LowLevelNTriplesReader::end) and pull parsed
    /// triples out with [`read_next`](LowLevelNTriplesReader::read_next).
    #[allow(clippy::unused_self)]
    pub fn parse(self) -> LowLevelNTriplesReader {
        let parser = NQuadsRecognizer::new_parser(
            false, // N-Triples: no graph name is allowed
            #[cfg(feature = "rdf-star")]
            self.with_quoted_triples,
            self.unchecked,
        );
        LowLevelNTriplesReader { parser }
    }
}
/// Streaming N-Triples reader over a [`Read`] implementation.
/// Can be built using [`NTriplesParser::parse_read`].
///
/// Iterating yields one [`Triple`] (or a [`TurtleParseError`]) at a time.
#[must_use]
pub struct FromReadNTriplesReader<R: Read> {
    inner: FromReadIterator<R, NQuadsRecognizer>,
}

impl<R: Read> Iterator for FromReadNTriplesReader<R> {
    type Item = Result<Triple, TurtleParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        // The shared recognizer emits quads; convert each one into a triple.
        let quad = self.inner.next()?;
        Some(quad.map(Into::into))
    }
}
/// Asynchronous N-Triples reader over an [`AsyncRead`] implementation.
/// Can be built using [`NTriplesParser::parse_tokio_async_read`].
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadNTriplesReader<R: AsyncRead + Unpin> {
    inner: FromTokioAsyncReadIterator<R, NQuadsRecognizer>,
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadNTriplesReader<R> {
    /// Reads the next triple, or returns `None` once the file is fully parsed.
    pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
        // The shared recognizer emits quads; convert each one into a triple.
        let quad = self.inner.next().await?;
        Some(quad.map(Into::into))
    }
}
/// Low-level, push-based N-Triples parser. Can be built using [`NTriplesParser::parse`].
///
/// Bytes are fed in with [`extend_from_slice`](Self::extend_from_slice), the end of
/// input is signaled with [`end`](Self::end) and parsed triples are pulled out with
/// [`read_next`](Self::read_next).
pub struct LowLevelNTriplesReader {
    parser: Parser<NQuadsRecognizer>,
}

impl LowLevelNTriplesReader {
    /// Feeds more bytes to the parser.
    ///
    /// Should be called when [`read_next`](Self::read_next) returns [`None`] and
    /// there is still unread data.
    pub fn extend_from_slice(&mut self, bytes: &[u8]) {
        self.parser.extend_from_slice(bytes)
    }

    /// Tells the parser that the input is complete.
    ///
    /// This triggers the parsing of the final bytes and might lead
    /// [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end()
    }

    /// Returns whether parsing is finished, i.e. [`end`](Self::end) has been called
    /// and [`read_next`](Self::read_next) will always return `None` from now on.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempts to parse a new triple from the data provided so far.
    ///
    /// Returns [`None`] when parsing is finished or more data is required; in the
    /// latter case, feed more bytes with [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> {
        // The shared recognizer emits quads; convert each one into a triple.
        let quad = self.parser.read_next()?;
        Some(quad.map(Into::into))
    }
}
/// A [canonical](https://www.w3.org/TR/n-triples/#canonical-ntriples) [N-Triples](https://www.w3.org/TR/n-triples/) serializer.
///
/// Support for [N-Triples-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#n-triples-star)
/// is available behind the `rdf-star` feature.
#[derive(Default)]
#[must_use]
pub struct NTriplesSerializer;

impl NTriplesSerializer {
    /// Builds a new [`NTriplesSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self
    }

    /// Writes a N-Triples file to a [`Write`] implementation.
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteNTriplesWriter<W> {
        let writer = self.serialize();
        ToWriteNTriplesWriter { write, writer }
    }

    /// Writes a N-Triples file to an [`AsyncWrite`] implementation.
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteNTriplesWriter<W> {
        ToTokioAsyncWriteNTriplesWriter {
            write,
            writer: self.serialize(),
            // Scratch space reused for each serialized triple before the async flush.
            buffer: Vec::new(),
        }
    }

    /// Builds a low-level N-Triples writer that serializes to any [`Write`] sink.
    #[allow(clippy::unused_self)]
    pub fn serialize(self) -> LowLevelNTriplesWriter {
        LowLevelNTriplesWriter
    }
}
/// Writes N-Triples to a [`Write`] implementation.
/// Can be built using [`NTriplesSerializer::serialize_to_write`].
#[must_use]
pub struct ToWriteNTriplesWriter<W: Write> {
    write: W,
    writer: LowLevelNTriplesWriter,
}

impl<W: Write> ToWriteNTriplesWriter<W> {
    /// Writes an extra triple.
    pub fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        self.writer.write_triple(triple, &mut self.write)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> W {
        self.write
    }
}
/// Writes N-Triples to an [`AsyncWrite`] implementation.
/// Can be built using [`NTriplesSerializer::serialize_to_tokio_async_write`].
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteNTriplesWriter<W: AsyncWrite + Unpin> {
    write: W,
    writer: LowLevelNTriplesWriter,
    buffer: Vec<u8>,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteNTriplesWriter<W> {
    /// Writes an extra triple.
    pub async fn write_triple<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Serialize into the internal buffer first, then flush it to the async sink.
        self.writer.write_triple(triple, &mut self.buffer)?;
        self.write.write_all(&self.buffer).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Ends the write process and returns the underlying [`AsyncWrite`].
    pub fn finish(self) -> W {
        self.write
    }
}
/// Low-level N-Triples writer. Can be built using [`NTriplesSerializer::serialize`].
pub struct LowLevelNTriplesWriter;

impl LowLevelNTriplesWriter {
    /// Writes an extra triple to the given [`Write`] implementation.
    #[allow(clippy::unused_self)]
    pub fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        mut write: impl Write,
    ) -> io::Result<()> {
        // A triple serializes as its Display form followed by " .\n".
        let triple = t.into();
        writeln!(write, "{} .", triple)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::oxrdf::{Literal, NamedNode};

    #[test]
    fn unchecked_parsing() {
        // An over-long language tag would be rejected by the strict parser,
        // but unchecked mode skips that validation.
        let input = r#"<foo> <bar> "baz"@toolonglangtag ."#;
        let parsed: Vec<_> = NTriplesParser::new()
            .unchecked()
            .parse_read(input.as_bytes())
            .collect::<Result<_, _>>()
            .unwrap();
        let expected = Triple::new(
            NamedNode::new_unchecked("foo"),
            NamedNode::new_unchecked("bar"),
            Literal::new_language_tagged_literal_unchecked("baz", "toolonglangtag"),
        );
        assert_eq!(parsed, [expected])
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,97 @@
use std::ops::Range;
use std::{fmt, io};
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// 0-based line number.
    pub line: u64,
    /// 0-based column number, counted in Unicode code points.
    pub column: u64,
    /// 0-based byte offset from the start of the file.
    pub offset: u64,
}
/// An error in the syntax of the parsed file.
///
/// It is composed of a message and a byte range in the input.
#[derive(Debug, thiserror::Error)]
pub struct TurtleSyntaxError {
    // Byte/line/column span of the offending input; `Display` is hand-written below.
    pub(super) location: Range<TextPosition>,
    // Human-readable description of what went wrong.
    pub(super) message: String,
}
impl TurtleSyntaxError {
    /// The location of the error inside of the file.
    #[inline]
    pub fn location(&self) -> Range<TextPosition> {
        // `TextPosition` is `Copy`, so rebuilding the range copies it cheaply.
        self.location.start..self.location.end
    }

    /// The error message.
    #[inline]
    pub fn message(&self) -> &str {
        self.message.as_str()
    }
}
impl fmt::Display for TurtleSyntaxError {
    /// Renders the error with 1-based line/column numbers (positions are
    /// stored 0-based; editors conventionally display them 1-based).
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.location.start.offset + 1 >= self.location.end.offset {
            // Zero- or one-byte span: report a single position.
            write!(
                f,
                "Parser error at line {} column {}: {}",
                self.location.start.line + 1,
                self.location.start.column + 1,
                self.message
            )
        } else if self.location.start.line == self.location.end.line {
            // Multi-byte span within a single line.
            // Fixed garbled wording ("between at line … between columns … and column …").
            write!(
                f,
                "Parser error at line {} between columns {} and {}: {}",
                self.location.start.line + 1,
                self.location.start.column + 1,
                self.location.end.column + 1,
                self.message
            )
        } else {
            // Span covering several lines.
            write!(
                f,
                "Parser error between line {} column {} and line {} column {}: {}",
                self.location.start.line + 1,
                self.location.start.column + 1,
                self.location.end.line + 1,
                self.location.end.column + 1,
                self.message
            )
        }
    }
}
impl From<TurtleSyntaxError> for io::Error {
    /// Wraps a syntax error as [`io::ErrorKind::InvalidData`], the
    /// conventional kind for well-formed I/O carrying malformed content.
    #[inline]
    fn from(error: TurtleSyntaxError) -> Self {
        Self::new(io::ErrorKind::InvalidData, error)
    }
}
/// A parsing error.
///
/// It is the union of [`TurtleSyntaxError`] and [`io::Error`].
#[derive(Debug, thiserror::Error)]
pub enum TurtleParseError {
    /// I/O error during parsing (file not found, permission denied, read failure...).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An error in the file syntax, with its location in the input.
    #[error(transparent)]
    Syntax(#[from] TurtleSyntaxError),
}
impl From<TurtleParseError> for io::Error {
    /// Flattens a parse error into an [`io::Error`]: I/O errors are unwrapped
    /// as-is, syntax errors are wrapped as `InvalidData` (see the
    /// `From<TurtleSyntaxError>` impl).
    #[inline]
    fn from(error: TurtleParseError) -> Self {
        match error {
            TurtleParseError::Io(io_error) => io_error,
            TurtleParseError::Syntax(syntax_error) => syntax_error.into(),
        }
    }
}

@ -0,0 +1,432 @@
use crate::oxttl::toolkit::error::{TextPosition, TurtleSyntaxError};
use memchr::{memchr2, memchr2_iter};
use std::borrow::Cow;
use std::cmp::min;
use std::io::{self, Read};
use std::ops::{Range, RangeInclusive};
use std::str;
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncReadExt};
/// Language-specific tokenizer plugged into [`Lexer`].
pub trait TokenRecognizer {
    /// A recognized token; may borrow from the input buffer.
    type Token<'a>
    where
        Self: 'a;
    /// Tokenizer configuration; `Default` gives the baseline behavior.
    type Options: Default;
    /// Tries to recognize one token at the start of `data`.
    ///
    /// Returns `None` when more input is needed (unless `is_ending`), or
    /// `Some((consumed, result))` where `consumed` is the number of bytes the
    /// token spans — the caller asserts it is > 0.
    fn recognize_next_token<'a>(
        &mut self,
        data: &'a [u8],
        is_ending: bool,
        config: &Self::Options,
    ) -> Option<(usize, Result<Self::Token<'a>, TokenRecognizerError>)>;
}
/// Tokenization failure: a message plus the byte range of the bad input,
/// relative to the start of the token being recognized.
pub struct TokenRecognizerError {
    pub location: Range<usize>,
    pub message: String,
}
/// Convenience conversion from a `(byte range, message)` pair.
impl<S: Into<String>> From<(Range<usize>, S)> for TokenRecognizerError {
    fn from((location, message): (Range<usize>, S)) -> Self {
        let message = message.into();
        Self { location, message }
    }
}
// An half-open range is required by the error type, hence the `end + 1`.
#[allow(clippy::range_plus_one)]
impl<S: Into<String>> From<(RangeInclusive<usize>, S)> for TokenRecognizerError {
    fn from((location, message): (RangeInclusive<usize>, S)) -> Self {
        let (start, end) = (*location.start(), *location.end());
        (start..end + 1, message).into()
    }
}
/// Convenience conversion for a single-byte error location.
impl<S: Into<String>> From<(usize, S)> for TokenRecognizerError {
    fn from((location, message): (usize, S)) -> Self {
        (location..location + 1, message).into()
    }
}
/// Incremental lexer: feeds buffered input chunks to a [`TokenRecognizer`]
/// while tracking line/column positions for error reporting.
pub struct Lexer<R: TokenRecognizer> {
    parser: R,
    data: Vec<u8>,                                // input buffer, shrunk as lines are consumed
    position: Position,                           // current read position
    previous_position: Position, // Lexer position before the last emitted token
    is_ending: bool,                              // true once the input source reported EOF
    min_buffer_size: usize,                       // minimal growth step when refilling `data`
    max_buffer_size: usize,                       // hard cap; exceeding it is an OutOfMemory error
    is_line_jump_whitespace: bool,                // whether \r / \n are skippable whitespace
    line_comment_start: Option<&'static [u8]>,    // e.g. b"#"; None disables comment skipping
}
#[derive(Clone, Copy)]
struct Position {
    line_start_buffer_offset: usize, // buffer offset of the current line's first byte
    buffer_offset: usize,            // offset into `data` (shifts when the buffer is shrunk)
    global_offset: u64,              // absolute byte offset from the start of the input
    global_line: u64,                // absolute 0-based line number
}
impl<R: TokenRecognizer> Lexer<R> {
    /// Builds a lexer around `parser` with the given buffering limits and
    /// whitespace/comment conventions.
    pub fn new(
        parser: R,
        min_buffer_size: usize,
        max_buffer_size: usize,
        is_line_jump_whitespace: bool,
        line_comment_start: Option<&'static [u8]>,
    ) -> Self {
        Self {
            parser,
            data: Vec::new(),
            position: Position {
                line_start_buffer_offset: 0,
                buffer_offset: 0,
                global_offset: 0,
                global_line: 0,
            },
            previous_position: Position {
                line_start_buffer_offset: 0,
                buffer_offset: 0,
                global_offset: 0,
                global_line: 0,
            },
            is_ending: false,
            min_buffer_size,
            max_buffer_size,
            is_line_jump_whitespace,
            line_comment_start,
        }
    }

    /// Appends raw input bytes to the internal buffer.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.shrink_data();
        self.data.extend_from_slice(other);
    }

    /// Marks the input as complete: no more bytes will be fed.
    #[inline]
    pub fn end(&mut self) {
        self.is_ending = true;
    }

    /// Refills the buffer from a synchronous reader.
    ///
    /// Grows the buffer by at least `min_buffer_size` (up to spare capacity),
    /// and errors with `OutOfMemory` once `max_buffer_size` is reached.
    /// A zero-byte read marks the input as ended.
    pub fn extend_from_read(&mut self, read: &mut impl Read) -> io::Result<()> {
        self.shrink_data();
        if self.data.len() == self.max_buffer_size {
            return Err(io::Error::new(
                io::ErrorKind::OutOfMemory,
                format!(
                    "Reached the buffer maximal size of {}",
                    self.max_buffer_size
                ),
            ));
        }
        let min_end = min(self.data.len() + self.min_buffer_size, self.max_buffer_size);
        let new_start = self.data.len();
        self.data.resize(min_end, 0);
        if self.data.len() < self.data.capacity() {
            // We keep extending to have as much space as available without reallocation
            self.data.resize(self.data.capacity(), 0);
        }
        let read = read.read(&mut self.data[new_start..])?;
        self.data.truncate(new_start + read);
        self.is_ending = read == 0;
        Ok(())
    }

    /// Async twin of [`extend_from_read`](Self::extend_from_read) for Tokio readers.
    #[cfg(feature = "async-tokio")]
    pub async fn extend_from_tokio_async_read(
        &mut self,
        read: &mut (impl AsyncRead + Unpin),
    ) -> io::Result<()> {
        self.shrink_data();
        if self.data.len() == self.max_buffer_size {
            return Err(io::Error::new(
                io::ErrorKind::OutOfMemory,
                format!(
                    "Reached the buffer maximal size of {}",
                    self.max_buffer_size
                ),
            ));
        }
        let min_end = min(self.data.len() + self.min_buffer_size, self.max_buffer_size);
        let new_start = self.data.len();
        self.data.resize(min_end, 0);
        if self.data.len() < self.data.capacity() {
            // We keep extending to have as much space as available without reallocation
            self.data.resize(self.data.capacity(), 0);
        }
        let read = read.read(&mut self.data[new_start..]).await?;
        self.data.truncate(new_start + read);
        self.is_ending = read == 0;
        Ok(())
    }

    /// Attempts to lex the next token.
    ///
    /// Returns `None` when more input is needed (or the input is fully
    /// consumed), otherwise the recognized token or a located syntax error.
    #[allow(clippy::unwrap_in_result)]
    pub fn read_next(
        &mut self,
        options: &R::Options,
    ) -> Option<Result<R::Token<'_>, TurtleSyntaxError>> {
        self.skip_whitespaces_and_comments()?;
        self.previous_position = self.position;
        let Some((consumed, result)) = self.parser.recognize_next_token(
            &self.data[self.position.buffer_offset..],
            self.is_ending,
            options,
        ) else {
            return if self.is_ending {
                if self.position.buffer_offset == self.data.len() {
                    None // We have finished
                } else {
                    // Trailing bytes that never formed a token: report an EOF error
                    // positioned at the end of the remaining data.
                    let (new_line_jumps, new_line_start) =
                        Self::find_number_of_line_jumps_and_start_of_last_line(
                            &self.data[self.position.buffer_offset..],
                        );
                    if new_line_jumps > 0 {
                        self.position.line_start_buffer_offset =
                            self.position.buffer_offset + new_line_start;
                    }
                    self.position.global_offset +=
                        u64::try_from(self.data.len() - self.position.buffer_offset).unwrap();
                    self.position.buffer_offset = self.data.len();
                    self.position.global_line += new_line_jumps;
                    let new_position = TextPosition {
                        line: self.position.global_line,
                        column: Self::column_from_bytes(
                            &self.data[self.position.line_start_buffer_offset..],
                        ),
                        offset: self.position.global_offset,
                    };
                    let error = TurtleSyntaxError {
                        location: new_position..new_position,
                        message: "Unexpected end of file".into(),
                    };
                    self.position.buffer_offset = self.data.len(); // We consume everything
                    Some(Err(error))
                }
            } else {
                None
            };
        };
        debug_assert!(
            consumed > 0,
            "The lexer must consume at least one byte each time"
        );
        debug_assert!(
            self.position.buffer_offset + consumed <= self.data.len(),
            "The lexer tried to consumed {consumed} bytes but only {} bytes are readable",
            self.data.len() - self.position.buffer_offset
        );
        // Advance line/column bookkeeping over the consumed token bytes.
        let (new_line_jumps, new_line_start) =
            Self::find_number_of_line_jumps_and_start_of_last_line(
                &self.data[self.position.buffer_offset..self.position.buffer_offset + consumed],
            );
        if new_line_jumps > 0 {
            self.position.line_start_buffer_offset = self.position.buffer_offset + new_line_start;
        }
        self.position.buffer_offset += consumed;
        self.position.global_offset += u64::try_from(consumed).unwrap();
        self.position.global_line += new_line_jumps;
        Some(result.map_err(|e| TurtleSyntaxError {
            location: self.location_from_buffer_offset_range(e.location),
            message: e.message,
        }))
    }

    /// Converts a byte range relative to the last token's start into absolute
    /// line/column/offset positions.
    pub fn location_from_buffer_offset_range(
        &self,
        offset_range: Range<usize>,
    ) -> Range<TextPosition> {
        let start_offset = self.previous_position.buffer_offset + offset_range.start;
        let (start_extra_line_jumps, start_line_start) =
            Self::find_number_of_line_jumps_and_start_of_last_line(
                &self.data[self.previous_position.buffer_offset..start_offset],
            );
        let start_line_start = if start_extra_line_jumps > 0 {
            start_line_start + self.previous_position.buffer_offset
        } else {
            self.previous_position.line_start_buffer_offset
        };
        let end_offset = self.previous_position.buffer_offset + offset_range.end;
        let (end_extra_line_jumps, end_line_start) =
            Self::find_number_of_line_jumps_and_start_of_last_line(
                &self.data[self.previous_position.buffer_offset..end_offset],
            );
        let end_line_start = if end_extra_line_jumps > 0 {
            end_line_start + self.previous_position.buffer_offset
        } else {
            self.previous_position.line_start_buffer_offset
        };
        TextPosition {
            line: self.previous_position.global_line + start_extra_line_jumps,
            column: Self::column_from_bytes(&self.data[start_line_start..start_offset]),
            offset: self.previous_position.global_offset
                + u64::try_from(offset_range.start).unwrap(),
        }..TextPosition {
            line: self.previous_position.global_line + end_extra_line_jumps,
            column: Self::column_from_bytes(&self.data[end_line_start..end_offset]),
            offset: self.previous_position.global_offset + u64::try_from(offset_range.end).unwrap(),
        }
    }

    /// Position span of the most recently emitted token.
    pub fn last_token_location(&self) -> Range<TextPosition> {
        TextPosition {
            line: self.previous_position.global_line,
            column: Self::column_from_bytes(
                &self.data[self.previous_position.line_start_buffer_offset
                    ..self.previous_position.buffer_offset],
            ),
            offset: self.previous_position.global_offset,
        }..TextPosition {
            line: self.position.global_line,
            column: Self::column_from_bytes(
                &self.data[self.position.line_start_buffer_offset..self.position.buffer_offset],
            ),
            offset: self.position.global_offset,
        }
    }

    /// Raw text of the most recently emitted token (lossy UTF-8).
    pub fn last_token_source(&self) -> Cow<'_, str> {
        String::from_utf8_lossy(
            &self.data[self.previous_position.buffer_offset..self.position.buffer_offset],
        )
    }

    /// True once the input ended and every buffered byte has been consumed.
    pub fn is_end(&self) -> bool {
        self.is_ending && self.data.len() == self.position.buffer_offset
    }

    /// Skips whitespace and line comments; `None` means more input is needed.
    #[allow(clippy::unwrap_in_result)]
    fn skip_whitespaces_and_comments(&mut self) -> Option<()> {
        loop {
            self.skip_whitespaces()?;
            let buf = &self.data[self.position.buffer_offset..];
            if let Some(line_comment_start) = self.line_comment_start {
                if buf.starts_with(line_comment_start) {
                    // Comment
                    if let Some(end) = memchr2(b'\r', b'\n', &buf[line_comment_start.len()..]) {
                        let mut end_position = line_comment_start.len() + end;
                        if buf.get(end_position).copied() == Some(b'\r') {
                            // We look for \n for Windows line end style
                            if let Some(c) = buf.get(end_position + 1) {
                                if *c == b'\n' {
                                    end_position += 1;
                                }
                            } else if !self.is_ending {
                                return None; // We need to read more
                            }
                        }
                        let comment_size = end_position + 1;
                        self.position.buffer_offset += comment_size;
                        self.position.line_start_buffer_offset = self.position.buffer_offset;
                        self.position.global_offset += u64::try_from(comment_size).unwrap();
                        self.position.global_line += 1;
                        continue;
                    }
                    if self.is_ending {
                        self.position.buffer_offset = self.data.len(); // EOF
                        return Some(());
                    }
                    return None; // We need more data
                }
            }
            return Some(());
        }
    }

    /// Skips ' ' and '\t' (plus line jumps when they count as whitespace),
    /// keeping the line/column counters in sync.
    fn skip_whitespaces(&mut self) -> Option<()> {
        if self.is_line_jump_whitespace {
            let mut i = self.position.buffer_offset;
            while let Some(c) = self.data.get(i) {
                match c {
                    b' ' | b'\t' => {
                        self.position.buffer_offset += 1;
                        self.position.global_offset += 1;
                    }
                    b'\r' => {
                        // We look for \n for Windows line end style
                        let mut increment: u8 = 1;
                        if let Some(c) = self.data.get(i + 1) {
                            if *c == b'\n' {
                                increment += 1;
                                i += 1;
                            }
                        } else if !self.is_ending {
                            return None; // We need to read more
                        }
                        self.position.buffer_offset += usize::from(increment);
                        self.position.line_start_buffer_offset = self.position.buffer_offset;
                        self.position.global_offset += u64::from(increment);
                        self.position.global_line += 1;
                    }
                    b'\n' => {
                        self.position.buffer_offset += 1;
                        self.position.line_start_buffer_offset = self.position.buffer_offset;
                        self.position.global_offset += 1;
                        self.position.global_line += 1;
                    }
                    _ => return Some(()),
                }
                i += 1;
                // TODO: SIMD
            }
        } else {
            for c in &self.data[self.position.buffer_offset..] {
                if matches!(c, b' ' | b'\t') {
                    self.position.buffer_offset += 1;
                    self.position.global_offset += 1;
                } else {
                    return Some(());
                }
                // TODO: SIMD
            }
        }
        Some(())
    }

    /// Drops already-consumed complete lines from the front of the buffer.
    fn shrink_data(&mut self) {
        if self.position.line_start_buffer_offset > 0 {
            self.data
                .copy_within(self.position.line_start_buffer_offset.., 0);
            self.data
                .truncate(self.data.len() - self.position.line_start_buffer_offset);
            self.position.buffer_offset -= self.position.line_start_buffer_offset;
            self.position.line_start_buffer_offset = 0;
            self.previous_position = self.position;
        }
    }

    /// Counts line jumps in `bytes` (a \r\n pair counts once) and returns the
    /// offset just after the last jump.
    ///
    /// Fixed: the previous version compared against `pos - 1`, which
    /// underflowed (debug panic) for a '\n' at offset 0 and missed a lone
    /// '\n' at offset 1 because the "previous CR" sentinel started at 0.
    /// Checking the actual preceding byte avoids both problems.
    fn find_number_of_line_jumps_and_start_of_last_line(bytes: &[u8]) -> (u64, usize) {
        let mut num_of_jumps = 0;
        let mut last_jump_pos = 0;
        for pos in memchr2_iter(b'\r', b'\n', bytes) {
            if bytes[pos] == b'\r' {
                num_of_jumps += 1;
                last_jump_pos = pos + 1;
            } else {
                // We count \r\n as a single line jump (already counted at the \r)
                if pos == 0 || bytes[pos - 1] != b'\r' {
                    num_of_jumps += 1;
                }
                last_jump_pos = pos + 1;
            }
        }
        (num_of_jumps, last_jump_pos)
    }

    /// Column (in code points) of the position right after `bytes`;
    /// truncates to the last valid UTF-8 boundary on decode errors.
    fn column_from_bytes(bytes: &[u8]) -> u64 {
        match str::from_utf8(bytes) {
            Ok(s) => u64::try_from(s.chars().count()).unwrap(),
            Err(e) => {
                if e.valid_up_to() == 0 {
                    0
                } else {
                    Self::column_from_bytes(&bytes[..e.valid_up_to()])
                }
            }
        }
    }
}

@ -0,0 +1,13 @@
//! oxttl parsing toolkit.
//!
//! Provides the basic code to write plain Rust lexers and parsers able to read files chunk by chunk.
mod error;
mod lexer;
mod parser;
pub use self::error::{TextPosition, TurtleParseError, TurtleSyntaxError};
pub use self::lexer::{Lexer, TokenRecognizer, TokenRecognizerError};
#[cfg(feature = "async-tokio")]
pub use self::parser::FromTokioAsyncReadIterator;
pub use self::parser::{FromReadIterator, Parser, RuleRecognizer, RuleRecognizerError};

@ -0,0 +1,183 @@
use crate::oxttl::toolkit::error::{TurtleParseError, TurtleSyntaxError};
use crate::oxttl::toolkit::lexer::{Lexer, TokenRecognizer};
use std::io::Read;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;
/// Grammar-level recognizer driven by [`Parser`]: consumes tokens and pushes
/// completed outputs (e.g. triples) and errors into the provided buffers.
pub trait RuleRecognizer: Sized {
    /// The tokenizer this grammar reads from.
    type TokenRecognizer: TokenRecognizer;
    /// The values produced (e.g. triples or quads).
    type Output;
    /// Mutable parsing state shared with the caller (prefixes, base IRI...).
    type Context;
    /// Returns the state to resume from after a tokenizer error.
    fn error_recovery_state(self) -> Self;
    /// Consumes one token, returning the next recognizer state;
    /// completed outputs and errors are appended to the given buffers.
    fn recognize_next(
        self,
        token: <Self::TokenRecognizer as TokenRecognizer>::Token<'_>,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self;
    /// Called once at end of input to flush pending outputs or report
    /// an unterminated construct.
    fn recognize_end(
        self,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    );
    /// Tokenizer options to use for the given context.
    fn lexer_options(
        context: &Self::Context,
    ) -> &<Self::TokenRecognizer as TokenRecognizer>::Options;
}
/// Grammar-level error; the literal "TOKEN" in `message` is replaced by the
/// offending token's source text when surfaced (see `Parser::read_next`).
pub struct RuleRecognizerError {
    pub message: String,
}
/// Lets recognizers report errors from plain strings or `String`s.
impl<S: Into<String>> From<S> for RuleRecognizerError {
    fn from(message: S) -> Self {
        let message = message.into();
        Self { message }
    }
}
/// Pull-based parser combining a [`Lexer`] with a [`RuleRecognizer`].
#[allow(clippy::partial_pub_fields)]
pub struct Parser<RR: RuleRecognizer> {
    lexer: Lexer<RR::TokenRecognizer>,
    state: Option<RR>,            // None once `recognize_end` has run
    pub context: RR::Context,     // exposed so callers can inspect prefixes/base
    results: Vec<RR::Output>,     // outputs queued by the recognizer
    errors: Vec<RuleRecognizerError>, // errors queued by the recognizer
}
impl<RR: RuleRecognizer> Parser<RR> {
    /// Assembles a parser from its lexer, initial recognizer state and context.
    pub fn new(lexer: Lexer<RR::TokenRecognizer>, recognizer: RR, context: RR::Context) -> Self {
        Self {
            lexer,
            state: Some(recognizer),
            context,
            results: vec![],
            errors: vec![],
        }
    }
    /// Feeds more input bytes to the underlying lexer.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.lexer.extend_from_slice(other)
    }
    /// Signals end of input to the underlying lexer.
    #[inline]
    pub fn end(&mut self) {
        self.lexer.end()
    }
    /// True once the final state has been consumed and all queued
    /// results/errors have been drained.
    #[inline]
    pub fn is_end(&self) -> bool {
        self.state.is_none() && self.results.is_empty() && self.errors.is_empty()
    }
    /// Returns the next output, a syntax error, or `None` when more input is
    /// needed (or parsing is finished).
    pub fn read_next(&mut self) -> Option<Result<RR::Output, TurtleSyntaxError>> {
        loop {
            // Queued errors are surfaced before queued results so an error for
            // the current token is not delayed behind earlier outputs.
            if let Some(error) = self.errors.pop() {
                return Some(Err(TurtleSyntaxError {
                    location: self.lexer.last_token_location(),
                    message: error
                        .message
                        .replace("TOKEN", &self.lexer.last_token_source()),
                }));
            }
            // NOTE(review): results are popped LIFO; fine if recognizers queue
            // at most one output per token — confirm against TriGRecognizer.
            if let Some(result) = self.results.pop() {
                return Some(Ok(result));
            }
            if let Some(result) = self.lexer.read_next(RR::lexer_options(&self.context)) {
                match result {
                    Ok(token) => {
                        // Thread the recognizer state through `Option::take`
                        // since `recognize_next` consumes it by value.
                        self.state = self.state.take().map(|state| {
                            state.recognize_next(
                                token,
                                &mut self.context,
                                &mut self.results,
                                &mut self.errors,
                            )
                        });
                        continue;
                    }
                    Err(e) => {
                        self.state = self.state.take().map(RR::error_recovery_state);
                        return Some(Err(e));
                    }
                }
            }
            if self.lexer.is_end() {
                // Flush the recognizer once; `?` returns None on later calls.
                self.state.take()?.recognize_end(
                    &mut self.context,
                    &mut self.results,
                    &mut self.errors,
                )
            } else {
                return None;
            }
        }
    }
    /// Wraps this parser into an iterator pulling bytes from `read`.
    pub fn parse_read<R: Read>(self, read: R) -> FromReadIterator<R, RR> {
        FromReadIterator { read, parser: self }
    }
    /// Wraps this parser into an async puller reading from a Tokio reader.
    #[cfg(feature = "async-tokio")]
    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
        self,
        read: R,
    ) -> FromTokioAsyncReadIterator<R, RR> {
        FromTokioAsyncReadIterator { read, parser: self }
    }
}
/// Blocking iterator adapter: pulls bytes from `read` on demand and yields
/// recognizer outputs.
#[allow(clippy::partial_pub_fields)]
pub struct FromReadIterator<R: Read, RR: RuleRecognizer> {
    read: R,
    pub parser: Parser<RR>, // exposed so callers can inspect parsing context
}
impl<R: Read, RR: RuleRecognizer> Iterator for FromReadIterator<R, RR> {
    type Item = Result<RR::Output, TurtleParseError>;

    /// Alternates between draining the parser and refilling it from `read`
    /// until an output, an error, or the end of input is reached.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.parser.is_end() {
                return None;
            }
            if let Some(result) = self.parser.read_next() {
                return Some(result.map_err(TurtleParseError::Syntax));
            }
            if let Err(e) = self.parser.lexer.extend_from_read(&mut self.read) {
                return Some(Err(e.into()));
            }
        }
    }
}
/// Async counterpart of [`FromReadIterator`] for Tokio readers.
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadIterator<R: AsyncRead + Unpin, RR: RuleRecognizer> {
    pub read: R,
    pub parser: Parser<RR>,
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin, RR: RuleRecognizer> FromTokioAsyncReadIterator<R, RR> {
    /// Async equivalent of `Iterator::next`: drains the parser, refilling it
    /// from the async reader whenever it asks for more input.
    pub async fn next(&mut self) -> Option<Result<RR::Output, TurtleParseError>> {
        while !self.parser.is_end() {
            if let Some(result) = self.parser.read_next() {
                return Some(result.map_err(TurtleParseError::Syntax));
            }
            if let Err(e) = self
                .parser
                .lexer
                .extend_from_tokio_async_read(&mut self.read)
                .await
            {
                return Some(Err(e.into()));
            }
        }
        None
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,878 @@
//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`]
//! and a serializer implemented by [`TurtleSerializer`].
use crate::oxrdf::{GraphNameRef, Triple, TripleRef};
use crate::oxttl::terse::TriGRecognizer;
#[cfg(feature = "async-tokio")]
use crate::oxttl::toolkit::FromTokioAsyncReadIterator;
use crate::oxttl::toolkit::{FromReadIterator, Parser, TurtleParseError, TurtleSyntaxError};
#[cfg(feature = "async-tokio")]
use crate::oxttl::trig::ToTokioAsyncWriteTriGWriter;
use crate::oxttl::trig::{LowLevelTriGWriter, ToWriteTriGWriter, TriGSerializer};
use oxiri::{Iri, IriParseError};
use std::collections::hash_map::Iter;
use std::collections::HashMap;
use std::io::{self, Read, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncWrite};
/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser.
///
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
///
/// Count the number of people:
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::NamedNodeRef;
/// use oxttl::TurtleParser;
///
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" .
/// <bar> a schema:Person ;
/// schema:name "Bar" ."#;
///
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in TurtleParser::new().parse_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
#[must_use]
pub struct TurtleParser {
    unchecked: bool,                        // skip validations for speed
    base: Option<Iri<String>>,              // pre-set base IRI, if any
    prefixes: HashMap<String, Iri<String>>, // pre-declared prefixes
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,              // accept Turtle-star quoted triples
}
impl TurtleParser {
    /// Builds a new [`TurtleParser`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }
    /// Assumes the file is valid to make parsing faster.
    ///
    /// It will skip some validations.
    ///
    /// Note that if the file is actually not valid, then broken RDF might be emitted by the parser.
    #[inline]
    pub fn unchecked(mut self) -> Self {
        self.unchecked = true;
        self
    }
    /// Sets the base IRI used to resolve the document's relative IRIs.
    ///
    /// Fails if `base_iri` is not a valid IRI.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.base = Some(Iri::parse(base_iri.into())?);
        Ok(self)
    }
    /// Pre-declares a prefix usable in the document, in addition to any
    /// `@prefix` declarations found inline.
    ///
    /// Fails if `prefix_iri` is not a valid IRI.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        self.prefixes
            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
        Ok(self)
    }
    /// Enables [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star).
    #[cfg(feature = "rdf-star")]
    #[inline]
    pub fn with_quoted_triples(mut self) -> Self {
        self.with_quoted_triples = true;
        self
    }
    /// Parses a Turtle file from a [`Read`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::vocab::rdf;
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::TurtleParser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" .
    /// <bar> a schema:Person ;
    /// schema:name "Bar" ."#;
    ///
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// for triple in TurtleParser::new().parse_read(file.as_ref()) {
    /// let triple = triple?;
    /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
    /// count += 1;
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse_read<R: Read>(self, read: R) -> FromReadTurtleReader<R> {
        FromReadTurtleReader {
            inner: self.parse().parser.parse_read(read),
        }
    }
    /// Parses a Turtle file from a [`AsyncRead`] implementation.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::vocab::rdf;
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::TurtleParser;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" .
    /// <bar> a schema:Person ;
    /// schema:name "Bar" ."#;
    ///
    /// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
    /// let mut count = 0;
    /// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
    /// while let Some(triple) = parser.next().await {
    /// let triple = triple?;
    /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
    /// count += 1;
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub fn parse_tokio_async_read<R: AsyncRead + Unpin>(
        self,
        read: R,
    ) -> FromTokioAsyncReadTurtleReader<R> {
        FromTokioAsyncReadTurtleReader {
            inner: self.parse().parser.parse_tokio_async_read(read),
        }
    }
    /// Allows to parse a Turtle file by using a low-level API.
    ///
    /// Count the number of people:
    /// ```
    /// use oxrdf::vocab::rdf;
    /// use oxrdf::NamedNodeRef;
    /// use oxttl::TurtleParser;
    ///
    /// let file: [&[u8]; 5] = [
    /// b"@base <http://example.com/>",
    /// b". @prefix schema: <http://schema.org/> .",
    /// b"<foo> a schema:Person",
    /// b" ; schema:name \"Foo\" . <bar>",
    /// b" a schema:Person ; schema:name \"Bar\" .",
    /// ];
    ///
    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
    /// let mut count = 0;
    /// let mut parser = TurtleParser::new().parse();
    /// let mut file_chunks = file.iter();
    /// while !parser.is_end() {
    /// // We feed more data to the parser
    /// if let Some(chunk) = file_chunks.next() {
    /// parser.extend_from_slice(chunk);
    /// } else {
    /// parser.end(); // It's finished
    /// }
    /// // We read as many triples from the parser as possible
    /// while let Some(triple) = parser.read_next() {
    /// let triple = triple?;
    /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
    /// count += 1;
    /// }
    /// }
    /// }
    /// assert_eq!(2, count);
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn parse(self) -> LowLevelTurtleReader {
        // Turtle is TriG without named graphs, hence the shared recognizer
        // with `with_graph_name` set to false.
        LowLevelTurtleReader {
            parser: TriGRecognizer::new_parser(
                false,
                #[cfg(feature = "rdf-star")]
                self.with_quoted_triples,
                self.unchecked,
                self.base,
                self.prefixes,
            ),
        }
    }
}
/// Parses a Turtle file from a [`Read`] implementation. Can be built using [`TurtleParser::parse_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::NamedNodeRef;
/// use oxttl::TurtleParser;
///
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" .
/// <bar> a schema:Person ;
/// schema:name "Bar" ."#;
///
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// for triple in TurtleParser::new().parse_read(file.as_ref()) {
/// let triple = triple?;
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct FromReadTurtleReader<R: Read> {
    inner: FromReadIterator<R, TriGRecognizer>, // shared Turtle/TriG machinery
}
impl<R: Read> FromReadTurtleReader<R> {
    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// ```
    /// use oxttl::TurtleParser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader = TurtleParser::new().parse_read(file.as_ref());
    /// assert!(reader.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
    ///
    /// reader.next().unwrap()?; // We read the first triple
    /// assert_eq!(
    /// reader.prefixes().collect::<Vec<_>>(),
    /// [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        // Borrows the parser context; the iterator is only valid between reads.
        TurtlePrefixesIter {
            inner: self.inner.parser.context.prefixes(),
        }
    }
    /// The base IRI considered at the current step of the parsing.
    ///
    /// ```
    /// use oxttl::TurtleParser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader = TurtleParser::new().parse_read(file.as_ref());
    /// assert!(reader.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
    ///
    /// reader.next().unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI.
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        self.inner
            .parser
            .context
            .lexer_options
            .base_iri
            .as_ref()
            .map(Iri::as_str)
    }
}
impl<R: Read> Iterator for FromReadTurtleReader<R> {
    type Item = Result<Triple, TurtleParseError>;

    /// Forwards to the inner TriG iterator, converting its quads into triples.
    fn next(&mut self) -> Option<Self::Item> {
        let result = self.inner.next()?;
        Some(result.map(Into::into))
    }
}
/// Parses a Turtle file from a [`AsyncRead`] implementation. Can be built using [`TurtleParser::parse_tokio_async_read`].
///
/// Count the number of people:
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::NamedNodeRef;
/// use oxttl::TurtleParser;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), oxttl::TurtleParseError> {
/// let file = br#"@base <http://example.com/> .
/// @prefix schema: <http://schema.org/> .
/// <foo> a schema:Person ;
/// schema:name "Foo" .
/// <bar> a schema:Person ;
/// schema:name "Bar" ."#;
///
/// let schema_person = NamedNodeRef::new_unchecked("http://schema.org/Person");
/// let mut count = 0;
/// let mut parser = TurtleParser::new().parse_tokio_async_read(file.as_ref());
/// while let Some(triple) = parser.next().await {
/// let triple = triple?;
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// assert_eq!(2, count);
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct FromTokioAsyncReadTurtleReader<R: AsyncRead + Unpin> {
    inner: FromTokioAsyncReadIterator<R, TriGRecognizer>, // shared Turtle/TriG machinery
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadTurtleReader<R> {
    /// Reads the next triple or returns `None` if the file is finished.
    pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
        Some(self.inner.next().await?.map(Into::into))
    }
    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// ```
    /// use oxttl::TurtleParser;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader = TurtleParser::new().parse_tokio_async_read(file.as_ref());
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    /// reader.prefixes().collect::<Vec<_>>(),
    /// [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        // Borrows the parser context; the iterator is only valid between reads.
        TurtlePrefixesIter {
            inner: self.inner.parser.context.prefixes(),
        }
    }
    /// The base IRI considered at the current step of the parsing.
    ///
    /// ```
    /// use oxttl::TurtleParser;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), oxttl::TurtleParseError> {
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader = TurtleParser::new().parse_tokio_async_read(file.as_ref());
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        self.inner
            .parser
            .context
            .lexer_options
            .base_iri
            .as_ref()
            .map(Iri::as_str)
    }
}
/// Parses a Turtle file by using a low-level API. Can be built using [`TurtleParser::parse`].
///
/// Count the number of people:
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::NamedNodeRef;
/// use oxttl::TurtleParser;
///
/// let file: [&[u8]; 5] = [
/// b"@base <http://example.com/>",
/// b". @prefix schema: <http://schema.org/> .",
/// b"<foo> a schema:Person",
/// b" ; schema:name \"Foo\" . <bar>",
/// b" a schema:Person ; schema:name \"Bar\" .",
/// ];
///
/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
/// let mut count = 0;
/// let mut parser = TurtleParser::new().parse();
/// let mut file_chunks = file.iter();
/// while !parser.is_end() {
/// // We feed more data to the parser
/// if let Some(chunk) = file_chunks.next() {
/// parser.extend_from_slice(chunk);
/// } else {
/// parser.end(); // It's finished
/// }
/// // We read as many triples from the parser as possible
/// while let Some(triple) = parser.read_next() {
/// let triple = triple?;
/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
/// count += 1;
/// }
/// }
/// }
/// assert_eq!(2, count);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTurtleReader {
    /// Turtle is parsed with the TriG recognizer; `read_next` converts its output to triples.
    parser: Parser<TriGRecognizer>,
}
impl LowLevelTurtleReader {
    /// Adds some extra bytes to the parser. Should be called when [`read_next`](Self::read_next) returns [`None`] and there is still unread data.
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.parser.extend_from_slice(other);
    }

    /// Tell the parser that the file is finished.
    ///
    /// This triggers the parsing of the final bytes and might lead [`read_next`](Self::read_next) to return some extra values.
    pub fn end(&mut self) {
        self.parser.end();
    }

    /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`read_next`](Self::read_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        self.parser.is_end()
    }

    /// Attempt to parse a new triple from the already provided data.
    ///
    /// Returns [`None`] if the parsing is finished or more data is required.
    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
    pub fn read_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> {
        // Convert the recognizer's output into a plain `Triple`.
        self.parser.read_next().map(|result| result.map(Into::into))
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// ```
    /// use oxttl::TurtleParser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader = TurtleParser::new().parse();
    /// reader.extend_from_slice(file);
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// reader.read_next().unwrap()?; // We read the first triple
    /// assert_eq!(
    /// reader.prefixes().collect::<Vec<_>>(),
    /// [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
        let inner = self.parser.context.prefixes();
        TurtlePrefixesIter { inner }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// ```
    /// use oxttl::TurtleParser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    /// schema:name "Foo" ."#;
    ///
    /// let mut reader = TurtleParser::new().parse();
    /// reader.extend_from_slice(file);
    /// assert!(reader.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// reader.read_next().unwrap()?; // We read the first triple
    /// assert_eq!(reader.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        let base = self.parser.context.lexer_options.base_iri.as_ref();
        base.map(|iri| iri.as_str())
    }
}
/// Iterator on the file prefixes.
///
/// See [`LowLevelTurtleReader::prefixes`].
pub struct TurtlePrefixesIter<'a> {
    /// Borrowed view over the parser's (prefix name -> IRI) map.
    inner: Iter<'a, String, Iri<String>>,
}
impl<'a> Iterator for TurtlePrefixesIter<'a> {
    type Item = (&'a str, &'a str);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Borrow the owned map entries as plain string slices.
        self.inner
            .next()
            .map(|(name, iri)| (name.as_str(), iri.as_str()))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Delegate: the map iterator knows its exact remaining length.
        self.inner.size_hint()
    }
}
/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
///
/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut writer = TurtleSerializer::new()
/// .with_prefix("schema", "http://schema.org/")?
/// .serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Default)]
#[must_use]
pub struct TurtleSerializer {
    /// Turtle output is produced by the TriG serializer; triples are written into the default graph.
    inner: TriGSerializer,
}
impl TurtleSerializer {
    /// Builds a new [`TurtleSerializer`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers a prefix to be used for abbreviating IRIs in the output.
    #[inline]
    pub fn with_prefix(
        mut self,
        prefix_name: impl Into<String>,
        prefix_iri: impl Into<String>,
    ) -> Result<Self, IriParseError> {
        // The prefix IRI is validated here, hence the `Result`.
        let inner = self.inner.with_prefix(prefix_name, prefix_iri)?;
        self.inner = inner;
        Ok(self)
    }

    /// Writes a Turtle file to a [`Write`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// let mut writer = TurtleSerializer::new()
    /// .with_prefix("schema", "http://schema.org/")?
    /// .serialize_to_write(Vec::new());
    /// writer.write_triple(TripleRef::new(
    /// NamedNodeRef::new("http://example.com#me")?,
    /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    /// NamedNodeRef::new("http://schema.org/Person")?,
    /// ))?;
    /// assert_eq!(
    /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
    /// writer.finish()?.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize_to_write<W: Write>(self, write: W) -> ToWriteTurtleWriter<W> {
        let inner = self.inner.serialize_to_write(write);
        ToWriteTurtleWriter { inner }
    }

    /// Writes a Turtle file to a [`AsyncWrite`] implementation.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(),Box<dyn std::error::Error>> {
    /// let mut writer = TurtleSerializer::new()
    /// .with_prefix("schema", "http://schema.org/")?
    /// .serialize_to_tokio_async_write(Vec::new());
    /// writer
    /// .write_triple(TripleRef::new(
    /// NamedNodeRef::new_unchecked("http://example.com#me"),
    /// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
    /// NamedNodeRef::new_unchecked("http://schema.org/Person"),
    /// ))
    /// .await?;
    /// assert_eq!(
    /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
    /// writer.finish().await?.as_slice()
    /// );
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub fn serialize_to_tokio_async_write<W: AsyncWrite + Unpin>(
        self,
        write: W,
    ) -> ToTokioAsyncWriteTurtleWriter<W> {
        let inner = self.inner.serialize_to_tokio_async_write(write);
        ToTokioAsyncWriteTurtleWriter { inner }
    }

    /// Builds a low-level Turtle writer.
    ///
    /// ```
    /// use oxrdf::{NamedNodeRef, TripleRef};
    /// use oxttl::TurtleSerializer;
    ///
    /// let mut buf = Vec::new();
    /// let mut writer = TurtleSerializer::new()
    /// .with_prefix("schema", "http://schema.org/")?
    /// .serialize();
    /// writer.write_triple(
    /// TripleRef::new(
    /// NamedNodeRef::new("http://example.com#me")?,
    /// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
    /// NamedNodeRef::new("http://schema.org/Person")?,
    /// ),
    /// &mut buf,
    /// )?;
    /// writer.finish(&mut buf)?;
    /// assert_eq!(
    /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
    /// buf.as_slice()
    /// );
    /// # Result::<_,Box<dyn std::error::Error>>::Ok(())
    /// ```
    pub fn serialize(self) -> LowLevelTurtleWriter {
        let inner = self.inner.serialize();
        LowLevelTurtleWriter { inner }
    }
}
/// Writes a Turtle file to a [`Write`] implementation. Can be built using [`TurtleSerializer::serialize_to_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut writer = TurtleSerializer::new()
/// .with_prefix("schema", "http://schema.org/")?
/// .serialize_to_write(Vec::new());
/// writer.write_triple(TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
/// writer.finish()?.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct ToWriteTurtleWriter<W: Write> {
    /// Delegates to the TriG writer, pinning the default graph.
    inner: ToWriteTriGWriter<W>,
}
impl<W: Write> ToWriteTurtleWriter<W> {
    /// Writes an extra triple.
    pub fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Turtle is serialized as TriG restricted to the default graph.
        let quad = t.into().in_graph(GraphNameRef::DefaultGraph);
        self.inner.write_quad(quad)
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub fn finish(self) -> io::Result<W> {
        self.inner.finish()
    }
}
/// Writes a Turtle file to a [`AsyncWrite`] implementation. Can be built using [`TurtleSerializer::serialize_to_tokio_async_write`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut writer = TurtleSerializer::new()
/// .with_prefix("schema", "http://schema.org/")?
/// .serialize_to_tokio_async_write(Vec::new());
/// writer
/// .write_triple(TripleRef::new(
/// NamedNodeRef::new_unchecked("http://example.com#me"),
/// NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
/// NamedNodeRef::new_unchecked("http://schema.org/Person"),
/// ))
/// .await?;
/// assert_eq!(
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
/// writer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteTurtleWriter<W: AsyncWrite + Unpin> {
    /// Delegates to the async TriG writer, pinning the default graph.
    inner: ToTokioAsyncWriteTriGWriter<W>,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTurtleWriter<W> {
    /// Writes an extra triple.
    pub async fn write_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        // Turtle is serialized as TriG restricted to the default graph.
        let quad = t.into().in_graph(GraphNameRef::DefaultGraph);
        self.inner.write_quad(quad).await
    }

    /// Ends the write process and returns the underlying [`Write`].
    pub async fn finish(self) -> io::Result<W> {
        self.inner.finish().await
    }
}
/// Writes a Turtle file by using a low-level API. Can be built using [`TurtleSerializer::serialize`].
///
/// ```
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut buf = Vec::new();
/// let mut writer = TurtleSerializer::new()
/// .with_prefix("schema", "http://schema.org/")?
/// .serialize();
/// writer.write_triple(
/// TripleRef::new(
/// NamedNodeRef::new("http://example.com#me")?,
/// NamedNodeRef::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")?,
/// NamedNodeRef::new("http://schema.org/Person")?,
/// ),
/// &mut buf,
/// )?;
/// writer.finish(&mut buf)?;
/// assert_eq!(
/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
/// buf.as_slice()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTurtleWriter {
    /// Delegates to the low-level TriG writer, pinning the default graph.
    inner: LowLevelTriGWriter,
}
impl LowLevelTurtleWriter {
    /// Writes an extra triple.
    pub fn write_triple<'a>(
        &mut self,
        t: impl Into<TripleRef<'a>>,
        write: impl Write,
    ) -> io::Result<()> {
        // Turtle is serialized as TriG restricted to the default graph.
        let quad = t.into().in_graph(GraphNameRef::DefaultGraph);
        self.inner.write_quad(quad, write)
    }

    /// Finishes to write the file.
    pub fn finish(&mut self, write: impl Write) -> io::Result<()> {
        self.inner.finish(write)
    }
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use crate::oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};
    // Exercises named nodes, plain/typed/language-tagged literals and blank nodes,
    // and checks the predicate/object grouping (`;` / `,`) in the serialized output.
    #[test]
    fn test_write() -> io::Result<()> {
        let mut writer = TurtleSerializer::new().serialize_to_write(Vec::new());
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            NamedNodeRef::new_unchecked("http://example.com/o"),
        ))?;
        // Same subject and predicate as above: expected to fold with "," in the output.
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p"),
            LiteralRef::new_simple_literal("foo"),
        ))?;
        // Same subject, new predicate: expected to fold with ";".
        writer.write_triple(TripleRef::new(
            NamedNodeRef::new_unchecked("http://example.com/s"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
        ))?;
        writer.write_triple(TripleRef::new(
            BlankNodeRef::new_unchecked("b"),
            NamedNodeRef::new_unchecked("http://example.com/p2"),
            BlankNodeRef::new_unchecked("b2"),
        ))?;
        assert_eq!(String::from_utf8(writer.finish()?).unwrap(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n");
        Ok(())
    }
}

@ -0,0 +1,72 @@
Sparesults
==========
[![Latest Version](https://img.shields.io/crates/v/sparesults.svg)](https://crates.io/crates/sparesults)
[![Released API docs](https://docs.rs/sparesults/badge.svg)](https://docs.rs/sparesults)
[![Crates.io downloads](https://img.shields.io/crates/d/sparesults)](https://crates.io/crates/sparesults)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Sparesults is a set of parsers and serializers for [SPARQL](https://www.w3.org/TR/sparql11-overview/) query results formats.
It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#query-result-formats) is also available behind the `rdf-star` feature.
This crate is intended to be a building block for SPARQL client and server implementations in Rust like [Oxigraph](https://oxigraph.org).
The entry points of this library are the two [`QueryResultsParser`] and [`QueryResultsSerializer`] structs.
Usage example converting a JSON result file into a TSV result file:
```rust
use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
use std::io::Result;
fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
// We start to read the JSON file and see which kind of results it is
match json_parser.parse_read(json_file)? {
FromReadQueryResultsReader::Boolean(value) => {
// it's a boolean result, we copy it in TSV to the output buffer
tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
},
FromReadQueryResultsReader::Solutions(solutions_reader) => {
// it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
for solution in solutions_reader {
serialize_solutions_to_write.write(&solution?)?;
}
serialize_solutions_to_write.finish()
}
}
}
// Let's test with a boolean
assert_eq!(
convert_json_to_tsv(b"{\"boolean\":true}".as_slice()).unwrap(),
b"true"
);
// And with a set of solutions
assert_eq!(
convert_json_to_tsv(b"{\"head\":{\"vars\":[\"foo\",\"bar\"]},\"results\":{\"bindings\":[{\"foo\":{\"type\":\"literal\",\"value\":\"test\"}}]}}".as_slice()).unwrap(),
b"?foo\t?bar\n\"test\"\t\n"
);
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -0,0 +1,948 @@
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
use crate::oxrdf::vocab::xsd;
use crate::oxrdf::*;
use crate::sparesults::error::{
QueryResultsParseError, QueryResultsSyntaxError, SyntaxErrorKind, TextPosition,
};
use memchr::memchr;
use std::io::{self, Read, Write};
use std::str::{self, FromStr};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
const MAX_BUFFER_SIZE: usize = 4096 * 4096;
/// Serializes a boolean query result as CSV (`true`/`false`) and hands the writer back.
pub fn write_boolean_csv_result<W: Write>(mut write: W, value: bool) -> io::Result<W> {
    let text: &[u8] = if value { b"true" } else { b"false" };
    write.write_all(text)?;
    Ok(write)
}
#[cfg(feature = "async-tokio")]
/// Async counterpart of [`write_boolean_csv_result`].
pub async fn tokio_async_write_boolean_csv_result<W: AsyncWrite + Unpin>(
    mut write: W,
    value: bool,
) -> io::Result<W> {
    let text: &[u8] = if value { b"true" } else { b"false" };
    write.write_all(text).await?;
    Ok(write)
}
/// Streaming CSV solutions serializer targeting a [`Write`] implementation.
pub struct ToWriteCsvSolutionsWriter<W: Write> {
    // Formats one row at a time into `buffer`.
    inner: InnerCsvSolutionsWriter,
    write: W,
    // Reused between rows to avoid a fresh allocation per row.
    buffer: String,
}
impl<W: Write> ToWriteCsvSolutionsWriter<W> {
    /// Emits the CSV header row (variable names) and builds the writer.
    pub fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerCsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes())?;
        buffer.clear();
        Ok(Self { inner, write, buffer })
    }

    /// Serializes one solution row and flushes it to the underlying writer.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        let row = self.buffer.as_bytes();
        self.write.write_all(row)?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying writer.
    pub fn finish(self) -> W {
        self.write
    }
}
/// Streaming CSV solutions serializer targeting an async (`tokio`) writer.
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteCsvSolutionsWriter<W: AsyncWrite + Unpin> {
    // Formats one row at a time into `buffer`.
    inner: InnerCsvSolutionsWriter,
    write: W,
    // Reused between rows to avoid a fresh allocation per row.
    buffer: String,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteCsvSolutionsWriter<W> {
    /// Emits the CSV header row (variable names) and builds the writer.
    pub async fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerCsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes()).await?;
        buffer.clear();
        Ok(Self { inner, write, buffer })
    }

    /// Serializes one solution row and flushes it to the underlying writer.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        self.write.write_all(self.buffer.as_bytes()).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying writer.
    pub fn finish(self) -> W {
        self.write
    }
}
/// Sync/async-agnostic CSV row formatter; remembers the header column order.
struct InnerCsvSolutionsWriter {
    variables: Vec<Variable>,
}
impl InnerCsvSolutionsWriter {
    /// Writes the `var1,var2,...` header line and remembers the column order.
    fn start(output: &mut String, variables: Vec<Variable>) -> Self {
        for (i, variable) in variables.iter().enumerate() {
            if i > 0 {
                output.push(',');
            }
            output.push_str(variable.as_str());
        }
        output.push_str("\r\n");
        Self { variables }
    }

    /// Appends one row; variables without a binding produce empty cells.
    fn write<'a>(
        &self,
        output: &mut String,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) {
        // Reorder the bindings to match the header column order.
        let mut row = vec![None; self.variables.len()];
        for (variable, term) in solution {
            if let Some(column) = self.variables.iter().position(|v| *v == variable) {
                row[column] = Some(term);
            }
        }
        for (i, cell) in row.into_iter().enumerate() {
            if i > 0 {
                output.push(',');
            }
            if let Some(term) = cell {
                write_csv_term(output, term);
            }
        }
        output.push_str("\r\n");
    }
}
/// Appends the CSV form of a term: IRIs and blank nodes raw, literal values escaped.
fn write_csv_term<'a>(output: &mut String, term: impl Into<TermRef<'a>>) {
    match term.into() {
        TermRef::NamedNode(node) => output.push_str(node.as_str()),
        TermRef::BlankNode(node) => {
            output.push_str("_:");
            output.push_str(node.as_str());
        }
        TermRef::Literal(literal) => write_escaped_csv_string(output, literal.value()),
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => {
            // Quoted triples are flattened into space-separated components.
            write_csv_term(output, &triple.subject);
            output.push(' ');
            write_csv_term(output, &triple.predicate);
            output.push(' ');
            write_csv_term(output, &triple.object);
        }
    }
}
/// Appends `s` in CSV form: quoted (with `"` doubled) only when it contains
/// a quote, comma or line break; otherwise copied verbatim.
fn write_escaped_csv_string(output: &mut String, s: &str) {
    let needs_quoting = s.bytes().any(|b| matches!(b, b'"' | b',' | b'\n' | b'\r'));
    if !needs_quoting {
        output.push_str(s);
        return;
    }
    output.push('"');
    for ch in s.chars() {
        if ch == '"' {
            // CSV escapes an embedded quote by doubling it.
            output.push_str("\"\"");
        } else {
            output.push(ch);
        }
    }
    output.push('"');
}
/// Streaming TSV solutions serializer targeting a [`Write`] implementation.
pub struct ToWriteTsvSolutionsWriter<W: Write> {
    // Formats one row at a time into `buffer`.
    inner: InnerTsvSolutionsWriter,
    write: W,
    // Reused between rows to avoid a fresh allocation per row.
    buffer: String,
}
impl<W: Write> ToWriteTsvSolutionsWriter<W> {
    /// Emits the TSV header row (`?var` names) and builds the writer.
    pub fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerTsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes())?;
        buffer.clear();
        Ok(Self { inner, write, buffer })
    }

    /// Serializes one solution row and flushes it to the underlying writer.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        let row = self.buffer.as_bytes();
        self.write.write_all(row)?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying writer.
    pub fn finish(self) -> W {
        self.write
    }
}
/// Streaming TSV solutions serializer targeting an async (`tokio`) writer.
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteTsvSolutionsWriter<W: AsyncWrite + Unpin> {
    // Formats one row at a time into `buffer`.
    inner: InnerTsvSolutionsWriter,
    write: W,
    // Reused between rows to avoid a fresh allocation per row.
    buffer: String,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteTsvSolutionsWriter<W> {
    /// Emits the TSV header row (`?var` names) and builds the writer.
    pub async fn start(mut write: W, variables: Vec<Variable>) -> io::Result<Self> {
        let mut buffer = String::new();
        let inner = InnerTsvSolutionsWriter::start(&mut buffer, variables);
        write.write_all(buffer.as_bytes()).await?;
        buffer.clear();
        Ok(Self { inner, write, buffer })
    }

    /// Serializes one solution row and flushes it to the underlying writer.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        self.inner.write(&mut self.buffer, solution);
        self.write.write_all(self.buffer.as_bytes()).await?;
        self.buffer.clear();
        Ok(())
    }

    /// Returns the underlying writer.
    pub fn finish(self) -> W {
        self.write
    }
}
/// Sync/async-agnostic TSV row formatter; remembers the header column order.
struct InnerTsvSolutionsWriter {
    variables: Vec<Variable>,
}
impl InnerTsvSolutionsWriter {
    /// Writes the `?var1<TAB>?var2` header line and remembers the column order.
    fn start(output: &mut String, variables: Vec<Variable>) -> Self {
        for (i, variable) in variables.iter().enumerate() {
            if i > 0 {
                output.push('\t');
            }
            output.push('?');
            output.push_str(variable.as_str());
        }
        output.push('\n');
        Self { variables }
    }

    /// Appends one row; variables without a binding produce empty cells.
    fn write<'a>(
        &self,
        output: &mut String,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) {
        // Reorder the bindings to match the header column order.
        let mut row = vec![None; self.variables.len()];
        for (variable, term) in solution {
            if let Some(column) = self.variables.iter().position(|v| *v == variable) {
                row[column] = Some(term);
            }
        }
        for (i, cell) in row.into_iter().enumerate() {
            if i > 0 {
                output.push('\t');
            }
            if let Some(term) = cell {
                write_tsv_term(output, term);
            }
        }
        output.push('\n');
    }
}
/// Appends the TSV (Turtle-like) form of a term.
///
/// Numeric and boolean literals are printed bare when their lexical form matches
/// the corresponding Turtle production; everything else is quoted, with an
/// optional language tag or `^^datatype` suffix.
fn write_tsv_term<'a>(output: &mut String, term: impl Into<TermRef<'a>>) {
    match term.into() {
        TermRef::NamedNode(node) => {
            output.push('<');
            output.push_str(node.as_str());
            output.push('>');
        }
        TermRef::BlankNode(node) => {
            output.push_str("_:");
            output.push_str(node.as_str());
        }
        TermRef::Literal(literal) => {
            let value = literal.value();
            if let Some(language) = literal.language() {
                write_tsv_quoted_str(output, value);
                output.push('@');
                output.push_str(language);
            } else {
                match literal.datatype() {
                    // Bare form only when the value is valid Turtle syntax for the datatype;
                    // otherwise fall through to the quoted `"..."^^<datatype>` arm.
                    xsd::BOOLEAN if is_turtle_boolean(value) => output.push_str(value),
                    xsd::INTEGER if is_turtle_integer(value) => output.push_str(value),
                    xsd::DECIMAL if is_turtle_decimal(value) => output.push_str(value),
                    xsd::DOUBLE if is_turtle_double(value) => output.push_str(value),
                    // xsd:string needs no datatype suffix.
                    xsd::STRING => write_tsv_quoted_str(output, value),
                    datatype => {
                        write_tsv_quoted_str(output, value);
                        output.push_str("^^");
                        write_tsv_term(output, datatype);
                    }
                }
            }
        }
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => {
            // RDF-star quoted triple: `<< s p o >>`.
            output.push_str("<< ");
            write_tsv_term(output, &triple.subject);
            output.push(' ');
            write_tsv_term(output, &triple.predicate);
            output.push(' ');
            write_tsv_term(output, &triple.object);
            output.push_str(" >>");
        }
    }
}
/// Appends `string` as a double-quoted TSV literal, backslash-escaping
/// tabs, newlines, carriage returns, quotes and backslashes.
fn write_tsv_quoted_str(output: &mut String, string: &str) {
    output.push('"');
    for ch in string.chars() {
        let escaped = match ch {
            '\t' => Some("\\t"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            _ => None,
        };
        match escaped {
            Some(e) => output.push_str(e),
            None => output.push(ch),
        }
    }
    output.push('"');
}
/// Is `value` a Turtle boolean literal (exactly `true` or `false`)?
fn is_turtle_boolean(value: &str) -> bool {
    value == "true" || value == "false"
}
/// Does `value` match the Turtle INTEGER production?
// [19] INTEGER ::= [+-]? [0-9]+
fn is_turtle_integer(value: &str) -> bool {
    let bytes = value.as_bytes();
    // Drop one optional sign, then require at least one digit and nothing else.
    let digits = bytes
        .strip_prefix(b"+")
        .or_else(|| bytes.strip_prefix(b"-"))
        .unwrap_or(bytes);
    !digits.is_empty() && digits.iter().all(u8::is_ascii_digit)
}
/// Does `value` match the Turtle DECIMAL production?
// [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+
fn is_turtle_decimal(value: &str) -> bool {
    let mut bytes = value.as_bytes();
    if let Some(rest) = bytes.strip_prefix(b"+").or_else(|| bytes.strip_prefix(b"-")) {
        bytes = rest;
    }
    // Skip the (optional) integer part.
    let int_len = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
    bytes = &bytes[int_len..];
    // A dot followed by at least one digit is mandatory.
    match bytes.strip_prefix(b".") {
        Some(frac) => !frac.is_empty() && frac.iter().all(u8::is_ascii_digit),
        None => false,
    }
}
/// Does `value` match the Turtle DOUBLE production?
// [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
// [154s] EXPONENT ::= [eE] [+-]? [0-9]+
fn is_turtle_double(value: &str) -> bool {
    let mut bytes = value.as_bytes();
    if let Some(rest) = bytes.strip_prefix(b"+").or_else(|| bytes.strip_prefix(b"-")) {
        bytes = rest;
    }
    // Mantissa: digits before and/or after an optional dot.
    let int_digits = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
    bytes = &bytes[int_digits..];
    let mut frac_digits = 0;
    if let Some(rest) = bytes.strip_prefix(b".") {
        frac_digits = rest.iter().take_while(|b| b.is_ascii_digit()).count();
        bytes = &rest[frac_digits..];
    }
    // The exponent marker is mandatory for a DOUBLE.
    let Some(mut exp) = bytes.strip_prefix(b"e").or_else(|| bytes.strip_prefix(b"E")) else {
        return false;
    };
    if let Some(rest) = exp.strip_prefix(b"+").or_else(|| exp.strip_prefix(b"-")) {
        exp = rest;
    }
    // Need at least one mantissa digit and at least one exponent digit.
    (int_digits > 0 || frac_digits > 0) && !exp.is_empty() && exp.iter().all(u8::is_ascii_digit)
}
/// Outcome of parsing a TSV result from a [`Read`]: either a stream of solutions or a boolean.
pub enum FromReadTsvQueryResultsReader<R: Read> {
    Solutions {
        variables: Vec<Variable>,
        solutions: FromReadTsvSolutionsReader<R>,
    },
    Boolean(bool),
}
impl<R: Read> FromReadTsvQueryResultsReader<R> {
    /// Reads the first line and decides between a boolean result and a solutions stream.
    pub fn read(mut read: R) -> Result<Self, QueryResultsParseError> {
        let mut line_reader = LineReader::new();
        let mut buf = Vec::new();
        let first_line = line_reader.next_line(&mut buf, &mut read)?;
        match inner_read_first_line(line_reader, first_line)? {
            TsvInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Ok(Self::Solutions {
                variables,
                solutions: FromReadTsvSolutionsReader {
                    read,
                    inner: solutions,
                    buffer: buf,
                },
            }),
            TsvInnerQueryResults::Boolean(value) => Ok(Self::Boolean(value)),
        }
    }
}
/// Pull-based reader over the solution rows of a TSV result.
pub struct FromReadTsvSolutionsReader<R: Read> {
    read: R,
    inner: TsvInnerSolutionsReader,
    // Carries partially-read bytes between `read_next` calls.
    buffer: Vec<u8>,
}
impl<R: Read> FromReadTsvSolutionsReader<R> {
    /// Reads the next data row; `Ok(None)` signals the end of the file.
    pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        let next_line = self
            .inner
            .reader
            .next_line(&mut self.buffer, &mut self.read)?;
        self.inner.read_next(next_line)
    }
}
/// Async variant of [`FromReadTsvQueryResultsReader`].
#[cfg(feature = "async-tokio")]
pub enum FromTokioAsyncReadTsvQueryResultsReader<R: AsyncRead + Unpin> {
    Solutions {
        variables: Vec<Variable>,
        solutions: FromTokioAsyncReadTsvSolutionsReader<R>,
    },
    Boolean(bool),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadTsvQueryResultsReader<R> {
    /// Reads the first line and decides between a boolean result and a solutions stream.
    pub async fn read(mut read: R) -> Result<Self, QueryResultsParseError> {
        let mut line_reader = LineReader::new();
        let mut buf = Vec::new();
        let first_line = line_reader.next_line_tokio_async(&mut buf, &mut read).await?;
        match inner_read_first_line(line_reader, first_line)? {
            TsvInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Ok(Self::Solutions {
                variables,
                solutions: FromTokioAsyncReadTsvSolutionsReader {
                    read,
                    inner: solutions,
                    buffer: buf,
                },
            }),
            TsvInnerQueryResults::Boolean(value) => Ok(Self::Boolean(value)),
        }
    }
}
/// Async pull-based reader over the solution rows of a TSV result.
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadTsvSolutionsReader<R: AsyncRead + Unpin> {
    read: R,
    inner: TsvInnerSolutionsReader,
    // Carries partially-read bytes between `read_next` calls.
    buffer: Vec<u8>,
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadTsvSolutionsReader<R> {
    /// Reads the next data row; `Ok(None)` signals the end of the file.
    pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        let next_line = self
            .inner
            .reader
            .next_line_tokio_async(&mut self.buffer, &mut self.read)
            .await?;
        self.inner.read_next(next_line)
    }
}
/// Sync/async-agnostic result of interpreting the first TSV line.
enum TsvInnerQueryResults {
    Solutions {
        variables: Vec<Variable>,
        solutions: TsvInnerSolutionsReader,
    },
    Boolean(bool),
}
/// Interprets the first TSV line: a bare `true`/`false` is a boolean result,
/// otherwise the line is a tab-separated header of variable declarations.
fn inner_read_first_line(
    reader: LineReader,
    line: &str,
) -> Result<TsvInnerQueryResults, QueryResultsParseError> {
    let line = line.trim_matches(|c| matches!(c, ' ' | '\r' | '\n'));
    if line.eq_ignore_ascii_case("true") {
        return Ok(TsvInnerQueryResults::Boolean(true));
    }
    if line.eq_ignore_ascii_case("false") {
        return Ok(TsvInnerQueryResults::Boolean(false));
    }
    let mut variables = Vec::new();
    // An empty header line means a zero-column result (e.g. ASK-like projections).
    if !line.is_empty() {
        for v in line.split('\t') {
            let v = v.trim();
            if v.is_empty() {
                return Err(QueryResultsSyntaxError::msg("Empty column on the first row. The first row should be a list of variables like ?foo or $bar").into());
            }
            let variable = Variable::from_str(v).map_err(|e| {
                QueryResultsSyntaxError::msg(format!("Invalid variable declaration '{v}': {e}"))
            })?;
            // Duplicate variable names would make column positions ambiguous.
            if variables.contains(&variable) {
                return Err(QueryResultsSyntaxError::msg(format!(
                    "The variable {variable} is declared twice"
                ))
                .into());
            }
            variables.push(variable);
        }
    }
    let column_len = variables.len();
    Ok(TsvInnerQueryResults::Solutions {
        variables,
        solutions: TsvInnerSolutionsReader { reader, column_len },
    })
}
/// Sync/async-agnostic parser of TSV data rows; validates the column count.
struct TsvInnerSolutionsReader {
    reader: LineReader,
    // Number of columns declared by the header line.
    column_len: usize,
}
impl TsvInnerSolutionsReader {
    /// Parses one data row into terms (`None` per empty cell), or `Ok(None)` at EOF.
    ///
    /// On a malformed term, reports a precise `TextPosition` range computed from
    /// the preceding cells of the line; a wrong column count is also an error.
    #[allow(clippy::unwrap_in_result)]
    pub fn read_next(
        &self,
        line: &str,
    ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        if line.is_empty() {
            return Ok(None); // EOF
        }
        let elements = line
            .split('\t')
            .enumerate()
            .map(|(i, v)| {
                let v = v.trim();
                if v.is_empty() {
                    Ok(None)
                } else {
                    Ok(Some(Term::from_str(v).map_err(|e| {
                        // Locate the failing cell: sum the lengths of the cells before it
                        // (+1 per tab separator), both in chars (column) and bytes (offset).
                        let start_position_char = line
                            .split('\t')
                            .take(i)
                            .map(|c| c.chars().count() + 1)
                            .sum::<usize>();
                        let start_position_bytes =
                            line.split('\t').take(i).map(|c| c.len() + 1).sum::<usize>();
                        QueryResultsSyntaxError(SyntaxErrorKind::Term {
                            error: e,
                            term: v.into(),
                            location: TextPosition {
                                line: self.reader.line_count - 1,
                                column: start_position_char.try_into().unwrap(),
                                offset: self.reader.last_line_start
                                    + u64::try_from(start_position_bytes).unwrap(),
                            }..TextPosition {
                                line: self.reader.line_count - 1,
                                column: (start_position_char + v.chars().count())
                                    .try_into()
                                    .unwrap(),
                                offset: self.reader.last_line_start
                                    + u64::try_from(start_position_bytes + v.len()).unwrap(),
                            },
                        })
                    })?))
                }
            })
            .collect::<Result<Vec<_>, QueryResultsParseError>>()?;
        if elements.len() == self.column_len {
            Ok(Some(elements))
        } else if self.column_len == 0 && elements == [None] {
            Ok(Some(Vec::new())) // Zero columns case
        } else {
            Err(QueryResultsSyntaxError::located_message(
                format!(
                    "This TSV files has {} columns but we found a row on line {} with {} columns: {}",
                    self.column_len,
                    self.reader.line_count - 1,
                    elements.len(),
                    line
                ),
                TextPosition {
                    line: self.reader.line_count - 1,
                    column: 0,
                    offset: self.reader.last_line_start,
                }..TextPosition {
                    line: self.reader.line_count - 1,
                    column: line.chars().count().try_into().unwrap(),
                    offset: self.reader.last_line_end,
                },
            )
            .into())
        }
    }
}
/// Incremental line reader that tracks line numbers and byte offsets so
/// callers can attach precise locations to syntax errors.
struct LineReader {
    // Start/end of the not-yet-consumed region inside the shared buffer.
    buffer_start: usize,
    buffer_end: usize,
    // Number of lines handed out so far.
    line_count: u64,
    // Global byte offsets (from file start) of the last returned line.
    last_line_start: u64,
    last_line_end: u64,
}
impl LineReader {
    /// Creates a reader with an empty buffer positioned at the file start.
    fn new() -> Self {
        Self {
            buffer_start: 0,
            buffer_end: 0,
            line_count: 0,
            last_line_start: 0,
            last_line_end: 0,
        }
    }
    /// Reads the next line (including its `'\n'`, if any) from `read`,
    /// buffering through `buffer`, and updates the line/offset counters.
    /// At end of input the remaining bytes (possibly without a trailing
    /// newline) are returned; an empty string therefore signals EOF.
    ///
    /// Errors with `OutOfMemory` when a single line would exceed
    /// `MAX_BUFFER_SIZE`, and with `InvalidData` on invalid UTF-8.
    #[allow(clippy::unwrap_in_result)]
    fn next_line<'a>(
        &mut self,
        buffer: &'a mut Vec<u8>,
        read: &mut impl Read,
    ) -> io::Result<&'a str> {
        let line_end = loop {
            // Is a complete line already buffered?
            if let Some(eol) = memchr(b'\n', &buffer[self.buffer_start..self.buffer_end]) {
                break self.buffer_start + eol + 1;
            }
            // Compact the buffer: move pending bytes to the front.
            if self.buffer_start > 0 {
                buffer.copy_within(self.buffer_start..self.buffer_end, 0);
                self.buffer_end -= self.buffer_start;
                self.buffer_start = 0;
            }
            // Grow in 1 KiB steps, bounded by MAX_BUFFER_SIZE.
            if self.buffer_end + 1024 > buffer.len() {
                if self.buffer_end + 1024 > MAX_BUFFER_SIZE {
                    return Err(io::Error::new(
                        io::ErrorKind::OutOfMemory,
                        format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"),
                    ));
                }
                buffer.resize(self.buffer_end + 1024, b'\0');
            }
            let read = read.read(&mut buffer[self.buffer_end..])?;
            if read == 0 {
                // EOF: emit whatever remains, even without a final newline.
                break self.buffer_end;
            }
            self.buffer_end += read;
        };
        let result = str::from_utf8(&buffer[self.buffer_start..line_end]).map_err(|e| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Invalid UTF-8 in the TSV file: {e}"),
            )
        });
        // Counters are advanced even when UTF-8 validation failed, keeping the
        // position bookkeeping consistent for any later reads.
        self.line_count += 1;
        self.last_line_start = self.last_line_end;
        self.last_line_end += u64::try_from(line_end - self.buffer_start).unwrap();
        self.buffer_start = line_end;
        result
    }
    /// Tokio variant of [`Self::next_line`]: identical buffering and
    /// bookkeeping, with asynchronous reads.
    #[cfg(feature = "async-tokio")]
    #[allow(clippy::unwrap_in_result)]
    async fn next_line_tokio_async<'a>(
        &mut self,
        buffer: &'a mut Vec<u8>,
        read: &mut (impl AsyncRead + Unpin),
    ) -> io::Result<&'a str> {
        let line_end = loop {
            // Is a complete line already buffered?
            if let Some(eol) = memchr(b'\n', &buffer[self.buffer_start..self.buffer_end]) {
                break self.buffer_start + eol + 1;
            }
            // Compact the buffer: move pending bytes to the front.
            if self.buffer_start > 0 {
                buffer.copy_within(self.buffer_start..self.buffer_end, 0);
                self.buffer_end -= self.buffer_start;
                self.buffer_start = 0;
            }
            // Grow in 1 KiB steps, bounded by MAX_BUFFER_SIZE.
            if self.buffer_end + 1024 > buffer.len() {
                if self.buffer_end + 1024 > MAX_BUFFER_SIZE {
                    return Err(io::Error::new(
                        io::ErrorKind::OutOfMemory,
                        format!("Reached the buffer maximal size of {MAX_BUFFER_SIZE}"),
                    ));
                }
                buffer.resize(self.buffer_end + 1024, b'\0');
            }
            let read = read.read(&mut buffer[self.buffer_end..]).await?;
            if read == 0 {
                // EOF: emit whatever remains, even without a final newline.
                break self.buffer_end;
            }
            self.buffer_end += read;
        };
        let result = str::from_utf8(&buffer[self.buffer_start..line_end]).map_err(|e| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Invalid UTF-8 in the TSV file: {e}"),
            )
        });
        // Counters are advanced even when UTF-8 validation failed.
        self.line_count += 1;
        self.last_line_start = self.last_line_end;
        self.last_line_end += u64::try_from(line_end - self.buffer_start).unwrap();
        self.buffer_start = line_end;
        result
    }
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use std::error::Error;
    /// Shared fixture: two variables plus rows exercising IRIs, quote
    /// escaping, blank nodes, unbound cells, language tags, typed literals
    /// and characters that need escaping in CSV/TSV.
    fn build_example() -> (Vec<Variable>, Vec<Vec<Option<Term>>>) {
        (
            vec![
                Variable::new_unchecked("x"),
                Variable::new_unchecked("literal"),
            ],
            vec![
                vec![
                    Some(NamedNode::new_unchecked("http://example/x").into()),
                    Some(Literal::new_simple_literal("String").into()),
                ],
                vec![
                    Some(NamedNode::new_unchecked("http://example/x").into()),
                    Some(Literal::new_simple_literal("String-with-dquote\"").into()),
                ],
                vec![
                    Some(BlankNode::new_unchecked("b0").into()),
                    Some(Literal::new_simple_literal("Blank node").into()),
                ],
                vec![
                    None,
                    Some(Literal::new_simple_literal("Missing 'x'").into()),
                ],
                vec![None, None],
                vec![
                    Some(NamedNode::new_unchecked("http://example/x").into()),
                    None,
                ],
                vec![
                    Some(BlankNode::new_unchecked("b1").into()),
                    Some(
                        Literal::new_language_tagged_literal_unchecked("String-with-lang", "en")
                            .into(),
                    ),
                ],
                vec![
                    Some(BlankNode::new_unchecked("b1").into()),
                    Some(Literal::new_typed_literal("123", xsd::INTEGER).into()),
                ],
                vec![
                    None,
                    Some(Literal::new_simple_literal("escape,\t\r\n").into()),
                ],
            ],
        )
    }
    // CSV is write-only: check the serialization against a golden string.
    #[test]
    fn test_csv_serialization() {
        let (variables, solutions) = build_example();
        let mut buffer = String::new();
        let writer = InnerCsvSolutionsWriter::start(&mut buffer, variables.clone());
        for solution in solutions {
            writer.write(
                &mut buffer,
                variables
                    .iter()
                    .zip(&solution)
                    .filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))),
            );
        }
        assert_eq!(buffer, "x,literal\r\nhttp://example/x,String\r\nhttp://example/x,\"String-with-dquote\"\"\"\r\n_:b0,Blank node\r\n,Missing 'x'\r\n,\r\nhttp://example/x,\r\n_:b1,String-with-lang\r\n_:b1,123\r\n,\"escape,\t\r\n\"\r\n");
    }
    // TSV is lossless: serialize, compare to a golden string, then parse back
    // and compare to the original rows.
    #[test]
    fn test_tsv_roundtrip() -> Result<(), Box<dyn Error>> {
        let (variables, solutions) = build_example();
        // Write
        let mut buffer = String::new();
        let writer = InnerTsvSolutionsWriter::start(&mut buffer, variables.clone());
        for solution in &solutions {
            writer.write(
                &mut buffer,
                variables
                    .iter()
                    .zip(solution)
                    .filter_map(|(v, s)| s.as_ref().map(|s| (v.as_ref(), s.as_ref()))),
            );
        }
        assert_eq!(buffer, "?x\t?literal\n<http://example/x>\t\"String\"\n<http://example/x>\t\"String-with-dquote\\\"\"\n_:b0\t\"Blank node\"\n\t\"Missing 'x'\"\n\t\n<http://example/x>\t\n_:b1\t\"String-with-lang\"@en\n_:b1\t123\n\t\"escape,\\t\\r\\n\"\n");
        // Read
        if let FromReadTsvQueryResultsReader::Solutions {
            solutions: mut solutions_iter,
            variables: actual_variables,
        } = FromReadTsvQueryResultsReader::read(buffer.as_bytes())?
        {
            assert_eq!(actual_variables.as_slice(), variables.as_slice());
            let mut rows = Vec::new();
            while let Some(row) = solutions_iter.read_next()? {
                rows.push(row);
            }
            assert_eq!(rows, solutions);
        } else {
            unreachable!()
        }
        Ok(())
    }
    // Malformed inputs must error (or stop) without panicking; the last case
    // exercises the maximal-buffer-size guard with a very long line.
    #[test]
    fn test_bad_tsv() {
        let mut bad_tsvs = vec![
            "?",
            "?p",
            "?p?o",
            "?p\n<",
            "?p\n_",
            "?p\n_:",
            "?p\n\"",
            "?p\n<<",
            "?p\n1\t2\n",
            "?p\n\n",
        ];
        let a_lot_of_strings = format!("?p\n{}\n", "<".repeat(100_000));
        bad_tsvs.push(&a_lot_of_strings);
        for bad_tsv in bad_tsvs {
            if let Ok(FromReadTsvQueryResultsReader::Solutions { mut solutions, .. }) =
                FromReadTsvQueryResultsReader::read(bad_tsv.as_bytes())
            {
                while let Ok(Some(_)) = solutions.read_next() {}
            }
        }
    }
    // Zero-column result sets serialize to bare line terminators.
    #[test]
    fn test_no_columns_csv_serialization() {
        let mut buffer = String::new();
        let writer = InnerCsvSolutionsWriter::start(&mut buffer, Vec::new());
        writer.write(&mut buffer, []);
        assert_eq!(buffer, "\r\n\r\n");
    }
    #[test]
    fn test_no_columns_tsv_serialization() {
        let mut buffer = String::new();
        let writer = InnerTsvSolutionsWriter::start(&mut buffer, Vec::new());
        writer.write(&mut buffer, []);
        assert_eq!(buffer, "\n\n");
    }
    #[test]
    fn test_no_columns_tsv_parsing() -> io::Result<()> {
        if let FromReadTsvQueryResultsReader::Solutions {
            mut solutions,
            variables,
        } = FromReadTsvQueryResultsReader::read(b"\n\n".as_slice())?
        {
            assert_eq!(variables, Vec::<Variable>::new());
            assert_eq!(solutions.read_next()?, Some(Vec::new()));
            assert_eq!(solutions.read_next()?, None);
        } else {
            unreachable!()
        }
        Ok(())
    }
    // Header-only result sets: a header line and no data rows.
    #[test]
    fn test_no_results_csv_serialization() {
        let mut buffer = String::new();
        InnerCsvSolutionsWriter::start(&mut buffer, vec![Variable::new_unchecked("a")]);
        assert_eq!(buffer, "a\r\n");
    }
    #[test]
    fn test_no_results_tsv_serialization() {
        let mut buffer = String::new();
        InnerTsvSolutionsWriter::start(&mut buffer, vec![Variable::new_unchecked("a")]);
        assert_eq!(buffer, "?a\n");
    }
    #[test]
    fn test_no_results_tsv_parsing() -> io::Result<()> {
        if let FromReadTsvQueryResultsReader::Solutions {
            mut solutions,
            variables,
        } = FromReadTsvQueryResultsReader::read(b"?a\n".as_slice())?
        {
            assert_eq!(variables, vec![Variable::new_unchecked("a")]);
            assert_eq!(solutions.read_next()?, None);
        } else {
            unreachable!()
        }
        Ok(())
    }
}

@ -0,0 +1,157 @@
use crate::oxrdf::TermParseError;
use std::io;
use std::ops::Range;
use std::sync::Arc;
/// Error returned during SPARQL result format parsing.
#[derive(Debug, thiserror::Error)]
pub enum QueryResultsParseError {
    /// I/O error during parsing (file not found...).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// An error in the file syntax.
    #[error(transparent)]
    Syntax(#[from] QueryResultsSyntaxError),
}
impl From<QueryResultsParseError> for io::Error {
    /// Extracts the underlying I/O error, or converts the syntax error into one.
    #[inline]
    fn from(error: QueryResultsParseError) -> Self {
        match error {
            QueryResultsParseError::Syntax(syntax) => syntax.into(),
            QueryResultsParseError::Io(io_error) => io_error,
        }
    }
}
impl From<json_event_parser::ParseError> for QueryResultsParseError {
    /// Preserves the I/O vs. syntax distinction of the JSON parser error.
    fn from(error: json_event_parser::ParseError) -> Self {
        match error {
            json_event_parser::ParseError::Io(io_error) => Self::Io(io_error),
            json_event_parser::ParseError::Syntax(syntax_error) => {
                Self::Syntax(QueryResultsSyntaxError::from(syntax_error))
            }
        }
    }
}
impl From<quick_xml::Error> for QueryResultsParseError {
#[inline]
fn from(error: quick_xml::Error) -> Self {
match error {
quick_xml::Error::Io(error) => {
Self::Io(Arc::try_unwrap(error).unwrap_or_else(|e| io::Error::new(e.kind(), e)))
}
_ => Self::Syntax(QueryResultsSyntaxError(SyntaxErrorKind::Xml(error))),
}
}
}
impl From<quick_xml::escape::EscapeError> for QueryResultsParseError {
#[inline]
fn from(error: quick_xml::escape::EscapeError) -> Self {
quick_xml::Error::from(error).into()
}
}
/// An error in the syntax of the parsed file.
///
/// Thin public wrapper around the crate-private [`SyntaxErrorKind`].
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct QueryResultsSyntaxError(#[from] pub(crate) SyntaxErrorKind);
// Crate-private error kinds wrapped by `QueryResultsSyntaxError`.
#[derive(Debug, thiserror::Error)]
pub(crate) enum SyntaxErrorKind {
    /// Invalid JSON, reported by `json_event_parser`.
    #[error(transparent)]
    Json(#[from] json_event_parser::SyntaxError),
    /// Invalid XML, reported by `quick_xml`.
    #[error(transparent)]
    Xml(#[from] quick_xml::Error),
    /// A cell that could not be parsed as an RDF term, with its text location.
    #[error("Error {error} on '{term}' in line {}", location.start.line + 1)]
    Term {
        #[source]
        error: TermParseError,
        term: String,
        location: Range<TextPosition>,
    },
    /// A free-form message, with an optional text location.
    #[error("{msg}")]
    Msg {
        msg: String,
        location: Option<Range<TextPosition>>,
    },
}
impl QueryResultsSyntaxError {
    /// Builds an error that carries only a message, without a position.
    #[inline]
    pub(crate) fn msg(msg: impl Into<String>) -> Self {
        Self(SyntaxErrorKind::Msg {
            msg: msg.into(),
            location: None,
        })
    }
    /// Builds an error that carries a message together with its position.
    #[inline]
    pub(crate) fn located_message(msg: impl Into<String>, location: Range<TextPosition>) -> Self {
        Self(SyntaxErrorKind::Msg {
            msg: msg.into(),
            location: Some(location),
        })
    }
    /// The location of the error inside of the file, when one is known.
    #[inline]
    pub fn location(&self) -> Option<Range<TextPosition>> {
        match &self.0 {
            // quick_xml errors do not expose a position.
            SyntaxErrorKind::Xml(_) => None,
            SyntaxErrorKind::Msg { location, .. } => location.clone(),
            SyntaxErrorKind::Term { location, .. } => Some(location.clone()),
            SyntaxErrorKind::Json(e) => {
                // Translate the JSON parser's location type into ours.
                let l = e.location();
                let start = TextPosition {
                    line: l.start.line,
                    column: l.start.column,
                    offset: l.start.offset,
                };
                let end = TextPosition {
                    line: l.end.line,
                    column: l.end.column,
                    offset: l.end.offset,
                };
                Some(start..end)
            }
        }
    }
}
impl From<QueryResultsSyntaxError> for io::Error {
#[inline]
fn from(error: QueryResultsSyntaxError) -> Self {
match error.0 {
SyntaxErrorKind::Json(error) => Self::new(io::ErrorKind::InvalidData, error),
SyntaxErrorKind::Xml(error) => match error {
quick_xml::Error::Io(error) => {
Arc::try_unwrap(error).unwrap_or_else(|e| Self::new(e.kind(), e))
}
quick_xml::Error::UnexpectedEof(error) => {
Self::new(io::ErrorKind::UnexpectedEof, error)
}
_ => Self::new(io::ErrorKind::InvalidData, error),
},
SyntaxErrorKind::Term { .. } => Self::new(io::ErrorKind::InvalidData, error),
SyntaxErrorKind::Msg { msg, .. } => Self::new(io::ErrorKind::InvalidData, msg),
}
}
}
impl From<json_event_parser::SyntaxError> for QueryResultsSyntaxError {
    /// Wraps a raw JSON syntax error into the shared syntax-error type.
    #[inline]
    fn from(error: json_event_parser::SyntaxError) -> Self {
        Self(SyntaxErrorKind::Json(error))
    }
}
/// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// Line number, starting from 0.
    pub line: u64,
    /// Column in code points, starting from 0.
    pub column: u64,
    /// Byte offset from the start of the file.
    pub offset: u64,
}

@ -0,0 +1,176 @@
use std::fmt;
/// [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
pub enum QueryResultsFormat {
    /// [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/)
    Xml,
    /// [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/)
    Json,
    /// [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
    Csv,
    /// [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/)
    Tsv,
}
impl QueryResultsFormat {
    /// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
    ///
    /// NOTE(review): examples in this impl are marked `ignore` — `sparesults`
    /// is now a module of this crate (`crate::sparesults`), so doctests can no
    /// longer `use sparesults::…`; confirm the public re-export path before
    /// re-enabling them.
    ///
    /// ```ignore
    /// use sparesults::QueryResultsFormat;
    ///
    /// assert_eq!(
    ///     QueryResultsFormat::Json.iri(),
    ///     "http://www.w3.org/ns/formats/SPARQL_Results_JSON"
    /// )
    /// ```
    #[inline]
    pub fn iri(self) -> &'static str {
        match self {
            Self::Xml => "http://www.w3.org/ns/formats/SPARQL_Results_XML",
            Self::Json => "http://www.w3.org/ns/formats/SPARQL_Results_JSON",
            Self::Csv => "http://www.w3.org/ns/formats/SPARQL_Results_CSV",
            Self::Tsv => "http://www.w3.org/ns/formats/SPARQL_Results_TSV",
        }
    }
    /// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
    ///
    /// ```ignore
    /// use sparesults::QueryResultsFormat;
    ///
    /// assert_eq!(
    ///     QueryResultsFormat::Json.media_type(),
    ///     "application/sparql-results+json"
    /// )
    /// ```
    #[inline]
    pub fn media_type(self) -> &'static str {
        match self {
            Self::Xml => "application/sparql-results+xml",
            Self::Json => "application/sparql-results+json",
            Self::Csv => "text/csv; charset=utf-8",
            Self::Tsv => "text/tab-separated-values; charset=utf-8",
        }
    }
    /// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
    ///
    /// ```ignore
    /// use sparesults::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.file_extension(), "srj")
    /// ```
    #[inline]
    pub fn file_extension(self) -> &'static str {
        match self {
            Self::Xml => "srx",
            Self::Json => "srj",
            Self::Csv => "csv",
            Self::Tsv => "tsv",
        }
    }
    /// The format name.
    ///
    /// ```ignore
    /// use sparesults::QueryResultsFormat;
    ///
    /// assert_eq!(QueryResultsFormat::Json.name(), "SPARQL Results in JSON")
    /// ```
    #[inline]
    pub const fn name(self) -> &'static str {
        match self {
            Self::Xml => "SPARQL Results in XML",
            Self::Json => "SPARQL Results in JSON",
            Self::Csv => "SPARQL Results in CSV",
            Self::Tsv => "SPARQL Results in TSV",
        }
    }
    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
    /// For example, "application/xml" is going to return `Xml` even if it is not its canonical media type.
    ///
    /// Example:
    /// ```ignore
    /// use sparesults::QueryResultsFormat;
    ///
    /// assert_eq!(
    ///     QueryResultsFormat::from_media_type("application/sparql-results+json; charset=utf-8"),
    ///     Some(QueryResultsFormat::Json)
    /// )
    /// ```
    #[inline]
    pub fn from_media_type(media_type: &str) -> Option<Self> {
        // Subtype alias table; an `x-` prefix is stripped before lookup.
        const MEDIA_SUBTYPES: [(&str, QueryResultsFormat); 8] = [
            ("csv", QueryResultsFormat::Csv),
            ("json", QueryResultsFormat::Json),
            ("plain", QueryResultsFormat::Csv),
            ("sparql-results+json", QueryResultsFormat::Json),
            ("sparql-results+xml", QueryResultsFormat::Xml),
            ("tab-separated-values", QueryResultsFormat::Tsv),
            ("tsv", QueryResultsFormat::Tsv),
            ("xml", QueryResultsFormat::Xml),
        ];
        // Drop any parameters ("; charset=…"), then split into type/subtype.
        let (r#type, subtype) = media_type
            .split_once(';')
            .unwrap_or((media_type, ""))
            .0
            .trim()
            .split_once('/')?;
        let r#type = r#type.trim();
        // Only `application/…` and `text/…` media types are recognized.
        if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") {
            return None;
        }
        let subtype = subtype.trim();
        let subtype = subtype.strip_prefix("x-").unwrap_or(subtype);
        for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES {
            if candidate_subtype.eq_ignore_ascii_case(subtype) {
                return Some(candidate_id);
            }
        }
        None
    }
    /// Looks for a known format from an extension.
    ///
    /// It supports some aliases.
    ///
    /// Example:
    /// ```ignore
    /// use sparesults::QueryResultsFormat;
    ///
    /// assert_eq!(
    ///     QueryResultsFormat::from_extension("json"),
    ///     Some(QueryResultsFormat::Json)
    /// )
    /// ```
    #[inline]
    pub fn from_extension(extension: &str) -> Option<Self> {
        // Extension alias table; lookup below is case-insensitive.
        const MEDIA_TYPES: [(&str, QueryResultsFormat); 7] = [
            ("csv", QueryResultsFormat::Csv),
            ("json", QueryResultsFormat::Json),
            ("srj", QueryResultsFormat::Json),
            ("srx", QueryResultsFormat::Xml),
            ("tsv", QueryResultsFormat::Tsv),
            ("txt", QueryResultsFormat::Csv),
            ("xml", QueryResultsFormat::Xml),
        ];
        for (candidate_extension, candidate_id) in MEDIA_TYPES {
            if candidate_extension.eq_ignore_ascii_case(extension) {
                return Some(candidate_id);
            }
        }
        None
    }
}
impl fmt::Display for QueryResultsFormat {
    /// Writes the human-readable [`name`](Self::name) of the format.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.name())
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,16 @@
mod csv;
mod error;
mod format;
mod json;
mod parser;
mod serializer;
pub mod solution;
mod xml;
pub use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError, TextPosition};
pub use crate::sparesults::format::QueryResultsFormat;
pub use crate::sparesults::parser::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsParser,
};
pub use crate::sparesults::serializer::{QueryResultsSerializer, ToWriteSolutionsWriter};
pub use crate::sparesults::solution::QuerySolution;

@ -0,0 +1,460 @@
use crate::oxrdf::Variable;
use crate::sparesults::csv::{FromReadTsvQueryResultsReader, FromReadTsvSolutionsReader};
#[cfg(feature = "async-tokio")]
use crate::sparesults::csv::{
FromTokioAsyncReadTsvQueryResultsReader, FromTokioAsyncReadTsvSolutionsReader,
};
use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError};
use crate::sparesults::format::QueryResultsFormat;
use crate::sparesults::json::{FromReadJsonQueryResultsReader, FromReadJsonSolutionsReader};
#[cfg(feature = "async-tokio")]
use crate::sparesults::json::{
FromTokioAsyncReadJsonQueryResultsReader, FromTokioAsyncReadJsonSolutionsReader,
};
use crate::sparesults::solution::QuerySolution;
use crate::sparesults::xml::{FromReadXmlQueryResultsReader, FromReadXmlSolutionsReader};
#[cfg(feature = "async-tokio")]
use crate::sparesults::xml::{
FromTokioAsyncReadXmlQueryResultsReader, FromTokioAsyncReadXmlSolutionsReader,
};
use std::io::Read;
use std::sync::Arc;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;
/// Parsers for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
///
/// It currently supports the following formats:
/// * [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml)).
/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json)).
/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv)).
///
/// NOTE(review): example `ignore`d — `sparesults` is now a module of this
/// crate (`crate::sparesults`), not a crate a doctest can `use`; confirm the
/// public re-export path before re-enabling.
///
/// Example in JSON (the API is the same for XML and TSV):
/// ```ignore
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
/// use oxrdf::{Literal, Variable};
///
/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
/// // boolean
/// if let FromReadQueryResultsReader::Boolean(v) = json_parser.parse_read(br#"{"boolean":true}"#.as_slice())? {
///     assert_eq!(v, true);
/// }
/// // solutions
/// if let FromReadQueryResultsReader::Solutions(solutions) = json_parser.parse_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice())? {
///     assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
///     for solution in solutions {
///         assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
///     }
/// }
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
/// ```
pub struct QueryResultsParser {
    // The serialization this parser expects; set once in `from_format`.
    format: QueryResultsFormat,
}
impl QueryResultsParser {
    /// Builds a parser for the given format.
    #[inline]
    pub fn from_format(format: QueryResultsFormat) -> Self {
        Self { format }
    }
    /// Reads a result file from a [`Read`] implementation.
    ///
    /// Reads are automatically buffered.
    ///
    /// NOTE(review): example `ignore`d — `sparesults` is now a module of this
    /// crate, so the doctest import no longer resolves.
    ///
    /// Example in XML (the API is the same for JSON and TSV):
    /// ```ignore
    /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
    /// use oxrdf::{Literal, Variable};
    ///
    /// let xml_parser = QueryResultsParser::from_format(QueryResultsFormat::Xml);
    ///
    /// // boolean
    /// if let FromReadQueryResultsReader::Boolean(v) = xml_parser.parse_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head/><boolean>true</boolean></sparql>"#.as_slice())? {
    ///     assert_eq!(v, true);
    /// }
    ///
    /// // solutions
    /// if let FromReadQueryResultsReader::Solutions(solutions) = xml_parser.parse_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#.as_slice())? {
    ///     assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
    ///     for solution in solutions {
    ///         assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
    ///     }
    /// }
    /// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
    /// ```
    pub fn parse_read<R: Read>(
        &self,
        reader: R,
    ) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> {
        // Dispatch to the format-specific reader and wrap its output in the
        // format-agnostic enum.
        Ok(match self.format {
            QueryResultsFormat::Xml => match FromReadXmlQueryResultsReader::read(reader)? {
                FromReadXmlQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r),
                FromReadXmlQueryResultsReader::Solutions {
                    solutions,
                    variables,
                } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader {
                    variables: variables.into(),
                    solutions: FromReadSolutionsReaderKind::Xml(solutions),
                }),
            },
            QueryResultsFormat::Json => match FromReadJsonQueryResultsReader::read(reader)? {
                FromReadJsonQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r),
                FromReadJsonQueryResultsReader::Solutions {
                    solutions,
                    variables,
                } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader {
                    variables: variables.into(),
                    solutions: FromReadSolutionsReaderKind::Json(solutions),
                }),
            },
            // CSV results cannot be parsed back: the serialization is lossy.
            QueryResultsFormat::Csv => return Err(QueryResultsSyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()),
            QueryResultsFormat::Tsv => match FromReadTsvQueryResultsReader::read(reader)? {
                FromReadTsvQueryResultsReader::Boolean(r) => FromReadQueryResultsReader::Boolean(r),
                FromReadTsvQueryResultsReader::Solutions {
                    solutions,
                    variables,
                } => FromReadQueryResultsReader::Solutions(FromReadSolutionsReader {
                    variables: variables.into(),
                    solutions: FromReadSolutionsReaderKind::Tsv(solutions),
                }),
            },
        })
    }
    /// Deprecated alias of [`Self::parse_read`].
    #[deprecated(note = "use parse_read", since = "0.4.0")]
    pub fn read_results<R: Read>(
        &self,
        reader: R,
    ) -> Result<FromReadQueryResultsReader<R>, QueryResultsParseError> {
        self.parse_read(reader)
    }
    /// Reads a result file from a Tokio [`AsyncRead`] implementation.
    ///
    /// Reads are automatically buffered.
    ///
    /// NOTE(review): example `ignore`d — `sparesults` is now a module of this
    /// crate, so the doctest import no longer resolves.
    ///
    /// Example in XML (the API is the same for JSON and TSV):
    /// ```ignore
    /// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader};
    /// use oxrdf::{Literal, Variable};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
    /// let xml_parser = QueryResultsParser::from_format(QueryResultsFormat::Xml);
    ///
    /// // boolean
    /// if let FromTokioAsyncReadQueryResultsReader::Boolean(v) = xml_parser.parse_tokio_async_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head/><boolean>true</boolean></sparql>"#.as_slice()).await? {
    ///     assert_eq!(v, true);
    /// }
    ///
    /// // solutions
    /// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = xml_parser.parse_tokio_async_read(br#"<sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#.as_slice()).await? {
    ///     assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
    ///     while let Some(solution) = solutions.next().await {
    ///         assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
    ///     }
    /// }
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub async fn parse_tokio_async_read<R: AsyncRead + Unpin>(
        &self,
        reader: R,
    ) -> Result<FromTokioAsyncReadQueryResultsReader<R>, QueryResultsParseError> {
        // Same dispatch as `parse_read`, with the Tokio reader family.
        Ok(match self.format {
            QueryResultsFormat::Xml => match FromTokioAsyncReadXmlQueryResultsReader::read(reader).await? {
                FromTokioAsyncReadXmlQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r),
                FromTokioAsyncReadXmlQueryResultsReader::Solutions {
                    solutions,
                    variables,
                } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader {
                    variables: variables.into(),
                    solutions: FromTokioAsyncReadSolutionsReaderKind::Xml(solutions),
                }),
            },
            QueryResultsFormat::Json => match FromTokioAsyncReadJsonQueryResultsReader::read(reader).await? {
                FromTokioAsyncReadJsonQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r),
                FromTokioAsyncReadJsonQueryResultsReader::Solutions {
                    solutions,
                    variables,
                } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader {
                    variables: variables.into(),
                    solutions: FromTokioAsyncReadSolutionsReaderKind::Json(solutions),
                }),
            },
            // CSV results cannot be parsed back: the serialization is lossy.
            QueryResultsFormat::Csv => return Err(QueryResultsSyntaxError::msg("CSV SPARQL results syntax is lossy and can't be parsed to a proper RDF representation").into()),
            QueryResultsFormat::Tsv => match FromTokioAsyncReadTsvQueryResultsReader::read(reader).await? {
                FromTokioAsyncReadTsvQueryResultsReader::Boolean(r) => FromTokioAsyncReadQueryResultsReader::Boolean(r),
                FromTokioAsyncReadTsvQueryResultsReader::Solutions {
                    solutions,
                    variables,
                } => FromTokioAsyncReadQueryResultsReader::Solutions(FromTokioAsyncReadSolutionsReader {
                    variables: variables.into(),
                    solutions: FromTokioAsyncReadSolutionsReaderKind::Tsv(solutions),
                }),
            },
        })
    }
}
impl From<QueryResultsFormat> for QueryResultsParser {
    /// Shorthand for [`QueryResultsParser::from_format`].
    #[inline]
    fn from(format: QueryResultsFormat) -> Self {
        Self { format }
    }
}
/// The reader for a given read of a results file.
///
/// It is either a read boolean ([`bool`]) or a streaming reader of a set of solutions ([`FromReadSolutionsReader`]).
///
/// NOTE(review): example `ignore`d — `sparesults` is now a module of this
/// crate, so the doctest import no longer resolves.
///
/// Example in TSV (the API is the same for JSON and XML):
/// ```ignore
/// use oxrdf::{Literal, Variable};
/// use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser};
///
/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
///
/// // boolean
/// if let FromReadQueryResultsReader::Boolean(v) = tsv_parser.parse_read(b"true".as_slice())? {
///     assert_eq!(v, true);
/// }
///
/// // solutions
/// if let FromReadQueryResultsReader::Solutions(solutions) =
///     tsv_parser.parse_read(b"?foo\t?bar\n\"test\"\t".as_slice())?
/// {
///     assert_eq!(
///         solutions.variables(),
///         &[
///             Variable::new_unchecked("foo"),
///             Variable::new_unchecked("bar")
///         ]
///     );
///     for solution in solutions {
///         assert_eq!(
///             solution?.iter().collect::<Vec<_>>(),
///             vec![(
///                 &Variable::new_unchecked("foo"),
///                 &Literal::from("test").into()
///             )]
///         );
///     }
/// }
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
/// ```
pub enum FromReadQueryResultsReader<R: Read> {
    /// A streaming set of solutions.
    Solutions(FromReadSolutionsReader<R>),
    /// A single boolean result.
    Boolean(bool),
}
/// A streaming reader of a set of [`QuerySolution`] solutions.
///
/// It implements the [`Iterator`] API to iterate over the solutions.
///
/// NOTE(review): example `ignore`d — `sparesults` is now a module of this
/// crate, so the doctest import no longer resolves.
///
/// Example in JSON (the API is the same for XML and TSV):
/// ```ignore
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader};
/// use oxrdf::{Literal, Variable};
///
/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
/// if let FromReadQueryResultsReader::Solutions(solutions) = json_parser.parse_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice())? {
///     assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
///     for solution in solutions {
///         assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
///     }
/// }
/// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
/// ```
pub struct FromReadSolutionsReader<R: Read> {
    // Shared with every emitted QuerySolution (cheap Arc clone per row).
    variables: Arc<[Variable]>,
    solutions: FromReadSolutionsReaderKind<R>,
}
// Format-specific streaming readers behind a common interface.
enum FromReadSolutionsReaderKind<R: Read> {
    Xml(FromReadXmlSolutionsReader<R>),
    Json(FromReadJsonSolutionsReader<R>),
    Tsv(FromReadTsvSolutionsReader<R>),
}
impl<R: Read> FromReadSolutionsReader<R> {
    /// Ordered list of the variables declared at the beginning of the results.
    ///
    /// Example in TSV (the API is the same for JSON and XML), `ignore`d since
    /// `sparesults` is now a module of this crate:
    /// ```ignore
    /// use oxrdf::Variable;
    /// use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser};
    ///
    /// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
    /// if let FromReadQueryResultsReader::Solutions(solutions) =
    ///     tsv_parser.parse_read(b"?foo\t?bar\n\"ex1\"\t\"ex2\"".as_slice())?
    /// {
    ///     assert_eq!(
    ///         solutions.variables(),
    ///         &[
    ///             Variable::new_unchecked("foo"),
    ///             Variable::new_unchecked("bar")
    ///         ]
    ///     );
    /// }
    /// # Result::<(),sparesults::QueryResultsParseError>::Ok(())
    /// ```
    #[inline]
    pub fn variables(&self) -> &[Variable] {
        self.variables.as_ref()
    }
}
impl<R: Read> Iterator for FromReadSolutionsReader<R> {
    type Item = Result<QuerySolution, QueryResultsParseError>;

    /// Pulls the next row from the format-specific reader and pairs it with
    /// the shared variable list to build a [`QuerySolution`].
    fn next(&mut self) -> Option<Self::Item> {
        let next_row = match &mut self.solutions {
            FromReadSolutionsReaderKind::Xml(reader) => reader.read_next(),
            FromReadSolutionsReaderKind::Json(reader) => reader.read_next(),
            FromReadSolutionsReaderKind::Tsv(reader) => reader.read_next(),
        };
        match next_row {
            Err(e) => Some(Err(e)),
            Ok(None) => None,
            Ok(Some(values)) => Some(Ok((Arc::clone(&self.variables), values).into())),
        }
    }
}
/// The reader for a given read of a results file.
///
/// It is either a read boolean ([`bool`]) or a streaming reader of a set of solutions ([`FromReadSolutionsReader`]).
///
/// NOTE(review): example `ignore`d — `sparesults` is now a module of this
/// crate, so the doctest import no longer resolves.
///
/// Example in TSV (the API is the same for JSON and XML):
/// ```ignore
/// use oxrdf::{Literal, Variable};
/// use sparesults::{
///     FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser,
/// };
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
/// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
///
/// // boolean
/// if let FromTokioAsyncReadQueryResultsReader::Boolean(v) = tsv_parser
///     .parse_tokio_async_read(b"true".as_slice())
///     .await?
/// {
///     assert_eq!(v, true);
/// }
///
/// // solutions
/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = tsv_parser
///     .parse_tokio_async_read(b"?foo\t?bar\n\"test\"\t".as_slice())
///     .await?
/// {
///     assert_eq!(
///         solutions.variables(),
///         &[
///             Variable::new_unchecked("foo"),
///             Variable::new_unchecked("bar")
///         ]
///     );
///     while let Some(solution) = solutions.next().await {
///         assert_eq!(
///             solution?.iter().collect::<Vec<_>>(),
///             vec![(
///                 &Variable::new_unchecked("foo"),
///                 &Literal::from("test").into()
///             )]
///         );
///     }
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub enum FromTokioAsyncReadQueryResultsReader<R: AsyncRead + Unpin> {
    /// A streaming set of solutions.
    Solutions(FromTokioAsyncReadSolutionsReader<R>),
    /// A single boolean result.
    Boolean(bool),
}
/// A streaming reader of a set of [`QuerySolution`] solutions.
///
/// It implements the [`Iterator`] API to iterate over the solutions.
///
/// Example in JSON (the API is the same for XML and TSV):
/// ```
/// use sparesults::{QueryResultsFormat, QueryResultsParser, FromTokioAsyncReadQueryResultsReader};
/// use oxrdf::{Literal, Variable};
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
/// let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
/// if let FromTokioAsyncReadQueryResultsReader::Solutions(mut solutions) = json_parser.parse_tokio_async_read(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).await? {
/// assert_eq!(solutions.variables(), &[Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]);
/// while let Some(solution) = solutions.next().await {
/// assert_eq!(solution?.iter().collect::<Vec<_>>(), vec![(&Variable::new_unchecked("foo"), &Literal::from("test").into())]);
/// }
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadSolutionsReader<R: AsyncRead + Unpin> {
    // Declared variables, shared with every emitted `QuerySolution` (cheap `Arc` clone per row).
    variables: Arc<[Variable]>,
    // Format-specific streaming parser (JSON, XML or TSV).
    solutions: FromTokioAsyncReadSolutionsReaderKind<R>,
}
#[cfg(feature = "async-tokio")]
/// Dispatch between the per-format async solution parsers.
enum FromTokioAsyncReadSolutionsReaderKind<R: AsyncRead + Unpin> {
    Json(FromTokioAsyncReadJsonSolutionsReader<R>),
    Xml(FromTokioAsyncReadXmlSolutionsReader<R>),
    Tsv(FromTokioAsyncReadTsvSolutionsReader<R>),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadSolutionsReader<R> {
    /// Ordered list of the variables declared at the beginning of the results.
    ///
    /// Example in TSV (the API is the same for JSON and XML):
    /// ```
    /// use oxrdf::Variable;
    /// use sparesults::{
    ///     FromTokioAsyncReadQueryResultsReader, QueryResultsFormat, QueryResultsParser,
    /// };
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), sparesults::QueryResultsParseError> {
    /// let tsv_parser = QueryResultsParser::from_format(QueryResultsFormat::Tsv);
    /// if let FromTokioAsyncReadQueryResultsReader::Solutions(solutions) = tsv_parser
    ///     .parse_tokio_async_read(b"?foo\t?bar\n\"ex1\"\t\"ex2\"".as_slice())
    ///     .await?
    /// {
    ///     assert_eq!(
    ///         solutions.variables(),
    ///         &[
    ///             Variable::new_unchecked("foo"),
    ///             Variable::new_unchecked("bar")
    ///         ]
    ///     );
    /// }
    /// # Ok(())
    /// # }
    /// ```
    #[inline]
    pub fn variables(&self) -> &[Variable] {
        self.variables.as_ref()
    }

    /// Reads the next solution, or returns `None` once the input is exhausted.
    pub async fn next(&mut self) -> Option<Result<QuerySolution, QueryResultsParseError>> {
        // Pull one row from whichever format-specific reader backs this stream.
        let step = match &mut self.solutions {
            FromTokioAsyncReadSolutionsReaderKind::Json(reader) => reader.read_next().await,
            FromTokioAsyncReadSolutionsReaderKind::Xml(reader) => reader.read_next().await,
            FromTokioAsyncReadSolutionsReaderKind::Tsv(reader) => reader.read_next().await,
        };
        // End of stream maps to `None`; errors and rows are forwarded to the caller.
        let row = step.transpose()?;
        Some(row.map(|values| (Arc::clone(&self.variables), values).into()))
    }
}

// @ -0,0 +1,427 @@ (stray diff-hunk header left over from file concatenation)
use crate::oxrdf::{TermRef, Variable, VariableRef};
#[cfg(feature = "async-tokio")]
use crate::sparesults::csv::{
tokio_async_write_boolean_csv_result, ToTokioAsyncWriteCsvSolutionsWriter,
ToTokioAsyncWriteTsvSolutionsWriter,
};
use crate::sparesults::csv::{
write_boolean_csv_result, ToWriteCsvSolutionsWriter, ToWriteTsvSolutionsWriter,
};
use crate::sparesults::format::QueryResultsFormat;
#[cfg(feature = "async-tokio")]
use crate::sparesults::json::{
tokio_async_write_boolean_json_result, ToTokioAsyncWriteJsonSolutionsWriter,
};
use crate::sparesults::json::{write_boolean_json_result, ToWriteJsonSolutionsWriter};
#[cfg(feature = "async-tokio")]
use crate::sparesults::xml::{
tokio_async_write_boolean_xml_result, ToTokioAsyncWriteXmlSolutionsWriter,
};
use crate::sparesults::xml::{write_boolean_xml_result, ToWriteXmlSolutionsWriter};
use std::io::{self, Write};
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncWrite;
/// A serializer for [SPARQL query](https://www.w3.org/TR/sparql11-query/) results serialization formats.
///
/// It currently supports the following formats:
/// * [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/) ([`QueryResultsFormat::Xml`](QueryResultsFormat::Xml))
/// * [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) ([`QueryResultsFormat::Json`](QueryResultsFormat::Json))
/// * [SPARQL Query Results CSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Csv`](QueryResultsFormat::Csv))
/// * [SPARQL Query Results TSV Format](https://www.w3.org/TR/sparql11-results-csv-tsv/) ([`QueryResultsFormat::Tsv`](QueryResultsFormat::Tsv))
///
/// Example in JSON (the API is the same for XML, CSV and TSV):
/// ```
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
/// use oxrdf::{LiteralRef, Variable, VariableRef};
/// use std::iter::once;
///
/// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
///
/// // boolean
/// let mut buffer = Vec::new();
/// json_serializer.serialize_boolean_to_write(&mut buffer, true)?;
/// assert_eq!(buffer, br#"{"head":{},"boolean":true}"#);
///
/// // solutions
/// let mut buffer = Vec::new();
/// let mut writer = json_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?;
/// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?;
/// writer.finish()?;
/// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#);
/// # std::io::Result::Ok(())
/// ```
pub struct QueryResultsSerializer {
    // Output syntax all `serialize_*` methods will produce.
    format: QueryResultsFormat,
}
impl QueryResultsSerializer {
    /// Builds a serializer for the given format.
    #[inline]
    pub fn from_format(format: QueryResultsFormat) -> Self {
        Self { format }
    }
    /// Write a boolean query result (from an `ASK` query) into the given [`Write`] implementation.
    ///
    /// Example in XML (the API is the same for JSON, CSV and TSV):
    /// ```
    /// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
    ///
    /// let xml_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Xml);
    /// let mut buffer = Vec::new();
    /// xml_serializer.serialize_boolean_to_write(&mut buffer, true)?;
    /// assert_eq!(buffer, br#"<?xml version="1.0"?><sparql xmlns="http://www.w3.org/2005/sparql-results#"><head></head><boolean>true</boolean></sparql>"#);
    /// # std::io::Result::Ok(())
    /// ```
    pub fn serialize_boolean_to_write<W: Write>(&self, write: W, value: bool) -> io::Result<W> {
        match self.format {
            QueryResultsFormat::Xml => write_boolean_xml_result(write, value),
            QueryResultsFormat::Json => write_boolean_json_result(write, value),
            QueryResultsFormat::Csv | QueryResultsFormat::Tsv => {
                write_boolean_csv_result(write, value)
            }
        }
    }
    /// Write a boolean query result (from an `ASK` query) into the given [`AsyncWrite`] implementation.
    ///
    /// Example in JSON (the API is the same for XML, CSV and TSV):
    /// ```
    /// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> std::io::Result<()> {
    /// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
    /// let mut buffer = Vec::new();
    /// json_serializer
    ///     .serialize_boolean_to_tokio_async_write(&mut buffer, false)
    ///     .await?;
    /// assert_eq!(buffer, br#"{"head":{},"boolean":false}"#);
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub async fn serialize_boolean_to_tokio_async_write<W: AsyncWrite + Unpin>(
        &self,
        write: W,
        value: bool,
    ) -> io::Result<W> {
        match self.format {
            QueryResultsFormat::Xml => tokio_async_write_boolean_xml_result(write, value).await,
            QueryResultsFormat::Json => tokio_async_write_boolean_json_result(write, value).await,
            QueryResultsFormat::Csv | QueryResultsFormat::Tsv => {
                tokio_async_write_boolean_csv_result(write, value).await
            }
        }
    }
    #[deprecated(note = "use serialize_boolean_to_write", since = "0.4.0")]
    pub fn write_boolean_result<W: Write>(&self, writer: W, value: bool) -> io::Result<W> {
        self.serialize_boolean_to_write(writer, value)
    }
    /// Returns a `SolutionsWriter` allowing writing query solutions into the given [`Write`] implementation.
    ///
    /// <div class="warning">
    ///
    /// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
    ///
    /// <div class="warning">
    ///
    /// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
    ///
    /// Example in XML (the API is the same for JSON, CSV and TSV):
    /// ```
    /// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
    /// use oxrdf::{LiteralRef, Variable, VariableRef};
    /// use std::iter::once;
    ///
    /// let xml_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Xml);
    /// let mut buffer = Vec::new();
    /// let mut writer = xml_serializer.serialize_solutions_to_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")])?;
    /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test"))))?;
    /// writer.finish()?;
    /// assert_eq!(buffer, br#"<?xml version="1.0"?><sparql xmlns="http://www.w3.org/2005/sparql-results#"><head><variable name="foo"/><variable name="bar"/></head><results><result><binding name="foo"><literal>test</literal></binding></result></results></sparql>"#);
    /// # std::io::Result::Ok(())
    /// ```
    pub fn serialize_solutions_to_write<W: Write>(
        &self,
        write: W,
        variables: Vec<Variable>,
    ) -> io::Result<ToWriteSolutionsWriter<W>> {
        Ok(ToWriteSolutionsWriter {
            formatter: match self.format {
                QueryResultsFormat::Xml => ToWriteSolutionsWriterKind::Xml(
                    ToWriteXmlSolutionsWriter::start(write, &variables)?,
                ),
                QueryResultsFormat::Json => ToWriteSolutionsWriterKind::Json(
                    ToWriteJsonSolutionsWriter::start(write, &variables)?,
                ),
                QueryResultsFormat::Csv => ToWriteSolutionsWriterKind::Csv(
                    ToWriteCsvSolutionsWriter::start(write, variables)?,
                ),
                QueryResultsFormat::Tsv => ToWriteSolutionsWriterKind::Tsv(
                    ToWriteTsvSolutionsWriter::start(write, variables)?,
                ),
            },
        })
    }
    /// Returns a `SolutionsWriter` allowing writing query solutions into the given [`AsyncWrite`] implementation.
    ///
    /// <div class="warning">
    ///
    /// Do not forget to run the [`finish`](ToTokioAsyncWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
    ///
    /// <div class="warning">
    ///
    /// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
    ///
    /// Example in JSON (the API is the same for XML, CSV and TSV):
    /// ```
    /// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
    /// use oxrdf::{LiteralRef, Variable, VariableRef};
    /// use std::iter::once;
    ///
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> std::io::Result<()> {
    /// let json_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Json);
    /// let mut buffer = Vec::new();
    /// let mut writer = json_serializer.serialize_solutions_to_tokio_async_write(&mut buffer, vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")]).await?;
    /// writer.write(once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))).await?;
    /// writer.finish().await?;
    /// assert_eq!(buffer, br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#);
    /// # Ok(())
    /// # }
    /// ```
    #[cfg(feature = "async-tokio")]
    pub async fn serialize_solutions_to_tokio_async_write<W: AsyncWrite + Unpin>(
        &self,
        write: W,
        variables: Vec<Variable>,
    ) -> io::Result<ToTokioAsyncWriteSolutionsWriter<W>> {
        Ok(ToTokioAsyncWriteSolutionsWriter {
            formatter: match self.format {
                QueryResultsFormat::Xml => ToTokioAsyncWriteSolutionsWriterKind::Xml(
                    ToTokioAsyncWriteXmlSolutionsWriter::start(write, &variables).await?,
                ),
                QueryResultsFormat::Json => ToTokioAsyncWriteSolutionsWriterKind::Json(
                    ToTokioAsyncWriteJsonSolutionsWriter::start(write, &variables).await?,
                ),
                QueryResultsFormat::Csv => ToTokioAsyncWriteSolutionsWriterKind::Csv(
                    ToTokioAsyncWriteCsvSolutionsWriter::start(write, variables).await?,
                ),
                QueryResultsFormat::Tsv => ToTokioAsyncWriteSolutionsWriterKind::Tsv(
                    ToTokioAsyncWriteTsvSolutionsWriter::start(write, variables).await?,
                ),
            },
        })
    }
    #[deprecated(note = "use serialize_solutions_to_write", since = "0.4.0")]
    pub fn solutions_writer<W: Write>(
        &self,
        writer: W,
        variables: Vec<Variable>,
    ) -> io::Result<ToWriteSolutionsWriter<W>> {
        self.serialize_solutions_to_write(writer, variables)
    }
}
impl From<QueryResultsFormat> for QueryResultsSerializer {
    /// Equivalent to [`QueryResultsSerializer::from_format`].
    fn from(format: QueryResultsFormat) -> Self {
        Self { format }
    }
}
/// Allows writing query results into a [`Write`] implementation.
///
/// Could be built using a [`QueryResultsSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](io::BufWriter) to avoid that.</div>
///
/// Example in TSV (the API is the same for JSON, XML and CSV):
/// ```
/// use oxrdf::{LiteralRef, Variable, VariableRef};
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
/// use std::iter::once;
///
/// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
/// let mut buffer = Vec::new();
/// let mut writer = tsv_serializer.serialize_solutions_to_write(
/// &mut buffer,
/// vec![
/// Variable::new_unchecked("foo"),
/// Variable::new_unchecked("bar"),
/// ],
/// )?;
/// writer.write(once((
/// VariableRef::new_unchecked("foo"),
/// LiteralRef::from("test"),
/// )))?;
/// writer.finish()?;
/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct ToWriteSolutionsWriter<W: Write> {
    // Format-specific writer state; the variant fixes the output syntax for the file's lifetime.
    formatter: ToWriteSolutionsWriterKind<W>,
}
/// Dispatch between the per-format synchronous solution writers.
enum ToWriteSolutionsWriterKind<W: Write> {
    Xml(ToWriteXmlSolutionsWriter<W>),
    Json(ToWriteJsonSolutionsWriter<W>),
    Csv(ToWriteCsvSolutionsWriter<W>),
    Tsv(ToWriteTsvSolutionsWriter<W>),
}
impl<W: Write> ToWriteSolutionsWriter<W> {
    /// Writes one solution row.
    ///
    /// The row is any iterator of `(variable, term)` bindings, e.g.
    /// `once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))`
    /// or a `&QuerySolution`.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (impl Into<VariableRef<'a>>, impl Into<TermRef<'a>>)>,
    ) -> io::Result<()> {
        let bindings = solution
            .into_iter()
            .map(|(variable, term)| (variable.into(), term.into()));
        match &mut self.formatter {
            ToWriteSolutionsWriterKind::Xml(inner) => inner.write(bindings),
            ToWriteSolutionsWriterKind::Json(inner) => inner.write(bindings),
            ToWriteSolutionsWriterKind::Csv(inner) => inner.write(bindings),
            ToWriteSolutionsWriterKind::Tsv(inner) => inner.write(bindings),
        }
    }

    /// Writes the trailing bytes of the file and returns the underlying writer.
    pub fn finish(self) -> io::Result<W> {
        // CSV/TSV have no footer, so their `finish` is infallible.
        match self.formatter {
            ToWriteSolutionsWriterKind::Xml(inner) => inner.finish(),
            ToWriteSolutionsWriterKind::Json(inner) => inner.finish(),
            ToWriteSolutionsWriterKind::Csv(inner) => Ok(inner.finish()),
            ToWriteSolutionsWriterKind::Tsv(inner) => Ok(inner.finish()),
        }
    }
}
/// Allows writing query results into an [`AsyncWrite`] implementation.
/// Could be built using a [`QueryResultsSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](ToTokioAsyncWriteSolutionsWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// <div class="warning">
///
/// This writer does unbuffered writes. You might want to use [`BufWriter`](tokio::io::BufWriter) to avoid that.</div>
///
/// Example in TSV (the API is the same for JSON, CSV and XML):
/// ```
/// use oxrdf::{LiteralRef, Variable, VariableRef};
/// use sparesults::{QueryResultsFormat, QueryResultsSerializer};
/// use std::iter::once;
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> std::io::Result<()> {
/// let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
/// let mut buffer = Vec::new();
/// let mut writer = tsv_serializer
/// .serialize_solutions_to_tokio_async_write(
/// &mut buffer,
/// vec![
/// Variable::new_unchecked("foo"),
/// Variable::new_unchecked("bar"),
/// ],
/// )
/// .await?;
/// writer
/// .write(once((
/// VariableRef::new_unchecked("foo"),
/// LiteralRef::from("test"),
/// )))
/// .await?;
/// writer.finish().await?;
/// assert_eq!(buffer, b"?foo\t?bar\n\"test\"\t\n");
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct ToTokioAsyncWriteSolutionsWriter<W: AsyncWrite + Unpin> {
    // Format-specific async writer state; the variant fixes the output syntax.
    formatter: ToTokioAsyncWriteSolutionsWriterKind<W>,
}
#[cfg(feature = "async-tokio")]
/// Dispatch between the per-format Tokio solution writers.
enum ToTokioAsyncWriteSolutionsWriterKind<W: AsyncWrite + Unpin> {
    Xml(ToTokioAsyncWriteXmlSolutionsWriter<W>),
    Json(ToTokioAsyncWriteJsonSolutionsWriter<W>),
    Csv(ToTokioAsyncWriteCsvSolutionsWriter<W>),
    Tsv(ToTokioAsyncWriteTsvSolutionsWriter<W>),
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteSolutionsWriter<W> {
    /// Writes one solution row.
    ///
    /// The row is any iterator of `(variable, term)` bindings, e.g.
    /// `once((VariableRef::new_unchecked("foo"), LiteralRef::from("test")))`
    /// or a `&QuerySolution`.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (impl Into<VariableRef<'a>>, impl Into<TermRef<'a>>)>,
    ) -> io::Result<()> {
        let bindings = solution
            .into_iter()
            .map(|(variable, term)| (variable.into(), term.into()));
        match &mut self.formatter {
            ToTokioAsyncWriteSolutionsWriterKind::Xml(inner) => inner.write(bindings).await,
            ToTokioAsyncWriteSolutionsWriterKind::Json(inner) => inner.write(bindings).await,
            ToTokioAsyncWriteSolutionsWriterKind::Csv(inner) => inner.write(bindings).await,
            ToTokioAsyncWriteSolutionsWriterKind::Tsv(inner) => inner.write(bindings).await,
        }
    }

    /// Writes the trailing bytes of the file and returns the underlying writer.
    pub async fn finish(self) -> io::Result<W> {
        // CSV/TSV have no footer, so their `finish` is synchronous and infallible.
        match self.formatter {
            ToTokioAsyncWriteSolutionsWriterKind::Xml(inner) => inner.finish().await,
            ToTokioAsyncWriteSolutionsWriterKind::Json(inner) => inner.finish().await,
            ToTokioAsyncWriteSolutionsWriterKind::Csv(inner) => Ok(inner.finish()),
            ToTokioAsyncWriteSolutionsWriterKind::Tsv(inner) => Ok(inner.finish()),
        }
    }
}

// @ -0,0 +1,340 @@ (stray diff-hunk header left over from file concatenation)
//! Definition of [`QuerySolution`] structure and associated utility constructions.
use crate::oxrdf::{Term, Variable, VariableRef};
use std::fmt;
use std::iter::Zip;
use std::ops::Index;
use std::sync::Arc;
/// Tuple associating variables and terms that are the result of a SPARQL query.
///
/// It is the equivalent of a row in SQL.
///
/// ```
/// use sparesults::QuerySolution;
/// use oxrdf::{Variable, Literal};
///
/// let solution = QuerySolution::from((vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")], vec![Some(Literal::from(1).into()), None]));
/// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes).
/// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no).
/// ```
pub struct QuerySolution {
    // Declared variables, shared across all solutions of one result set (cheap `Arc` clone).
    variables: Arc<[Variable]>,
    // One slot per declared variable; `None` marks an unbound variable in this row.
    values: Vec<Option<Term>>,
}
impl QuerySolution {
    /// Returns the value bound at a given tuple position ([`usize`]) or for a
    /// given variable name ([`&str`](str), [`Variable`] or [`VariableRef`]).
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // bound
    /// assert_eq!(solution.get(1), None); // declared but unbound
    /// ```
    #[inline]
    pub fn get(&self, index: impl VariableSolutionIndex) -> Option<&Term> {
        let position = index.index(self)?;
        self.values.get(position)?.as_ref()
    }

    /// The number of columns (declared variables, bound or not) in this solution.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(solution.len(), 2);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Returns `true` when no variable at all is bound in this solution.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert!(!solution.is_empty());
    ///
    /// let empty_solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![None, None],
    /// ));
    /// assert!(empty_solution.is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        !self.values.iter().any(Option::is_some)
    }

    /// Iterates over the variables that are actually bound, with their values.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(
    ///     solution.iter().collect::<Vec<_>>(),
    ///     vec![(&Variable::new_unchecked("foo"), &Literal::from(1).into())]
    /// );
    /// ```
    #[inline]
    pub fn iter(&self) -> impl Iterator<Item = (&Variable, &Term)> {
        IntoIterator::into_iter(self)
    }

    /// The ordered slice of values, one entry per declared variable.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(solution.values(), &[Some(Literal::from(1).into()), None]);
    /// ```
    #[inline]
    pub fn values(&self) -> &[Option<Term>] {
        self.values.as_slice()
    }

    /// The ordered slice of declared variables, bound or not.
    ///
    /// ```
    /// use oxrdf::{Literal, Variable};
    /// use sparesults::QuerySolution;
    ///
    /// let solution = QuerySolution::from((
    ///     vec![Variable::new_unchecked("foo"), Variable::new_unchecked("bar")],
    ///     vec![Some(Literal::from(1).into()), None],
    /// ));
    /// assert_eq!(
    ///     solution.variables(),
    ///     &[
    ///         Variable::new_unchecked("foo"),
    ///         Variable::new_unchecked("bar")
    ///     ]
    /// );
    /// ```
    #[inline]
    pub fn variables(&self) -> &[Variable] {
        self.variables.as_ref()
    }
}
impl<V: Into<Arc<[Variable]>>, S: Into<Vec<Option<Term>>>> From<(V, S)> for QuerySolution {
    /// Builds a solution from a `(variables, values)` pair; the two must be the same length.
    #[inline]
    fn from((variables, values): (V, S)) -> Self {
        Self {
            variables: variables.into(),
            values: values.into(),
        }
    }
}
impl<'a> IntoIterator for &'a QuerySolution {
    type Item = (&'a Variable, &'a Term);
    type IntoIter = Iter<'a>;

    /// Iterates over the bound variables only; see [`Iter`].
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        let inner = self.variables.iter().zip(self.values.iter());
        Iter { inner }
    }
}
impl Index<usize> for QuerySolution {
    type Output = Term;

    /// Indexing by column position; panics when the column is unbound or out of range.
    #[allow(clippy::panic)]
    #[inline]
    fn index(&self, index: usize) -> &Self::Output {
        match self.get(index) {
            Some(value) => value,
            None => panic!("The column {index} is not set in this solution"),
        }
    }
}
impl Index<&str> for QuerySolution {
    type Output = Term;

    /// Indexing by variable name; panics when the variable is unbound or unknown.
    #[allow(clippy::panic)]
    #[inline]
    fn index(&self, index: &str) -> &Self::Output {
        match self.get(index) {
            Some(value) => value,
            None => panic!("The variable ?{index} is not set in this solution"),
        }
    }
}
impl Index<VariableRef<'_>> for QuerySolution {
    type Output = Term;

    /// Indexing by variable reference; panics when the variable is unbound or unknown.
    #[allow(clippy::panic)]
    #[inline]
    fn index(&self, index: VariableRef<'_>) -> &Self::Output {
        match self.get(index) {
            Some(value) => value,
            None => panic!("The variable {index} is not set in this solution"),
        }
    }
}
impl Index<Variable> for QuerySolution {
    type Output = Term;

    /// Delegates to the [`VariableRef`] indexing implementation.
    #[inline]
    fn index(&self, index: Variable) -> &Self::Output {
        &self[index.as_ref()]
    }
}
impl Index<&Variable> for QuerySolution {
    type Output = Term;

    /// Delegates to the [`VariableRef`] indexing implementation.
    #[inline]
    fn index(&self, index: &Variable) -> &Self::Output {
        &self[index.as_ref()]
    }
}
impl PartialEq for QuerySolution {
    /// Two solutions are equal when they agree on every *bound* variable,
    /// checked in both directions so neither side may bind an extra one.
    /// Variable order and unbound columns are ignored.
    fn eq(&self, other: &Self) -> bool {
        self.iter()
            .all(|(variable, value)| other.get(variable) == Some(value))
            && other
                .iter()
                .all(|(variable, value)| self.get(variable) == Some(value))
    }
}
impl Eq for QuerySolution {}
impl fmt::Debug for QuerySolution {
    /// Formats the solution as a map of its bound variables only.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut map = f.debug_map();
        for (variable, value) in self {
            map.entry(variable, value);
        }
        map.finish()
    }
}
/// An iterator over [`QuerySolution`] bound variables.
///
/// ```
/// use oxrdf::{Literal, Variable};
/// use sparesults::QuerySolution;
///
/// let solution = QuerySolution::from((
/// vec![
/// Variable::new_unchecked("foo"),
/// Variable::new_unchecked("bar"),
/// ],
/// vec![Some(Literal::from(1).into()), None],
/// ));
/// assert_eq!(
/// solution.iter().collect::<Vec<_>>(),
/// vec![(&Variable::new_unchecked("foo"), &Literal::from(1).into())]
/// );
/// ```
pub struct Iter<'a> {
    // Lockstep walk over (variable, value) pairs; `next` skips the `None` values.
    inner: Zip<std::slice::Iter<'a, Variable>, std::slice::Iter<'a, Option<Term>>>,
}
impl<'a> Iterator for Iter<'a> {
    type Item = (&'a Variable, &'a Term);

    /// Yields the next *bound* variable, silently skipping unbound columns.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner
            .find_map(|(variable, value)| value.as_ref().map(|v| (variable, v)))
    }

    /// Lower bound is 0 since every remaining column may be unbound.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        (0, self.inner.size_hint().1)
    }
}
/// A utility trait to get values for a given variable or tuple position.
///
/// See [`QuerySolution::get`].
pub trait VariableSolutionIndex {
    /// Returns the column position of this index in `solution`, or `None` if absent.
    fn index(self, solution: &QuerySolution) -> Option<usize>;
}
impl VariableSolutionIndex for usize {
#[inline]
fn index(self, _: &QuerySolution) -> Option<usize> {
Some(self)
}
}
impl VariableSolutionIndex for &str {
#[inline]
fn index(self, solution: &QuerySolution) -> Option<usize> {
solution.variables.iter().position(|v| v.as_str() == self)
}
}
impl VariableSolutionIndex for VariableRef<'_> {
#[inline]
fn index(self, solution: &QuerySolution) -> Option<usize> {
solution.variables.iter().position(|v| *v == self)
}
}
impl VariableSolutionIndex for &Variable {
#[inline]
fn index(self, solution: &QuerySolution) -> Option<usize> {
self.as_ref().index(solution)
}
}
impl VariableSolutionIndex for Variable {
#[inline]
fn index(self, solution: &QuerySolution) -> Option<usize> {
self.as_ref().index(solution)
}
}

// @ -0,0 +1,833 @@ (stray diff-hunk header left over from file concatenation)
//! Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/)
use crate::oxrdf::vocab::rdf;
use crate::oxrdf::*;
use crate::sparesults::error::{QueryResultsParseError, QueryResultsSyntaxError};
use quick_xml::escape::unescape;
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::{Decoder, Error, Reader, Writer};
use std::collections::BTreeMap;
use std::io::{self, BufReader, Read, Write};
use std::mem::take;
use std::sync::Arc;
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader};
/// Serializes an `ASK` result as a complete SPARQL XML document into `write`.
pub fn write_boolean_xml_result<W: Write>(write: W, value: bool) -> io::Result<W> {
    let mut writer = Writer::new(write);
    inner_write_boolean_xml_result(value)
        .into_iter()
        .try_for_each(|event| writer.write_event(event).map_err(map_xml_error))?;
    Ok(writer.into_inner())
}
#[cfg(feature = "async-tokio")]
/// Async counterpart of [`write_boolean_xml_result`], targeting an [`AsyncWrite`].
pub async fn tokio_async_write_boolean_xml_result<W: AsyncWrite + Unpin>(
    write: W,
    value: bool,
) -> io::Result<W> {
    let mut writer = Writer::new(write);
    for event in inner_write_boolean_xml_result(value) {
        if let Err(e) = writer.write_event_async(event).await {
            return Err(map_xml_error(e));
        }
    }
    Ok(writer.into_inner())
}
/// Builds the fixed event sequence of a boolean result document:
/// `<?xml?><sparql><head></head><boolean>true|false</boolean></sparql>`.
fn inner_write_boolean_xml_result(value: bool) -> [Event<'static>; 8] {
    let boolean_text = if value { "true" } else { "false" };
    [
        Event::Decl(BytesDecl::new("1.0", None, None)),
        Event::Start(
            BytesStart::new("sparql")
                .with_attributes([("xmlns", "http://www.w3.org/2005/sparql-results#")]),
        ),
        Event::Start(BytesStart::new("head")),
        Event::End(BytesEnd::new("head")),
        Event::Start(BytesStart::new("boolean")),
        Event::Text(BytesText::new(boolean_text)),
        Event::End(BytesEnd::new("boolean")),
        Event::End(BytesEnd::new("sparql")),
    ]
}
pub struct ToWriteXmlSolutionsWriter<W: Write> {
    // Sans-I/O event producer shared with the async writer.
    inner: InnerXmlSolutionsWriter,
    // Destination the buffered events are flushed into.
    writer: Writer<W>,
}
impl<W: Write> ToWriteXmlSolutionsWriter<W> {
    /// Emits the XML prologue and `<head>`, and opens `<results>`.
    pub fn start(write: W, variables: &[Variable]) -> io::Result<Self> {
        let mut writer = Writer::new(write);
        let mut events = Vec::with_capacity(48);
        let inner = InnerXmlSolutionsWriter::start(&mut events, variables);
        Self::do_write(&mut writer, events)?;
        Ok(Self { inner, writer })
    }

    /// Serializes one solution as a `<result>` element.
    pub fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        let mut events = Vec::with_capacity(48);
        self.inner.write(&mut events, solution);
        Self::do_write(&mut self.writer, events)
    }

    /// Closes `</results></sparql>` and returns the underlying writer.
    pub fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::with_capacity(4);
        self.inner.finish(&mut events);
        Self::do_write(&mut self.writer, events)?;
        Ok(self.writer.into_inner())
    }

    /// Flushes a batch of buffered events into the underlying writer.
    fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
        output
            .into_iter()
            .try_for_each(|event| writer.write_event(event).map_err(map_xml_error))
    }
}
#[cfg(feature = "async-tokio")]
pub struct ToTokioAsyncWriteXmlSolutionsWriter<W: AsyncWrite + Unpin> {
    // Sans-I/O event producer shared with the sync writer.
    inner: InnerXmlSolutionsWriter,
    // Async destination the buffered events are flushed into.
    writer: Writer<W>,
}
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> ToTokioAsyncWriteXmlSolutionsWriter<W> {
    /// Emits the XML prologue and `<head>`, and opens `<results>`.
    pub async fn start(write: W, variables: &[Variable]) -> io::Result<Self> {
        let mut writer = Writer::new(write);
        let mut events = Vec::with_capacity(48);
        let inner = InnerXmlSolutionsWriter::start(&mut events, variables);
        Self::do_write(&mut writer, events).await?;
        Ok(Self { inner, writer })
    }

    /// Serializes one solution as a `<result>` element.
    pub async fn write<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        let mut events = Vec::with_capacity(48);
        self.inner.write(&mut events, solution);
        Self::do_write(&mut self.writer, events).await
    }

    /// Closes `</results></sparql>` and returns the underlying writer.
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::with_capacity(4);
        self.inner.finish(&mut events);
        Self::do_write(&mut self.writer, events).await?;
        Ok(self.writer.into_inner())
    }

    /// Flushes a batch of buffered events into the underlying writer.
    async fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
        for event in output {
            if let Err(e) = writer.write_event_async(event).await {
                return Err(map_xml_error(e));
            }
        }
        Ok(())
    }
}
/// Sans-I/O core shared by the sync and Tokio XML writers: it only pushes
/// `Event`s into a caller-supplied buffer and never touches a writer itself.
struct InnerXmlSolutionsWriter;
impl InnerXmlSolutionsWriter {
    /// Pushes the document prologue, the `<head>` variable declarations and
    /// the opening `<results>` tag.
    fn start<'a>(output: &mut Vec<Event<'a>>, variables: &'a [Variable]) -> Self {
        output.extend([
            Event::Decl(BytesDecl::new("1.0", None, None)),
            Event::Start(BytesStart::new("sparql").with_attributes([(
                "xmlns",
                "http://www.w3.org/2005/sparql-results#",
            )])),
            Event::Start(BytesStart::new("head")),
        ]);
        output.extend(variables.iter().map(|variable| {
            Event::Empty(
                BytesStart::new("variable").with_attributes([("name", variable.as_str())]),
            )
        }));
        output.extend([
            Event::End(BytesEnd::new("head")),
            Event::Start(BytesStart::new("results")),
        ]);
        Self {}
    }

    /// Pushes one `<result>` element with a `<binding>` per bound variable.
    #[allow(clippy::unused_self)]
    fn write<'a>(
        &self,
        output: &mut Vec<Event<'a>>,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) {
        output.push(Event::Start(BytesStart::new("result")));
        for (variable, term) in solution {
            output.push(Event::Start(
                BytesStart::new("binding").with_attributes([("name", variable.as_str())]),
            ));
            write_xml_term(output, term);
            output.push(Event::End(BytesEnd::new("binding")));
        }
        output.push(Event::End(BytesEnd::new("result")));
    }

    /// Pushes the closing `</results></sparql>` tags.
    #[allow(clippy::unused_self)]
    fn finish(self, output: &mut Vec<Event<'_>>) {
        output.extend([
            Event::End(BytesEnd::new("results")),
            Event::End(BytesEnd::new("sparql")),
        ]);
    }
}
/// Serializes a single RDF term into SPARQL XML results events.
fn write_xml_term<'a>(output: &mut Vec<Event<'a>>, term: TermRef<'a>) {
    // Helper for the plain `<tag>text</tag>` cases (IRIs and blank nodes).
    fn text_element<'b>(output: &mut Vec<Event<'b>>, tag: &'static str, text: &'b str) {
        output.push(Event::Start(BytesStart::new(tag)));
        output.push(Event::Text(BytesText::new(text)));
        output.push(Event::End(BytesEnd::new(tag)));
    }
    match term {
        TermRef::NamedNode(uri) => text_element(output, "uri", uri.as_str()),
        TermRef::BlankNode(bnode) => text_element(output, "bnode", bnode.as_str()),
        TermRef::Literal(literal) => {
            let mut start = BytesStart::new("literal");
            if let Some(language) = literal.language() {
                start.push_attribute(("xml:lang", language));
            } else if !literal.is_plain() {
                // Simple literals carry no datatype attribute; all others do.
                start.push_attribute(("datatype", literal.datatype().as_str()))
            }
            output.push(Event::Start(start));
            output.push(Event::Text(BytesText::new(literal.value())));
            output.push(Event::End(BytesEnd::new("literal")));
        }
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => {
            // Quoted triples are serialized as nested subject/predicate/object
            // terms inside a <triple> element.
            output.push(Event::Start(BytesStart::new("triple")));
            for (tag, part) in [
                ("subject", TermRef::from(triple.subject.as_ref())),
                ("predicate", TermRef::from(triple.predicate.as_ref())),
                ("object", triple.object.as_ref()),
            ] {
                output.push(Event::Start(BytesStart::new(tag)));
                write_xml_term(output, part);
                output.push(Event::End(BytesEnd::new(tag)));
            }
            output.push(Event::End(BytesEnd::new("triple")));
        }
    }
}
/// Result of parsing the header of a SPARQL XML results document: either the
/// solutions of a SELECT query or the boolean answer of an ASK query.
pub enum FromReadXmlQueryResultsReader<R: Read> {
    Solutions {
        /// The variables declared in the `<head>` section.
        variables: Vec<Variable>,
        /// Streaming reader over the `<result>` elements.
        solutions: FromReadXmlSolutionsReader<R>,
    },
    Boolean(bool),
}
impl<R: Read> FromReadXmlQueryResultsReader<R> {
    /// Reads the document header and classifies the file as either solutions
    /// (`<results>`) or a boolean answer (`<boolean>`).
    pub fn read(read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(BufReader::new(read));
        reader.trim_text(true);
        reader.expand_empty_elements(true);
        let mut reader_buffer = Vec::new();
        let mut inner = XmlInnerQueryResultsReader {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };
        loop {
            // Feed events to the header state machine until it can classify
            // the document; the reader and scratch buffer are then moved into
            // the solutions reader so parsing can continue where we stopped.
            reader_buffer.clear();
            let event = reader.read_event_into(&mut reader_buffer)?;
            if let Some(result) = inner.read_event(event)? {
                return Ok(match result {
                    XmlInnerQueryResults::Solutions {
                        variables,
                        solutions,
                    } => Self::Solutions {
                        variables,
                        solutions: FromReadXmlSolutionsReader {
                            reader,
                            inner: solutions,
                            reader_buffer,
                        },
                    },
                    XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
                });
            }
        }
    }
}
/// Streaming reader over the `<result>` elements of a SPARQL XML results file.
pub struct FromReadXmlSolutionsReader<R: Read> {
    reader: Reader<BufReader<R>>,
    inner: XmlInnerSolutionsReader,
    // Scratch buffer reused across read_event_into calls to avoid reallocating.
    reader_buffer: Vec<u8>,
}
impl<R: Read> FromReadXmlSolutionsReader<R> {
    /// Reads the next solution row, or returns `Ok(None)` at end of file.
    pub fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            self.reader_buffer.clear();
            match self.reader.read_event_into(&mut self.reader_buffer)? {
                Event::Eof => return Ok(None),
                event => {
                    if let Some(solution) = self.inner.read_event(event)? {
                        return Ok(Some(solution));
                    }
                }
            }
        }
    }
}
/// Async (Tokio) counterpart of [`FromReadXmlQueryResultsReader`]: either the
/// solutions of a SELECT query or the boolean answer of an ASK query.
#[cfg(feature = "async-tokio")]
pub enum FromTokioAsyncReadXmlQueryResultsReader<R: AsyncRead + Unpin> {
    Solutions {
        /// The variables declared in the `<head>` section.
        variables: Vec<Variable>,
        /// Streaming reader over the `<result>` elements.
        solutions: FromTokioAsyncReadXmlSolutionsReader<R>,
    },
    Boolean(bool),
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlQueryResultsReader<R> {
    /// Reads the document header and classifies the file as either solutions
    /// (`<results>`) or a boolean answer (`<boolean>`).
    pub async fn read(read: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(AsyncBufReader::new(read));
        reader.trim_text(true);
        reader.expand_empty_elements(true);
        let mut reader_buffer = Vec::new();
        let mut inner = XmlInnerQueryResultsReader {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };
        loop {
            // Feed events to the header state machine until classification;
            // reader and buffer then move into the solutions reader.
            reader_buffer.clear();
            let event = reader.read_event_into_async(&mut reader_buffer).await?;
            if let Some(result) = inner.read_event(event)? {
                return Ok(match result {
                    XmlInnerQueryResults::Solutions {
                        variables,
                        solutions,
                    } => Self::Solutions {
                        variables,
                        solutions: FromTokioAsyncReadXmlSolutionsReader {
                            reader,
                            inner: solutions,
                            reader_buffer,
                        },
                    },
                    XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
                });
            }
        }
    }
}
/// Async streaming reader over the `<result>` elements of a SPARQL XML results file.
#[cfg(feature = "async-tokio")]
pub struct FromTokioAsyncReadXmlSolutionsReader<R: AsyncRead + Unpin> {
    reader: Reader<AsyncBufReader<R>>,
    inner: XmlInnerSolutionsReader,
    // Scratch buffer reused across read_event_into_async calls.
    reader_buffer: Vec<u8>,
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> FromTokioAsyncReadXmlSolutionsReader<R> {
    /// Reads the next solution row, or returns `Ok(None)` at end of file.
    pub async fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            self.reader_buffer.clear();
            match self
                .reader
                .read_event_into_async(&mut self.reader_buffer)
                .await?
            {
                Event::Eof => return Ok(None),
                event => {
                    if let Some(solution) = self.inner.read_event(event)? {
                        return Ok(Some(solution));
                    }
                }
            }
        }
    }
}
/// Internal classification of a results document produced by the header
/// state machine.
enum XmlInnerQueryResults {
    Solutions {
        variables: Vec<Variable>,
        solutions: XmlInnerSolutionsReader,
    },
    Boolean(bool),
}
/// Parser position while reading the document header.
#[derive(Clone, Copy)]
enum ResultsState {
    Start,     // before the root <sparql> tag
    Sparql,    // inside <sparql>, before <head>
    Head,      // inside <head>
    AfterHead, // </head> seen, waiting for <results> or <boolean>
    Boolean,   // inside <boolean>, expecting the text value
}
/// State machine that parses the header of a SPARQL XML results document.
struct XmlInnerQueryResultsReader {
    state: ResultsState,
    // Variables collected from the <head> section, in declaration order.
    variables: Vec<Variable>,
    decoder: Decoder,
}
impl XmlInnerQueryResultsReader {
    /// Consumes one XML event and advances the header state machine.
    ///
    /// Returns `Ok(Some(_))` once the document is classified: when the
    /// `<results>` tag is reached (solutions) or when the `<boolean>` text
    /// value has been read. Returns `Ok(None)` while more events are needed.
    pub fn read_event(
        &mut self,
        event: Event<'_>,
    ) -> Result<Option<XmlInnerQueryResults>, QueryResultsParseError> {
        match event {
            Event::Start(event) => match self.state {
                ResultsState::Start => {
                    if event.local_name().as_ref() == b"sparql" {
                        self.state = ResultsState::Sparql;
                        Ok(None)
                    } else {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    }
                }
                ResultsState::Sparql => {
                    if event.local_name().as_ref() == b"head" {
                        self.state = ResultsState::Head;
                        Ok(None)
                    } else {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    }
                }
                ResultsState::Head => {
                    if event.local_name().as_ref() == b"variable" {
                        let name = event.attributes()
                            .filter_map(Result::ok)
                            .find(|attr| attr.key.local_name().as_ref() == b"name")
                            .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))?;
                        let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned();
                        let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?;
                        // Duplicate declarations are a syntax error.
                        if self.variables.contains(&variable) {
                            return Err(QueryResultsSyntaxError::msg(format!(
                                "The variable {variable} is declared twice"
                            ))
                            .into());
                        }
                        self.variables.push(variable);
                        Ok(None)
                    } else if event.local_name().as_ref() == b"link" {
                        // no op
                        Ok(None)
                    } else {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    }
                }
                ResultsState::AfterHead => {
                    if event.local_name().as_ref() == b"boolean" {
                        self.state = ResultsState::Boolean;
                        Ok(None)
                    } else if event.local_name().as_ref() == b"results" {
                        // Build the variable-name -> column-index map used by
                        // the solutions reader to place bindings into rows.
                        let mut mapping = BTreeMap::default();
                        for (i, var) in self.variables.iter().enumerate() {
                            mapping.insert(var.clone().into_string(), i);
                        }
                        Ok(Some(XmlInnerQueryResults::Solutions {
                            variables: take(&mut self.variables),
                            solutions: XmlInnerSolutionsReader {
                                decoder: self.decoder,
                                mapping,
                                state_stack: vec![State::Start, State::Start],
                                new_bindings: Vec::new(),
                                current_var: None,
                                term: None,
                                lang: None,
                                datatype: None,
                                subject_stack: Vec::new(),
                                predicate_stack: Vec::new(),
                                object_stack: Vec::new(),
                            },
                        }))
                    // NOTE(review): the `results`/`boolean` comparisons below are
                    // unreachable (both were handled above); only the `link` check
                    // is effective — this branch errors for anything but <link>.
                    } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" {
                        Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
                    } else {
                        Ok(None)
                    }
                }
                ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into())
            },
            Event::Text(event) => {
                let value = event.unescape()?;
                match self.state {
                    ResultsState::Boolean => {
                        if value == "true" {
                            Ok(Some(XmlInnerQueryResults::Boolean(true)))
                        } else if value == "false" {
                            Ok(Some(XmlInnerQueryResults::Boolean(false)))
                        } else {
                            Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into())
                        }
                    }
                    _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into())
                }
            }
            Event::End(event) => {
                if let ResultsState::Head = self.state {
                    if event.local_name().as_ref() == b"head" {
                        self.state = ResultsState::AfterHead
                    }
                    Ok(None)
                } else {
                    Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into())
                }
            }
            Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()),
            Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
                Ok(None)
            }
            // The reader is configured with expand_empty_elements(true), so
            // Empty events never reach this state machine.
            Event::Empty(_) => unreachable!("Empty events are expended"),
            Event::CData(_) => {
                Err(QueryResultsSyntaxError::msg(
                    "<![CDATA[...]]> are not supported in SPARQL XML results",
                )
                .into())
            }
        }
    }
}
/// Parser position inside the `<results>` section; one entry is pushed on the
/// state stack per open element.
enum State {
    Start,
    Result,
    Binding,
    Uri,
    BNode,
    Literal,
    Triple,
    Subject,
    Predicate,
    Object,
}
/// State machine that parses the `<result>` elements of a SPARQL XML results
/// document into rows of optional terms.
struct XmlInnerSolutionsReader {
    decoder: Decoder,
    // Variable name -> column index in the produced rows.
    mapping: BTreeMap<String, usize>,
    // Stack of open-element states; initialized with two Start entries
    // (presumably so the enclosing <results> close tag can be popped safely —
    // TODO confirm).
    state_stack: Vec<State>,
    // Row under construction for the current <result>.
    new_bindings: Vec<Option<Term>>,
    // Name attribute of the <binding> currently being read.
    current_var: Option<String>,
    // Last term parsed from a <uri>/<bnode>/<literal>/<triple> element.
    term: Option<Term>,
    lang: Option<String>,
    datatype: Option<NamedNode>,
    // Stacks for nested RDF-star <triple> elements.
    subject_stack: Vec<Term>,
    predicate_stack: Vec<Term>,
    object_stack: Vec<Term>,
}
impl XmlInnerSolutionsReader {
#[allow(clippy::unwrap_in_result)]
pub fn read_event(
&mut self,
event: Event<'_>,
) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
match event {
Event::Start(event) => match self.state_stack.last().unwrap() {
State::Start => {
if event.local_name().as_ref() == b"result" {
self.new_bindings = vec![None; self.mapping.len()];
self.state_stack.push(State::Result);
Ok(None)
} else {
Err(QueryResultsSyntaxError::msg(format!(
"Expecting <result>, found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into())
}
}
State::Result => {
if event.local_name().as_ref() == b"binding" {
let Some(attr) = event
.attributes()
.filter_map(Result::ok)
.find(|attr| attr.key.local_name().as_ref() == b"name")
else {
return Err(QueryResultsSyntaxError::msg(
"No name attribute found for the <binding> tag",
)
.into());
};
self.current_var =
Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned());
self.state_stack.push(State::Binding);
Ok(None)
} else {
Err(QueryResultsSyntaxError::msg(format!(
"Expecting <binding>, found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into())
}
}
State::Binding | State::Subject | State::Predicate | State::Object => {
if self.term.is_some() {
return Err(QueryResultsSyntaxError::msg(
"There is already a value for the current binding",
)
.into());
}
if event.local_name().as_ref() == b"uri" {
self.state_stack.push(State::Uri);
Ok(None)
} else if event.local_name().as_ref() == b"bnode" {
self.state_stack.push(State::BNode);
Ok(None)
} else if event.local_name().as_ref() == b"literal" {
for attr in event.attributes() {
let attr = attr.map_err(Error::from)?;
if attr.key.as_ref() == b"xml:lang" {
self.lang = Some(
unescape(&self.decoder.decode(&attr.value)?)?.into_owned(),
);
} else if attr.key.local_name().as_ref() == b"datatype" {
let iri = self.decoder.decode(&attr.value)?;
let iri = unescape(&iri)?;
self.datatype =
Some(NamedNode::new(iri.as_ref()).map_err(|e| {
QueryResultsSyntaxError::msg(format!(
"Invalid datatype IRI '{iri}': {e}"
))
})?);
}
}
self.state_stack.push(State::Literal);
Ok(None)
} else if event.local_name().as_ref() == b"triple" {
self.state_stack.push(State::Triple);
Ok(None)
} else {
Err(QueryResultsSyntaxError::msg(format!(
"Expecting <uri>, <bnode> or <literal> found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into())
}
}
State::Triple => {
if event.local_name().as_ref() == b"subject" {
self.state_stack.push(State::Subject);
Ok(None)
} else if event.local_name().as_ref() == b"predicate" {
self.state_stack.push(State::Predicate);
Ok(None)
} else if event.local_name().as_ref() == b"object" {
self.state_stack.push(State::Object);
Ok(None)
} else {
Err(QueryResultsSyntaxError::msg(format!(
"Expecting <subject>, <predicate> or <object> found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into())
}
}
State::Uri => Err(QueryResultsSyntaxError::msg(format!(
"<uri> must only contain a string, found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into()),
State::BNode => Err(QueryResultsSyntaxError::msg(format!(
"<uri> must only contain a string, found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into()),
State::Literal => Err(QueryResultsSyntaxError::msg(format!(
"<uri> must only contain a string, found <{}>",
self.decoder.decode(event.name().as_ref())?
))
.into()),
},
Event::Text(event) => {
let data = event.unescape()?;
match self.state_stack.last().unwrap() {
State::Uri => {
self.term = Some(
NamedNode::new(data.to_string())
.map_err(|e| {
QueryResultsSyntaxError::msg(format!(
"Invalid IRI value '{data}': {e}"
))
})?
.into(),
);
Ok(None)
}
State::BNode => {
self.term = Some(
BlankNode::new(data.to_string())
.map_err(|e| {
QueryResultsSyntaxError::msg(format!(
"Invalid blank node value '{data}': {e}"
))
})?
.into(),
);
Ok(None)
}
State::Literal => {
self.term = Some(
build_literal(data, self.lang.take(), self.datatype.take())?.into(),
);
Ok(None)
}
_ => Err(QueryResultsSyntaxError::msg(format!(
"Unexpected textual value found: {data}"
))
.into()),
}
}
Event::End(_) => match self.state_stack.pop().unwrap() {
State::Start | State::Uri => Ok(None),
State::Result => Ok(Some(take(&mut self.new_bindings))),
State::Binding => {
if let Some(var) = &self.current_var {
if let Some(var) = self.mapping.get(var) {
self.new_bindings[*var] = self.term.take()
} else {
return Err(
QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into()
);
}
} else {
return Err(QueryResultsSyntaxError::msg(
"No name found for <binding> tag",
)
.into());
}
Ok(None)
}
State::Subject => {
if let Some(subject) = self.term.take() {
self.subject_stack.push(subject)
}
Ok(None)
}
State::Predicate => {
if let Some(predicate) = self.term.take() {
self.predicate_stack.push(predicate)
}
Ok(None)
}
State::Object => {
if let Some(object) = self.term.take() {
self.object_stack.push(object)
}
Ok(None)
}
State::BNode => {
if self.term.is_none() {
// We default to a random bnode
self.term = Some(BlankNode::default().into())
}
Ok(None)
}
State::Literal => {
if self.term.is_none() {
// We default to the empty literal
self.term =
Some(build_literal("", self.lang.take(), self.datatype.take())?.into())
}
Ok(None)
}
State::Triple => {
#[cfg(feature = "rdf-star")]
if let (Some(subject), Some(predicate), Some(object)) = (
self.subject_stack.pop(),
self.predicate_stack.pop(),
self.object_stack.pop(),
) {
self.term = Some(
Triple::new(
match subject {
Term::NamedNode(subject) => subject.into(),
Term::BlankNode(subject) => subject.into(),
Term::Triple(subject) => Subject::Triple(subject),
Term::Literal(_) => {
return Err(QueryResultsSyntaxError::msg(
"The <subject> value should not be a <literal>",
)
.into());
}
},
match predicate {
Term::NamedNode(predicate) => predicate,
_ => {
return Err(QueryResultsSyntaxError::msg(
"The <predicate> value should be an <uri>",
)
.into());
}
},
object,
)
.into(),
);
Ok(None)
} else {
Err(QueryResultsSyntaxError::msg(
"A <triple> should contain a <subject>, a <predicate> and an <object>",
)
.into())
}
#[cfg(not(feature = "rdf-star"))]
{
Err(QueryResultsSyntaxError::msg(
"The <triple> tag is only supported with RDF-star",
)
.into())
}
}
},
Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
Ok(None)
}
Event::Empty(_) => unreachable!("Empty events are expended"),
Event::CData(_) => Err(QueryResultsSyntaxError::msg(
"<![CDATA[...]]> are not supported in SPARQL XML results",
)
.into()),
}
}
}
/// Builds an RDF literal from its lexical value and the optional `xml:lang`
/// and `datatype` attributes collected while parsing.
///
/// Errors if a language tag is combined with a datatype other than
/// `rdf:langString`, or if the language tag itself is invalid.
fn build_literal(
    value: impl Into<String>,
    lang: Option<String>,
    datatype: Option<NamedNode>,
) -> Result<Literal, QueryResultsParseError> {
    if let Some(lang) = lang {
        // An explicit datatype is only allowed when it is rdf:langString.
        if let Some(datatype) = &datatype {
            if datatype.as_ref() != rdf::LANG_STRING {
                return Err(QueryResultsSyntaxError::msg(format!(
                    "xml:lang value '{lang}' provided with the datatype {datatype}"
                ))
                .into());
            }
        }
        Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
            QueryResultsSyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into()
        })
    } else if let Some(datatype) = datatype {
        Ok(Literal::new_typed_literal(value, datatype))
    } else {
        Ok(Literal::new_simple_literal(value))
    }
}
fn map_xml_error(error: Error) -> io::Error {
match error {
Error::Io(error) => {
Arc::try_unwrap(error).unwrap_or_else(|error| io::Error::new(error.kind(), error))
}
Error::UnexpectedEof(_) => io::Error::new(io::ErrorKind::UnexpectedEof, error),
_ => io::Error::new(io::ErrorKind::InvalidData, error),
}
}

@ -0,0 +1,46 @@
Spargebra
=========
[![Latest Version](https://img.shields.io/crates/v/spargebra.svg)](https://crates.io/crates/spargebra)
[![Released API docs](https://docs.rs/spargebra/badge.svg)](https://docs.rs/spargebra)
[![Crates.io downloads](https://img.shields.io/crates/d/spargebra)](https://crates.io/crates/spargebra)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Spargebra is a [SPARQL](https://www.w3.org/TR/sparql11-overview/) parser.
It supports both [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) and [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/).
The emitted tree is based on [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery) objects.
The API entry point for SPARQL queries is the [`Query`] struct and the API entry point for SPARQL updates is the [`Update`] struct.
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature.
This crate is intended to be a building piece for SPARQL implementations in Rust like [Oxigraph](https://oxigraph.org).
Usage example:
```rust
use spargebra::Query;
let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }";
let query = Query::parse(query_str, None).unwrap();
assert_eq!(query.to_string(), query_str);
```
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,9 @@
pub mod algebra;
mod parser;
mod query;
pub mod term;
mod update;
pub use parser::SparqlSyntaxError;
pub use query::*;
pub use update::*;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,300 @@
use crate::spargebra::algebra::*;
use crate::spargebra::parser::{parse_query, SparqlSyntaxError};
use crate::spargebra::term::*;
use oxiri::Iri;
use std::fmt;
use std::str::FromStr;
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
///
/// ```
/// use spargebra::Query;
///
/// let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }";
/// let query = Query::parse(query_str, None)?;
/// assert_eq!(query.to_string(), query_str);
/// assert_eq!(
/// query.to_sse(),
/// "(project (?s ?p ?o) (bgp (triple ?s ?p ?o)))"
/// );
/// # Ok::<_, spargebra::SparqlSyntaxError>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum Query {
/// [SELECT](https://www.w3.org/TR/sparql11-query/#select).
Select {
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
dataset: Option<QueryDataset>,
/// The query selection graph pattern.
pattern: GraphPattern,
/// The query base IRI.
base_iri: Option<Iri<String>>,
},
/// [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct).
Construct {
/// The query construction template.
template: Vec<TriplePattern>,
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
dataset: Option<QueryDataset>,
/// The query selection graph pattern.
pattern: GraphPattern,
/// The query base IRI.
base_iri: Option<Iri<String>>,
},
/// [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe).
Describe {
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
dataset: Option<QueryDataset>,
/// The query selection graph pattern.
pattern: GraphPattern,
/// The query base IRI.
base_iri: Option<Iri<String>>,
},
/// [ASK](https://www.w3.org/TR/sparql11-query/#ask).
Ask {
/// The [query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset).
dataset: Option<QueryDataset>,
/// The query selection graph pattern.
pattern: GraphPattern,
/// The query base IRI.
base_iri: Option<Iri<String>>,
},
}
impl Query {
    /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
    pub fn parse(query: &str, base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> {
        parse_query(query, base_iri)
    }
    /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
    pub fn to_sse(&self) -> String {
        let mut buffer = String::new();
        // Writing into a String cannot fail, so unwrap is safe here.
        self.fmt_sse(&mut buffer).unwrap();
        buffer
    }
    /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
    ///
    /// Each optional wrapper (`base`, `dataset`) opens a parenthesis up front,
    /// which is why matching closing parens are emitted conditionally at the
    /// end of every arm.
    fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
        match self {
            Self::Select {
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    write!(f, "(base <{base_iri}> ")?;
                }
                if let Some(dataset) = dataset {
                    f.write_str("(dataset ")?;
                    dataset.fmt_sse(f)?;
                    f.write_str(" ")?;
                }
                pattern.fmt_sse(f)?;
                if dataset.is_some() {
                    f.write_str(")")?;
                }
                if base_iri.is_some() {
                    f.write_str(")")?;
                }
                Ok(())
            }
            Self::Construct {
                template,
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    write!(f, "(base <{base_iri}> ")?;
                }
                f.write_str("(construct (")?;
                for (i, t) in template.iter().enumerate() {
                    if i > 0 {
                        f.write_str(" ")?;
                    }
                    t.fmt_sse(f)?;
                }
                f.write_str(") ")?;
                if let Some(dataset) = dataset {
                    f.write_str("(dataset ")?;
                    dataset.fmt_sse(f)?;
                    f.write_str(" ")?;
                }
                pattern.fmt_sse(f)?;
                if dataset.is_some() {
                    f.write_str(")")?;
                }
                f.write_str(")")?;
                if base_iri.is_some() {
                    f.write_str(")")?;
                }
                Ok(())
            }
            Self::Describe {
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    write!(f, "(base <{base_iri}> ")?;
                }
                f.write_str("(describe ")?;
                if let Some(dataset) = dataset {
                    f.write_str("(dataset ")?;
                    dataset.fmt_sse(f)?;
                    f.write_str(" ")?;
                }
                pattern.fmt_sse(f)?;
                if dataset.is_some() {
                    f.write_str(")")?;
                }
                f.write_str(")")?;
                if base_iri.is_some() {
                    f.write_str(")")?;
                }
                Ok(())
            }
            Self::Ask {
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    write!(f, "(base <{base_iri}> ")?;
                }
                f.write_str("(ask ")?;
                if let Some(dataset) = dataset {
                    f.write_str("(dataset ")?;
                    dataset.fmt_sse(f)?;
                    f.write_str(" ")?;
                }
                pattern.fmt_sse(f)?;
                if dataset.is_some() {
                    f.write_str(")")?;
                }
                f.write_str(")")?;
                if base_iri.is_some() {
                    f.write_str(")")?;
                }
                Ok(())
            }
        }
    }
}
impl fmt::Display for Query {
    /// Serializes the query back to SPARQL concrete syntax.
    ///
    /// For SELECT the dataset clauses are emitted by `SparqlGraphRootPattern`
    /// (it receives `dataset`); the other forms print their dataset clauses
    /// themselves and pass `dataset: None` down.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Select {
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    writeln!(f, "BASE <{base_iri}>")?;
                }
                write!(
                    f,
                    "{}",
                    SparqlGraphRootPattern {
                        pattern,
                        dataset: dataset.as_ref()
                    }
                )
            }
            Self::Construct {
                template,
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    writeln!(f, "BASE <{base_iri}>")?;
                }
                f.write_str("CONSTRUCT { ")?;
                for triple in template {
                    write!(f, "{triple} . ")?;
                }
                f.write_str("}")?;
                if let Some(dataset) = dataset {
                    dataset.fmt(f)?;
                }
                write!(
                    f,
                    " WHERE {{ {} }}",
                    SparqlGraphRootPattern {
                        pattern,
                        dataset: None
                    }
                )
            }
            Self::Describe {
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    // Same output as the other arms; written with as_str() here.
                    writeln!(f, "BASE <{}>", base_iri.as_str())?;
                }
                f.write_str("DESCRIBE *")?;
                if let Some(dataset) = dataset {
                    dataset.fmt(f)?;
                }
                write!(
                    f,
                    " WHERE {{ {} }}",
                    SparqlGraphRootPattern {
                        pattern,
                        dataset: None
                    }
                )
            }
            Self::Ask {
                dataset,
                pattern,
                base_iri,
            } => {
                if let Some(base_iri) = base_iri {
                    writeln!(f, "BASE <{base_iri}>")?;
                }
                f.write_str("ASK")?;
                if let Some(dataset) = dataset {
                    dataset.fmt(f)?;
                }
                write!(
                    f,
                    " WHERE {{ {} }}",
                    SparqlGraphRootPattern {
                        pattern,
                        dataset: None
                    }
                )
            }
        }
    }
}
impl FromStr for Query {
    type Err = SparqlSyntaxError;

    /// Parses a query without a base IRI; equivalent to [`Query::parse`] with `None`.
    fn from_str(query: &str) -> Result<Self, Self::Err> {
        Self::parse(query, None)
    }
}
impl<'a> TryFrom<&'a str> for Query {
    type Error = SparqlSyntaxError;

    /// Delegates to [`FromStr`].
    fn try_from(query: &str) -> Result<Self, Self::Error> {
        Self::from_str(query)
    }
}
impl<'a> TryFrom<&'a String> for Query {
    type Error = SparqlSyntaxError;

    /// Delegates to [`FromStr`].
    fn try_from(query: &String) -> Result<Self, Self::Error> {
        Self::from_str(query)
    }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,344 @@
use crate::spargebra::algebra::*;
use crate::spargebra::parser::{parse_update, SparqlSyntaxError};
use crate::spargebra::term::*;
use oxiri::Iri;
use std::fmt;
use std::str::FromStr;
/// A parsed [SPARQL update](https://www.w3.org/TR/sparql11-update/).
///
/// NOTE(review): this module is vendored; the example below still imports the
/// original `spargebra` crate path — confirm whether these doctests are
/// compiled in this crate.
///
/// ```
/// use spargebra::Update;
///
/// let update_str = "CLEAR ALL ;";
/// let update = Update::parse(update_str, None)?;
/// assert_eq!(update.to_string().trim(), update_str);
/// assert_eq!(update.to_sse(), "(update (clear all))");
/// # Ok::<_, spargebra::SparqlSyntaxError>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Update {
    /// The update base IRI.
    pub base_iri: Option<Iri<String>>,
    /// The [update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate).
    pub operations: Vec<GraphUpdateOperation>,
}
impl Update {
    /// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query.
    pub fn parse(update: &str, base_iri: Option<&str>) -> Result<Self, SparqlSyntaxError> {
        parse_update(update, base_iri)
    }
    /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
    pub fn to_sse(&self) -> String {
        let mut buffer = String::new();
        // Writing into a String cannot fail, so unwrap is safe here.
        self.fmt_sse(&mut buffer).unwrap();
        buffer
    }
    /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
    fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
        if let Some(base_iri) = &self.base_iri {
            write!(f, "(base <{base_iri}> ")?;
        }
        f.write_str("(update")?;
        for op in &self.operations {
            f.write_str(" ")?;
            op.fmt_sse(f)?;
        }
        f.write_str(")")?;
        // Close the "(base …" wrapper opened above, if any.
        if self.base_iri.is_some() {
            f.write_str(")")?;
        }
        Ok(())
    }
}
impl fmt::Display for Update {
    /// Serializes the update back to SPARQL concrete syntax, one operation
    /// per line terminated by ` ;`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(base_iri) = &self.base_iri {
            writeln!(f, "BASE <{base_iri}>")?;
        }
        self.operations
            .iter()
            .try_for_each(|update| writeln!(f, "{update} ;"))
    }
}
impl FromStr for Update {
    type Err = SparqlSyntaxError;

    /// Parses an update without a base IRI; equivalent to [`Update::parse`] with `None`.
    fn from_str(update: &str) -> Result<Self, Self::Err> {
        Self::parse(update, None)
    }
}
impl<'a> TryFrom<&'a str> for Update {
    type Error = SparqlSyntaxError;

    /// Delegates to [`FromStr`].
    fn try_from(update: &str) -> Result<Self, Self::Error> {
        Self::from_str(update)
    }
}
impl<'a> TryFrom<&'a String> for Update {
    type Error = SparqlSyntaxError;

    /// Delegates to [`FromStr`].
    fn try_from(update: &String) -> Result<Self, Self::Error> {
        Self::from_str(update)
    }
}
/// The [graph update operations](https://www.w3.org/TR/sparql11-update/#formalModelGraphUpdate).
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum GraphUpdateOperation {
    /// [insert data](https://www.w3.org/TR/sparql11-update/#defn_insertDataOperation).
    InsertData { data: Vec<Quad> },
    /// [delete data](https://www.w3.org/TR/sparql11-update/#defn_deleteDataOperation).
    DeleteData { data: Vec<GroundQuad> },
    /// [delete insert](https://www.w3.org/TR/sparql11-update/#defn_deleteInsertOperation).
    DeleteInsert {
        delete: Vec<GroundQuadPattern>,
        insert: Vec<QuadPattern>,
        // The USING / USING NAMED dataset for the WHERE pattern.
        using: Option<QueryDataset>,
        pattern: Box<GraphPattern>,
    },
    /// [load](https://www.w3.org/TR/sparql11-update/#defn_loadOperation).
    Load {
        silent: bool,
        source: NamedNode,
        destination: GraphName,
    },
    /// [clear](https://www.w3.org/TR/sparql11-update/#defn_clearOperation).
    Clear { silent: bool, graph: GraphTarget },
    /// [create](https://www.w3.org/TR/sparql11-update/#defn_createOperation).
    Create { silent: bool, graph: NamedNode },
    /// [drop](https://www.w3.org/TR/sparql11-update/#defn_dropOperation).
    Drop { silent: bool, graph: GraphTarget },
}
impl GraphUpdateOperation {
    /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html).
    fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
        match self {
            Self::InsertData { data } => {
                f.write_str("(insertData (")?;
                for (i, t) in data.iter().enumerate() {
                    if i > 0 {
                        f.write_str(" ")?;
                    }
                    t.fmt_sse(f)?;
                }
                f.write_str("))")
            }
            Self::DeleteData { data } => {
                f.write_str("(deleteData (")?;
                for (i, t) in data.iter().enumerate() {
                    if i > 0 {
                        f.write_str(" ")?;
                    }
                    t.fmt_sse(f)?;
                }
                f.write_str("))")
            }
            Self::DeleteInsert {
                delete,
                insert,
                using,
                pattern,
            } => {
                f.write_str("(modify ")?;
                // The WHERE pattern is wrapped in "(using …)" when a dataset
                // is specified, otherwise printed bare.
                if let Some(using) = using {
                    f.write_str(" (using ")?;
                    using.fmt_sse(f)?;
                    f.write_str(" ")?;
                    pattern.fmt_sse(f)?;
                    f.write_str(")")?;
                } else {
                    pattern.fmt_sse(f)?;
                }
                if !delete.is_empty() {
                    f.write_str(" (delete (")?;
                    for (i, t) in delete.iter().enumerate() {
                        if i > 0 {
                            f.write_str(" ")?;
                        }
                        t.fmt_sse(f)?;
                    }
                    f.write_str("))")?;
                }
                if !insert.is_empty() {
                    f.write_str(" (insert (")?;
                    for (i, t) in insert.iter().enumerate() {
                        if i > 0 {
                            f.write_str(" ")?;
                        }
                        t.fmt_sse(f)?;
                    }
                    f.write_str("))")?;
                }
                f.write_str(")")
            }
            Self::Load {
                silent,
                source,
                destination,
            } => {
                f.write_str("(load ")?;
                if *silent {
                    f.write_str("silent ")?;
                }
                write!(f, "{source} ")?;
                destination.fmt_sse(f)?;
                f.write_str(")")
            }
            Self::Clear { silent, graph } => {
                f.write_str("(clear ")?;
                if *silent {
                    f.write_str("silent ")?;
                }
                graph.fmt_sse(f)?;
                f.write_str(")")
            }
            Self::Create { silent, graph } => {
                f.write_str("(create ")?;
                if *silent {
                    f.write_str("silent ")?;
                }
                write!(f, "{graph})")
            }
            Self::Drop { silent, graph } => {
                f.write_str("(drop ")?;
                if *silent {
                    f.write_str("silent ")?;
                }
                graph.fmt_sse(f)?;
                f.write_str(")")
            }
        }
    }
}
impl fmt::Display for GraphUpdateOperation {
    /// Serializes the operation back to SPARQL Update concrete syntax.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InsertData { data } => {
                writeln!(f, "INSERT DATA {{")?;
                write_quads(data, f)?;
                f.write_str("}")
            }
            Self::DeleteData { data } => {
                writeln!(f, "DELETE DATA {{")?;
                write_ground_quads(data, f)?;
                f.write_str("}")
            }
            Self::DeleteInsert {
                delete,
                insert,
                using,
                pattern,
            } => {
                // DELETE and INSERT templates are only printed when non-empty.
                if !delete.is_empty() {
                    writeln!(f, "DELETE {{")?;
                    for quad in delete {
                        writeln!(f, "\t{quad} .")?;
                    }
                    writeln!(f, "}}")?;
                }
                if !insert.is_empty() {
                    writeln!(f, "INSERT {{")?;
                    for quad in insert {
                        writeln!(f, "\t{quad} .")?;
                    }
                    writeln!(f, "}}")?;
                }
                if let Some(using) = using {
                    for g in &using.default {
                        writeln!(f, "USING {g}")?;
                    }
                    if let Some(named) = &using.named {
                        for g in named {
                            writeln!(f, "USING NAMED {g}")?;
                        }
                    }
                }
                write!(
                    f,
                    "WHERE {{ {} }}",
                    SparqlGraphRootPattern {
                        pattern,
                        dataset: None
                    }
                )
            }
            Self::Load {
                silent,
                source,
                destination,
            } => {
                f.write_str("LOAD ")?;
                if *silent {
                    f.write_str("SILENT ")?;
                }
                write!(f, "{source}")?;
                // INTO GRAPH is omitted for the default graph.
                if destination != &GraphName::DefaultGraph {
                    write!(f, " INTO GRAPH {destination}")?;
                }
                Ok(())
            }
            Self::Clear { silent, graph } => {
                f.write_str("CLEAR ")?;
                if *silent {
                    f.write_str("SILENT ")?;
                }
                write!(f, "{graph}")
            }
            Self::Create { silent, graph } => {
                f.write_str("CREATE ")?;
                if *silent {
                    f.write_str("SILENT ")?;
                }
                write!(f, "GRAPH {graph}")
            }
            Self::Drop { silent, graph } => {
                f.write_str("DROP ")?;
                if *silent {
                    f.write_str("SILENT ")?;
                }
                write!(f, "{graph}")
            }
        }
    }
}
/// Writes each quad on its own tab-indented line; quads outside the default
/// graph are wrapped in a `GRAPH <name> { … }` block.
fn write_quads(quads: &[Quad], f: &mut fmt::Formatter<'_>) -> fmt::Result {
    for quad in quads {
        if quad.graph_name != GraphName::DefaultGraph {
            writeln!(
                f,
                "\tGRAPH {} {{ {} {} {} }}",
                quad.graph_name, quad.subject, quad.predicate, quad.object
            )?;
        } else {
            writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?;
        }
    }
    Ok(())
}
/// Same as `write_quads`, but for ground (variable-free) quads.
fn write_ground_quads(quads: &[GroundQuad], f: &mut fmt::Formatter<'_>) -> fmt::Result {
    for quad in quads {
        if quad.graph_name != GraphName::DefaultGraph {
            writeln!(
                f,
                "\tGRAPH {} {{ {} {} {} }}",
                quad.graph_name, quad.subject, quad.predicate, quad.object
            )?;
        } else {
            writeln!(f, "\t{} {} {} .", quad.subject, quad.predicate, quad.object)?;
        }
    }
    Ok(())
}

@ -0,0 +1,33 @@
sparopt
=======
[![Latest Version](https://img.shields.io/crates/v/sparopt.svg)](https://crates.io/crates/sparopt)
[![Released API docs](https://docs.rs/sparopt/badge.svg)](https://docs.rs/sparopt)
[![Crates.io downloads](https://img.shields.io/crates/d/sparopt)](https://crates.io/crates/sparopt)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
sparopt is a work in progress [SPARQL Query](https://www.w3.org/TR/sparql11-query/) optimizer.
It relies on the output of [spargebra](https://crates.io/crates/spargebra).
Support for [SPARQL-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#sparql-star) is also available behind the `rdf-star` feature.
This crate is intended to be a building block for SPARQL implementations in Rust, such as [Oxigraph](https://oxigraph.org).
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
  <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
  <http://opensource.org/licenses/MIT>)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,5 @@
pub use crate::sparopt::optimizer::Optimizer;
pub mod algebra;
mod optimizer;
mod type_inference;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,462 @@
use crate::oxrdf::Variable;
use crate::spargebra::algebra::Function;
use crate::spargebra::term::{GroundTerm, GroundTermPattern, NamedNodePattern};
use crate::sparopt::algebra::{Expression, GraphPattern};
use std::collections::HashMap;
use std::ops::{BitAnd, BitOr};
/// Infers, for every variable, which kinds of RDF terms it may be bound to
/// (named node, blank node, literal, triple) — or whether it may stay unbound —
/// after evaluating `pattern`, starting from the incoming `types`.
///
/// The result is used by the optimizer; over-approximation is safe,
/// under-approximation is not.
pub fn infer_graph_pattern_types(
    pattern: &GraphPattern,
    mut types: VariableTypes,
) -> VariableTypes {
    match pattern {
        GraphPattern::QuadPattern {
            subject,
            predicate,
            object,
            graph_name,
        } => {
            add_ground_term_pattern_types(subject, &mut types, false);
            if let NamedNodePattern::Variable(v) = predicate {
                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
            }
            add_ground_term_pattern_types(object, &mut types, true);
            if let Some(NamedNodePattern::Variable(v)) = graph_name {
                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
            }
            types
        }
        GraphPattern::Path {
            subject,
            object,
            graph_name,
            ..
        } => {
            add_ground_term_pattern_types(subject, &mut types, false);
            add_ground_term_pattern_types(object, &mut types, true);
            if let Some(NamedNodePattern::Variable(v)) = graph_name {
                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
            }
            types
        }
        GraphPattern::Join { left, right, .. } => {
            // Both sides constrain the shared variables.
            let mut output_types = infer_graph_pattern_types(left, types.clone());
            output_types.intersect_with(infer_graph_pattern_types(right, types));
            output_types
        }
        #[cfg(feature = "sep-0006")]
        GraphPattern::Lateral { left, right } => {
            // The right side is evaluated with the left side's bindings in scope.
            infer_graph_pattern_types(right, infer_graph_pattern_types(left, types))
        }
        GraphPattern::LeftJoin { left, right, .. } => {
            let mut right_types = infer_graph_pattern_types(right, types.clone()); // TODO: expression
            for t in right_types.inner.values_mut() {
                t.undef = true; // Right might be unset
            }
            let mut output_types = infer_graph_pattern_types(left, types);
            output_types.intersect_with(right_types);
            output_types
        }
        // MINUS only filters solutions of the left side; it binds nothing new.
        GraphPattern::Minus { left, .. } => infer_graph_pattern_types(left, types),
        GraphPattern::Union { inner } => inner
            .iter()
            .map(|inner| infer_graph_pattern_types(inner, types.clone()))
            .reduce(|mut a, b| {
                a.union_with(b);
                a
            })
            .unwrap_or_default(),
        GraphPattern::Extend {
            inner,
            variable,
            expression,
        } => {
            let mut types = infer_graph_pattern_types(inner, types);
            types.intersect_variable_with(
                variable.clone(),
                infer_expression_type(expression, &types),
            );
            types
        }
        GraphPattern::Filter { inner, .. } => infer_graph_pattern_types(inner, types),
        // Projection drops every variable that is not projected.
        GraphPattern::Project { inner, variables } => VariableTypes {
            inner: infer_graph_pattern_types(inner, types)
                .inner
                .into_iter()
                .filter(|(v, _)| variables.contains(v))
                .collect(),
        },
        GraphPattern::Distinct { inner }
        | GraphPattern::Reduced { inner }
        | GraphPattern::OrderBy { inner, .. }
        | GraphPattern::Slice { inner, .. } => infer_graph_pattern_types(inner, types),
        GraphPattern::Group {
            inner,
            variables,
            aggregates,
        } => {
            // Infer the child types exactly once. The previous code recursed a
            // second time on the already-inferred result, which was redundant
            // (the inference is idempotent) and made nested GROUP BY patterns
            // exponentially expensive.
            let types = infer_graph_pattern_types(inner, types);
            VariableTypes {
                inner: types
                    .inner
                    .into_iter()
                    .filter(|(v, _)| variables.contains(v))
                    .chain(
                        aggregates
                            .iter()
                            .map(|(v, _)| (v.clone(), VariableType::ANY)),
                    ) //TODO: guess from aggregate
                    .collect(),
            }
        }
        GraphPattern::Values {
            variables,
            bindings,
        } => {
            for (i, v) in variables.iter().enumerate() {
                // Union over the kinds actually present in this VALUES column.
                let mut t = VariableType::default();
                for binding in bindings {
                    match binding[i] {
                        Some(GroundTerm::NamedNode(_)) => t.named_node = true,
                        Some(GroundTerm::Literal(_)) => t.literal = true,
                        #[cfg(feature = "rdf-star")]
                        Some(GroundTerm::Triple(_)) => t.triple = true,
                        None => t.undef = true,
                    }
                }
                types.intersect_variable_with(v.clone(), t)
            }
            types
        }
        GraphPattern::Service {
            name,
            inner,
            silent,
        } => {
            let parent_types = types.clone();
            let mut types = infer_graph_pattern_types(inner, types);
            if let NamedNodePattern::Variable(v) = name {
                types.intersect_variable_with(v.clone(), VariableType::NAMED_NODE)
            }
            if *silent {
                // On failure, single empty solution
                types.union_with(parent_types);
            }
            types
        }
    }
}
/// Records the possible kinds of a term-pattern variable: subjects can be
/// named/blank nodes (and triples with rdf-star), objects can be any term.
/// Embedded rdf-star triple patterns are handled recursively.
fn add_ground_term_pattern_types(
    pattern: &GroundTermPattern,
    types: &mut VariableTypes,
    is_object: bool,
) {
    match pattern {
        GroundTermPattern::Variable(var) => {
            let allowed = if is_object {
                VariableType::TERM
            } else {
                VariableType::SUBJECT
            };
            types.intersect_variable_with(var.clone(), allowed);
        }
        #[cfg(feature = "rdf-star")]
        GroundTermPattern::Triple(triple) => {
            add_ground_term_pattern_types(&triple.subject, types, false);
            if let NamedNodePattern::Variable(var) = &triple.predicate {
                types.intersect_variable_with(var.clone(), VariableType::NAMED_NODE);
            }
            add_ground_term_pattern_types(&triple.object, types, true);
        }
        _ => {}
    }
}
/// Infers the possible kinds of the value produced by evaluating `expression`,
/// given the variable kinds in `types`. `undef` means the expression may
/// raise an evaluation error / produce no value.
pub fn infer_expression_type(expression: &Expression, types: &VariableTypes) -> VariableType {
    match expression {
        // An IRI constant evaluates to itself.
        Expression::NamedNode(_) => VariableType::NAMED_NODE,
        // Literal constants, and the boolean results of EXISTS/BOUND, are literals.
        Expression::Literal(_) | Expression::Exists(_) | Expression::Bound(_) => {
            VariableType::LITERAL
        }
        // A variable has whatever kinds the pattern inference recorded for it.
        Expression::Variable(v) => types.get(v),
        // These produce an IRI, or error (undef) on invalid input.
        Expression::FunctionCall(Function::Datatype | Function::Iri, _) => {
            VariableType::NAMED_NODE | VariableType::UNDEF
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Predicate, _) => {
            VariableType::NAMED_NODE | VariableType::UNDEF
        }
        Expression::FunctionCall(Function::BNode, args) => {
            // BNODE() always succeeds; BNODE(arg) may error on a bad argument.
            if args.is_empty() {
                VariableType::BLANK_NODE
            } else {
                VariableType::BLANK_NODE | VariableType::UNDEF
            }
        }
        // These nullary builtins always succeed and yield a literal.
        Expression::FunctionCall(
            Function::Rand | Function::Now | Function::Uuid | Function::StrUuid,
            _,
        ) => VariableType::LITERAL,
        // Operators and builtins that yield a literal, or error (undef).
        Expression::Or(_)
        | Expression::And(_)
        | Expression::Equal(_, _)
        | Expression::Greater(_, _)
        | Expression::GreaterOrEqual(_, _)
        | Expression::Less(_, _)
        | Expression::LessOrEqual(_, _)
        | Expression::Add(_, _)
        | Expression::Subtract(_, _)
        | Expression::Multiply(_, _)
        | Expression::Divide(_, _)
        | Expression::UnaryPlus(_)
        | Expression::UnaryMinus(_)
        | Expression::Not(_)
        | Expression::FunctionCall(
            Function::Str
            | Function::Lang
            | Function::LangMatches
            | Function::Abs
            | Function::Ceil
            | Function::Floor
            | Function::Round
            | Function::Concat
            | Function::SubStr
            | Function::StrLen
            | Function::Replace
            | Function::UCase
            | Function::LCase
            | Function::EncodeForUri
            | Function::Contains
            | Function::StrStarts
            | Function::StrEnds
            | Function::StrBefore
            | Function::StrAfter
            | Function::Year
            | Function::Month
            | Function::Day
            | Function::Hours
            | Function::Minutes
            | Function::Seconds
            | Function::Timezone
            | Function::Tz
            | Function::Md5
            | Function::Sha1
            | Function::Sha256
            | Function::Sha384
            | Function::Sha512
            | Function::StrLang
            | Function::StrDt
            | Function::IsIri
            | Function::IsBlank
            | Function::IsLiteral
            | Function::IsNumeric
            | Function::Regex,
            _,
        ) => VariableType::LITERAL | VariableType::UNDEF,
        #[cfg(feature = "sep-0002")]
        Expression::FunctionCall(Function::Adjust, _) => {
            VariableType::LITERAL | VariableType::UNDEF
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::IsTriple, _) => {
            VariableType::LITERAL | VariableType::UNDEF
        }
        Expression::SameTerm(left, right) => {
            // sameTerm yields a boolean literal; it can only error if one of
            // its operands may be unbound.
            if infer_expression_type(left, types).undef || infer_expression_type(right, types).undef
            {
                VariableType::LITERAL | VariableType::UNDEF
            } else {
                VariableType::LITERAL
            }
        }
        // IF can return either branch's value.
        Expression::If(_, then, els) => {
            infer_expression_type(then, types) | infer_expression_type(els, types)
        }
        Expression::Coalesce(inner) => {
            // COALESCE returns the first argument that evaluates without error;
            // once an argument can never be undef, later ones are unreachable.
            let mut t = VariableType::UNDEF;
            for e in inner {
                let new = infer_expression_type(e, types);
                t = t | new;
                if !new.undef {
                    t.undef = false;
                    return t;
                }
            }
            t
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Triple, _) => VariableType::TRIPLE | VariableType::UNDEF,
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Subject, _) => {
            VariableType::SUBJECT | VariableType::UNDEF
        }
        #[cfg(feature = "rdf-star")]
        Expression::FunctionCall(Function::Object, _) => VariableType::TERM | VariableType::UNDEF,
        // Nothing is known about user-defined functions.
        Expression::FunctionCall(Function::Custom(_), _) => VariableType::ANY,
    }
}
/// Maps each variable to the kinds of terms it may be bound to.
/// Variables absent from the map are reported as [`VariableType::UNDEF`]
/// by the accessor (never bound).
#[derive(Default, Clone, Debug)]
pub struct VariableTypes {
    inner: HashMap<Variable, VariableType>,
}
impl VariableTypes {
    /// Returns the recorded kinds for `variable`; unknown variables are `UNDEF`.
    pub fn get(&self, variable: &Variable) -> VariableType {
        match self.inner.get(variable) {
            Some(t) => *t,
            None => VariableType::UNDEF,
        }
    }

    /// Iterates over every variable with a recorded type.
    pub fn iter(&self) -> impl Iterator<Item = (&Variable, &VariableType)> {
        self.inner.iter()
    }

    /// Narrows this map by `other`: both sets of constraints must hold.
    pub fn intersect_with(&mut self, other: Self) {
        for (variable, ty) in other.inner {
            self.intersect_variable_with(variable, ty);
        }
    }

    /// Widens this map by `other`: a binding may come from either side.
    pub fn union_with(&mut self, other: Self) {
        // A variable that `other` may leave unbound may be unbound here too.
        for (variable, ty) in &mut self.inner {
            if other.get(variable).undef {
                ty.undef = true;
            }
        }
        for (variable, mut ty) in other.inner {
            if let Some(existing) = self.inner.get_mut(&variable) {
                *existing = *existing | ty;
            } else {
                // Present only in `other`: it may be unbound on this side.
                ty.undef = true;
                self.inner.insert(variable, ty);
            }
        }
    }

    fn intersect_variable_with(&mut self, variable: Variable, t: VariableType) {
        let narrowed = self.get(&variable) & t;
        // UNDEF entries are the implicit default, so they are not stored.
        if narrowed != VariableType::UNDEF {
            self.inner.insert(variable, narrowed);
        }
    }
}
/// A set of possible kinds for a variable's value: it may be a named node,
/// blank node, literal, (with rdf-star) a triple — and/or unbound (`undef`).
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy, Eq, PartialEq, Debug, Default)]
pub struct VariableType {
    pub undef: bool,
    pub named_node: bool,
    pub blank_node: bool,
    pub literal: bool,
    #[cfg(feature = "rdf-star")]
    pub triple: bool,
}

impl VariableType {
    /// Only "may be unbound" — the empty set of term kinds.
    pub const UNDEF: Self = Self {
        undef: true,
        named_node: false,
        blank_node: false,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Exactly a named node.
    const NAMED_NODE: Self = Self {
        undef: false,
        named_node: true,
        blank_node: false,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Exactly a blank node.
    const BLANK_NODE: Self = Self {
        undef: false,
        named_node: false,
        blank_node: true,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Exactly a literal.
    const LITERAL: Self = Self {
        undef: false,
        named_node: false,
        blank_node: false,
        literal: true,
        #[cfg(feature = "rdf-star")]
        triple: false,
    };
    /// Exactly a triple (rdf-star only).
    #[cfg(feature = "rdf-star")]
    const TRIPLE: Self = Self {
        undef: false,
        named_node: false,
        blank_node: false,
        literal: false,
        triple: true,
    };
    /// Anything that can be a subject: named/blank node (and triple with rdf-star).
    const SUBJECT: Self = Self {
        undef: false,
        named_node: true,
        blank_node: true,
        literal: false,
        #[cfg(feature = "rdf-star")]
        triple: true,
    };
    /// Any RDF term (but not unbound).
    const TERM: Self = Self {
        undef: false,
        named_node: true,
        blank_node: true,
        literal: true,
        #[cfg(feature = "rdf-star")]
        triple: true,
    };
    /// Any term, or unbound.
    const ANY: Self = Self {
        undef: true,
        named_node: true,
        blank_node: true,
        literal: true,
        #[cfg(feature = "rdf-star")]
        triple: true,
    };
}

impl BitOr for VariableType {
    type Output = Self;

    /// Set union: a kind is possible if either side allows it.
    fn bitor(self, rhs: Self) -> Self {
        let mut out = self;
        out.undef |= rhs.undef;
        out.named_node |= rhs.named_node;
        out.blank_node |= rhs.blank_node;
        out.literal |= rhs.literal;
        #[cfg(feature = "rdf-star")]
        {
            out.triple |= rhs.triple;
        }
        out
    }
}

impl BitAnd for VariableType {
    type Output = Self;

    /// Intersection with undef acting as a wildcard: a kind survives if both
    /// sides allow it, or one side allows it while the other may be unbound.
    #[allow(clippy::nonminimal_bool)]
    fn bitand(self, rhs: Self) -> Self {
        let keep = |a: bool, b: bool| (a && b) || (self.undef && b) || (a && rhs.undef);
        Self {
            undef: self.undef && rhs.undef,
            named_node: keep(self.named_node, rhs.named_node),
            blank_node: keep(self.blank_node, rhs.blank_node),
            literal: keep(self.literal, rhs.literal),
            #[cfg(feature = "rdf-star")]
            triple: keep(self.triple, rhs.triple),
        }
    }
}

@ -1,10 +1,10 @@
#![cfg(test)]
#![allow(clippy::panic_in_result_fn)]
use ng_oxigraph::io::RdfFormat;
use ng_oxigraph::model::vocab::{rdf, xsd};
use ng_oxigraph::model::*;
use ng_oxigraph::store::Store;
use ng_oxigraph::oxigraph::io::RdfFormat;
use ng_oxigraph::oxigraph::model::vocab::{rdf, xsd};
use ng_oxigraph::oxigraph::model::*;
use ng_oxigraph::oxigraph::store::Store;
#[cfg(all(not(target_family = "wasm")))]
use rand::random;
#[cfg(all(not(target_family = "wasm")))]

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save