From 9da26c6f95e1adbc0b2b68ac6d5c391e1d7b058d Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 31 Aug 2023 22:41:12 +0200 Subject: [PATCH] Makes media type to format more robust Allows more combinations --- lib/oxrdfio/src/format.rs | 43 ++++++---- lib/sparesults/src/lib.rs | 78 ++++++++++++++--- server/src/main.rs | 173 ++++++++++++++++++++++++-------------- 3 files changed, 199 insertions(+), 95 deletions(-) diff --git a/lib/oxrdfio/src/format.rs b/lib/oxrdfio/src/format.rs index 8c4ce230..cb03a3eb 100644 --- a/lib/oxrdfio/src/format.rs +++ b/lib/oxrdfio/src/format.rs @@ -140,25 +140,32 @@ impl RdfFormat { /// ``` #[inline] pub fn from_media_type(media_type: &str) -> Option { - const MEDIA_TYPES: [(&str, RdfFormat); 14] = [ - ("application/n-quads", RdfFormat::NQuads), - ("application/n-triples", RdfFormat::NTriples), - ("application/rdf+xml", RdfFormat::RdfXml), - ("application/trig", RdfFormat::TriG), - ("application/turtle", RdfFormat::Turtle), - ("application/xml", RdfFormat::RdfXml), - ("application/x-trig", RdfFormat::TriG), - ("application/x-turtle", RdfFormat::Turtle), - ("text/n3", RdfFormat::N3), - ("text/nquads", RdfFormat::NQuads), - ("text/plain", RdfFormat::NTriples), - ("text/turtle", RdfFormat::Turtle), - ("text/xml", RdfFormat::RdfXml), - ("text/x-nquads", RdfFormat::NQuads), + const MEDIA_SUBTYPES: [(&str, RdfFormat); 10] = [ + ("n-quads", RdfFormat::NQuads), + ("n-triples", RdfFormat::NTriples), + ("n3", RdfFormat::N3), + ("nquads", RdfFormat::NQuads), + ("ntriples", RdfFormat::NTriples), + ("plain", RdfFormat::NTriples), + ("rdf+xml", RdfFormat::RdfXml), + ("trig", RdfFormat::TriG), + ("turtle", RdfFormat::Turtle), + ("xml", RdfFormat::RdfXml), ]; - let media_type = media_type.split(';').next()?.trim(); - for (candidate_media_type, candidate_id) in MEDIA_TYPES { - if candidate_media_type.eq_ignore_ascii_case(media_type) { + + let (r#type, subtype) = media_type + .split_once(';') + .unwrap_or((media_type, "")) + .0 + .split_once('/')?; + let r#type = r#type.trim(); + if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { + return None; + } + let subtype = subtype.trim(); + let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); + for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { + if candidate_subtype.eq_ignore_ascii_case(subtype) { return Some(candidate_id); } } diff --git a/lib/sparesults/src/lib.rs b/lib/sparesults/src/lib.rs index 223dcaa8..7d1a6729 100644 --- a/lib/sparesults/src/lib.rs +++ b/lib/sparesults/src/lib.rs @@ -16,6 +16,7 @@ use crate::json::*; pub use crate::solution::QuerySolution; use crate::xml::*; use oxrdf::{TermRef, Variable, VariableRef}; +use std::fmt; use std::io::{self, BufRead, Write}; use std::rc::Rc; @@ -84,6 +85,23 @@ impl QueryResultsFormat { } } + /// The format name. + /// + /// ``` + /// use sparesults::QueryResultsFormat; + /// + /// assert_eq!(QueryResultsFormat::Json.name(), "SPARQL Results in JSON") + /// ``` + #[inline] + pub const fn name(self) -> &'static str { + match self { + Self::Xml => "SPARQL Results in XML", + Self::Json => "SPARQL Results in JSON", + Self::Csv => "SPARQL Results in CSV", + Self::Tsv => "SPARQL Results in TSV", + } + } + /// Looks for a known format from a media type. /// /// It supports some media type aliases. @@ -97,15 +115,35 @@ impl QueryResultsFormat { /// ``` #[inline] pub fn from_media_type(media_type: &str) -> Option { - match media_type.split(';').next()?.trim() { - "application/sparql-results+xml" | "application/xml" | "text/xml" => Some(Self::Xml), - "application/sparql-results+json" | "application/json" | "text/json" => { - Some(Self::Json) + const MEDIA_SUBTYPES: [(&str, QueryResultsFormat); 8] = [ + ("csv", QueryResultsFormat::Csv), + ("json", QueryResultsFormat::Json), + ("plain", QueryResultsFormat::Csv), + ("sparql-results+json", QueryResultsFormat::Json), + ("sparql-results+xml", QueryResultsFormat::Xml), + ("tab-separated-values", QueryResultsFormat::Tsv), + ("tsv", QueryResultsFormat::Tsv), + ("xml", QueryResultsFormat::Xml), + ]; + + let (r#type, subtype) = media_type + .split_once(';') + .unwrap_or((media_type, "")) + .0 + .trim() + .split_once('/')?; + let r#type = r#type.trim(); + if !r#type.eq_ignore_ascii_case("application") && !r#type.eq_ignore_ascii_case("text") { + return None; + } + let subtype = subtype.trim(); + let subtype = subtype.strip_prefix("x-").unwrap_or(subtype); + for (candidate_subtype, candidate_id) in MEDIA_SUBTYPES { + if candidate_subtype.eq_ignore_ascii_case(subtype) { + return Some(candidate_id); } - "text/csv" => Some(Self::Csv), - "text/tab-separated-values" | "text/tsv" => Some(Self::Tsv), - _ => None, } + None } /// Looks for a known format from an extension. @@ -120,13 +158,27 @@ impl QueryResultsFormat { /// ``` #[inline] pub fn from_extension(extension: &str) -> Option { - match extension { - "srx" | "xml" => Some(Self::Xml), - "srj" | "json" => Some(Self::Json), - "csv" | "txt" => Some(Self::Csv), - "tsv" => Some(Self::Tsv), - _ => None, + const MEDIA_TYPES: [(&str, QueryResultsFormat); 7] = [ + ("csv", QueryResultsFormat::Csv), + ("json", QueryResultsFormat::Json), + ("srj", QueryResultsFormat::Json), + ("srx", QueryResultsFormat::Xml), + ("tsv", QueryResultsFormat::Tsv), + ("txt", QueryResultsFormat::Csv), + ("xml", QueryResultsFormat::Xml), + ]; + for (candidate_extension, candidate_id) in MEDIA_TYPES { + if candidate_extension.eq_ignore_ascii_case(extension) { + return Some(candidate_id); + } } + None + } +} + +impl fmt::Display for QueryResultsFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) } } diff --git a/server/src/main.rs b/server/src/main.rs index 038f5d05..e32a1fe7 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -1525,108 +1525,96 @@ impl From for GraphName { fn rdf_content_negotiation(request: &Request) -> Result { content_negotiation( request, + RdfFormat::from_media_type, + RdfFormat::NQuads, &[ - "application/n-quads", - "application/n-triples", - "application/rdf+xml", - "application/trig", - "application/turtle", - "application/xml", - "application/x-trig", - "application/x-turtle", - "text/n3", - "text/nquads", - "text/plain", - "text/turtle", - "text/xml", - "text/x-nquads", + ("application", RdfFormat::NQuads), + ("text", RdfFormat::NQuads), ], - RdfFormat::from_media_type, + "application/n-quads or text/turtle", ) } fn query_results_content_negotiation(request: &Request) -> Result { content_negotiation( request, + QueryResultsFormat::from_media_type, + QueryResultsFormat::Json, &[ - "application/json", - "application/sparql-results+json", - "application/sparql-results+xml", - "application/xml", - "text/csv", - "text/json", - "text/tab-separated-values", - "text/tsv", - "text/xml", + ("application", QueryResultsFormat::Json), + ("text", QueryResultsFormat::Json), ], - QueryResultsFormat::from_media_type, + "application/sparql-results+json or text/tsv", ) } -fn content_negotiation( +fn content_negotiation( request: &Request, - supported: &[&str], parse: impl Fn(&str) -> Option, + default: F, + default_by_base: &[(&str, F)], + example: &str, ) -> Result { - let default = HeaderValue::default(); + let default_value = HeaderValue::default(); let header = request .header(&HeaderName::ACCEPT) - .unwrap_or(&default) + .unwrap_or(&default_value) .to_str() .map_err(|_| bad_request("The Accept header should be a valid ASCII string"))?; if header.is_empty() { - return parse(supported.first().unwrap()) - .ok_or_else(|| internal_server_error("Unknown media type")); + return Ok(default); } let mut result = None; let mut result_score = 0_f32; - - for possible in header.split(',') { - let (possible, parameters) = possible.split_once(';').unwrap_or((possible, "")); + for mut possible in header.split(',') { + let mut score = 1.; + if let Some((possible_type, last_parameter)) = possible.rsplit_once(';') { + if let Some((name, value)) = last_parameter.split_once('=') { + if name.trim().eq_ignore_ascii_case("q") { + score = f32::from_str(value.trim()).map_err(|_| { + bad_request(format!("Invalid Accept media type score: {value}")) + })?; + possible = possible_type; + } + } + } + if score <= result_score { + continue; + } let (possible_base, possible_sub) = possible + .split_once(';') + .unwrap_or((possible, "")) + .0 .split_once('/') .ok_or_else(|| bad_request(format!("Invalid media type: '{possible}'")))?; let possible_base = possible_base.trim(); let possible_sub = possible_sub.trim(); - let mut score = 1.; - for parameter in parameters.split(';') { - let parameter = parameter.trim(); - if let Some(s) = parameter.strip_prefix("q=") { - score = f32::from_str(s.trim()) - .map_err(|_| bad_request(format!("Invalid Accept media type score: {s}")))? + let mut format = None; + if possible_base == "*" && possible_sub == "*" { + format = Some(default); + } else if possible_sub == "*" { + for (base, sub_format) in default_by_base { + if *base == possible_base { + format = Some(*sub_format); + } } + } else { + format = parse(possible); } - if score <= result_score { - continue; - } - for candidate in supported { - let (candidate_base, candidate_sub) = candidate - .split_once(';') - .map_or(*candidate, |(p, _)| p) - .split_once('/') - .ok_or_else(|| { - internal_server_error(format!("Invalid media type: '{possible}'")) - })?; - if (possible_base == candidate_base || possible_base == "*") - && (possible_sub == candidate_sub || possible_sub == "*") - { - result = Some(candidate); - result_score = score; - break; - } + if let Some(format) = format { + result = Some(format); + result_score = score; } } - let result = result.ok_or_else(|| { + result.ok_or_else(|| { ( Status::NOT_ACCEPTABLE, - format!("The available Content-Types are {}", supported.join(", "),), + format!("The accept header does not provide any accepted format like {example}"), ) - })?; - - parse(result).ok_or_else(|| internal_server_error("Unknown media type")) + }) } fn content_type(request: &Request) -> Option { @@ -2344,6 +2332,21 @@ mod tests { ) } + #[test] + fn get_query_accept_substar() -> Result<()> { + let request = Request::builder( + Method::GET, + "http://localhost/query?query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}" + .parse()?, + ) + .with_header(HeaderName::ACCEPT, "text/*")? + .build(); + ServerTest::new()?.test_body( + request, + "{\"head\":{\"vars\":[\"s\",\"p\",\"o\"]},\"results\":{\"bindings\":[]}}", + ) + } + #[test] fn get_query_accept_good() -> Result<()> { let request = Request::builder( @@ -2366,13 +2369,55 @@ mod tests { fn get_query_accept_bad() -> Result<()> { let request = Request::builder( Method::GET, - "http://localhost/query?query=SELECT%20*%20WHERE%20{%20?s%20?p%20?o%20}".parse()?, + "http://localhost/query?query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}" + .parse()?, ) .with_header(HeaderName::ACCEPT, "application/foo")? .build(); ServerTest::new()?.test_status(request, Status::NOT_ACCEPTABLE) } + #[test] + fn get_query_accept_explicit_priority() -> Result<()> { + let request = Request::builder( + Method::GET, + "http://localhost/query?query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}" + .parse()?, + ) + .with_header(HeaderName::ACCEPT, "text/foo;q=0.5 , text/json ; q = 0.7")? + .build(); + ServerTest::new()?.test_body( + request, + "{\"head\":{\"vars\":[\"s\",\"p\",\"o\"]},\"results\":{\"bindings\":[]}}", + ) + } + + #[test] + fn get_query_accept_implicit_priority() -> Result<()> { + let request = Request::builder( + Method::GET, + "http://localhost/query?query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}" + .parse()?, + ) + .with_header(HeaderName::ACCEPT, "text/json,text/foo")? + .build(); + ServerTest::new()?.test_body( + request, + "{\"head\":{\"vars\":[\"s\",\"p\",\"o\"]},\"results\":{\"bindings\":[]}}", + ) + } + #[test] + fn get_query_accept_implicit_and_explicit_priority() -> Result<()> { + let request = Request::builder( + Method::GET, + "http://localhost/query?query=SELECT%20?s%20?p%20?o%20WHERE%20{%20?s%20?p%20?o%20}" + .parse()?, + ) + .with_header(HeaderName::ACCEPT, "text/foo;q=0.9,text/csv")? + .build(); + ServerTest::new()?.test_body(request, "s,p,o\r\n") + } + #[test] fn get_bad_query() -> Result<()> { ServerTest::new()?.test_status(