From e6da8bc4e75ee0887d0e38af52d9972db48f67a5 Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Sun, 1 Dec 2024 13:22:15 +0100 Subject: [PATCH] only check if has scheme to determine if it's a relative uri (faster) --- src/iri.rs | 23 ++++++++++++++++------- src/tests/mod.rs | 1 + src/turtle/turtle_doc.rs | 10 ++++++---- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/iri.rs b/src/iri.rs index 562f5df..2954e00 100644 --- a/src/iri.rs +++ b/src/iri.rs @@ -79,7 +79,7 @@ use nom::{ combinator::complete, error::{ParseError, VerboseError}, }; -use parser::{parse_absolute_iri, parse_iri, parse_iri_reference}; +use parser::{parse_absolute_iri, parse_iri, parse_iri_reference, parse_scheme}; use crate::prelude::alt; @@ -111,6 +111,12 @@ impl IRI<'_> { pub fn is_relative(&self) -> bool { matches!(self, IRI::Reference(_)) } + pub fn has_scheme(s: &str) -> bool { + match parse_scheme(s) { + Ok((_, scheme)) => !scheme.is_empty(), + Err(_) => false, + } + } } mod ip { use nom::{ @@ -231,7 +237,7 @@ mod parser { pub(super) fn parse_iri(s: &str) -> ParserResult { map( tuple(( - terminated(parse_scheme, tag(":")), + parse_scheme, parse_i_hier_part, preceded(opt(tag("?")), parse_i_query), preceded(opt(tag("#")), parse_i_fragment), @@ -248,7 +254,7 @@ mod parser { map( tuple(( parse_scheme, - preceded(tag(":"), parse_i_hier_part), + parse_i_hier_part, preceded(opt(tag("?")), parse_i_query), )), |(scheme, hier_part, query)| IRI::Absolute { @@ -383,10 +389,13 @@ mod parser { tag("@"), ))(s) } - fn parse_scheme(s: &str) -> ParserResult<&str> { - verify( - take_while1(|c: char| c.is_alphanumeric() || c == '.' || c == '-' || c == '+'), - |scheme: &str| scheme.starts_with(|c: char| c.is_alphabetic()), + pub(super) fn parse_scheme(s: &str) -> ParserResult<&str> { + terminated( + verify( + take_while1(|c: char| c.is_alphanumeric() || c == '.' || c == '-' || c == '+'), + |scheme: &str| scheme.starts_with(|c: char| c.is_alphabetic()), + ), + tag(":"), )(s) } fn parse_userinfo(s: &str) -> ParserResult<&str> { diff --git a/src/tests/mod.rs b/src/tests/mod.rs index b18546b..a83d2b2 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -21,6 +21,7 @@ fn cmp_input_file( &mut input_buf, ) .unwrap(); + println!("len: {}", input.len()); if output_json { let f = File::open( diff --git a/src/turtle/turtle_doc.rs b/src/turtle/turtle_doc.rs index b63ca7c..2e25f21 100644 --- a/src/turtle/turtle_doc.rs +++ b/src/turtle/turtle_doc.rs @@ -408,10 +408,12 @@ impl<'a> TurtleDoc<'a> { ) -> Result, TurtleDocError> { match s { TurtleValue::Iri(Iri::Enclosed(iri)) => { - let iri_rfc3987 = IRI::try_from(iri).map_err(|e| TurtleDocError { - message: e.to_string(), - })?; - if iri_rfc3987.is_relative() { + // FIXME this is better but slow. + // Call if iri.is_relative instead of has_scheme + // let iri_rfc3987 = IRI::try_from(iri).map_err(|e| TurtleDocError { + // message: e.to_string(), + // })?; + if !IRI::has_scheme(iri) { if let Some(base) = base { let iri = (*base).to_owned() + iri; return Ok(Node::Iri(Cow::Owned(iri.to_string())));