From 18ff897c919edc2bb7e5c739f16be0e17860d557 Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Sat, 23 Nov 2024 11:30:48 +0100 Subject: [PATCH] labeled bnode using proper rule --- src/triple_common_parser.rs | 52 ++++++++++------ src/turtle/turtle_parser.rs | 117 ++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 20 deletions(-) diff --git a/src/triple_common_parser.rs b/src/triple_common_parser.rs index 9c62376..fb3e185 100644 --- a/src/triple_common_parser.rs +++ b/src/triple_common_parser.rs @@ -259,10 +259,12 @@ pub(crate) mod literal { } } pub(crate) mod triple { + use crate::grammar::BLANK_NODE_LABEL; use crate::prelude::*; use crate::shared::NS_TYPE; use crate::triple_common_parser::{comments, paren_close, paren_open, BlankNode, Iri}; + use std::collections::VecDeque; pub(crate) fn object_list<'a, F1, F2, T>( @@ -353,28 +355,38 @@ pub(crate) mod triple { ), ) } + // https://www.w3.org/TR/turtle/ 2.6 RDF Blank Nodes pub(crate) fn labeled_bnode(s: &str) -> ParserResult { - let parse_label = delimited( - tag(BLANK_NODE_LABEL), - take_while(|s: char| { - !s.is_whitespace() - && s != '.' - && s != ';' - && s != '<' - && s != '(' - && s != '[' - && s != '"' - }), - space0, - ); - - map_res(preceded(multispace0, parse_label), |label: &str| { - if label.starts_with('.') || label.ends_with('.') || label.starts_with('-') { - let err: Error<&str> = make_error(label, ErrorKind::IsNot); - return Err(nom::Err::Error(err)); + fn allowed_but_not_as_first(c: char) -> bool { + matches!(c,'.' | '-' | '·' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}') + } + let (s, _) = preceded(multispace0, tag(BLANK_NODE_LABEL))(s)?; + let mut idx_bnode = 0; + for c in s.chars() { + if c.is_alphanum() || c == '_' || allowed_but_not_as_first(c) { + idx_bnode += c.len_utf8(); + } else { + break; } - Ok(BlankNode::Labeled(label)) - })(s) + } + let mut bnode: &str = &s[0..idx_bnode]; + if bnode.ends_with('.') { + idx_bnode -= '.'.len_utf8(); + bnode = &s[0..idx_bnode]; + } + if bnode.is_empty() + || bnode + .chars() + .last() + .filter(|c| allowed_but_not_as_first(*c)) + .is_some() + || bnode.chars().take(1).any(allowed_but_not_as_first) + { + let err: Error<&str> = make_error(s, ErrorKind::IsNot); + return Err(nom::Err::Error(err)); + } + let rest = &s[idx_bnode..]; + Ok((rest, BlankNode::Labeled(bnode))) } } pub(crate) fn comments(s: &str) -> ParserResult> { diff --git a/src/turtle/turtle_parser.rs b/src/turtle/turtle_parser.rs index afe6825..0965845 100644 --- a/src/turtle/turtle_parser.rs +++ b/src/turtle/turtle_parser.rs @@ -448,6 +448,123 @@ mod test { ); } + #[test] + fn labeled_bnode_using_proper_rules_test1() { + let s = "_:b.node :a :b"; + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + TurtleValue::Statement { + subject: Box::new(TurtleValue::BNode(BlankNode::Labeled("b.node",),)), + predicate_objects: [TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "a", + },)), + object: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "b", + },)), + },] + .into(), + } + ); + } + #[test] + fn labeled_bnode_using_proper_rules_test2() { + let s = "_:b-node :a :b"; + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + TurtleValue::Statement { + subject: Box::new(TurtleValue::BNode(BlankNode::Labeled("b-node",),)), + predicate_objects: [TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "a", + },)), + object: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "b", + },)), + },] + .into(), + } + ); + } + #[test] + fn labeled_bnode_using_proper_rules_test3() { + let s = "_:b·node :a :b"; + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + TurtleValue::Statement { + subject: Box::new(TurtleValue::BNode(BlankNode::Labeled("b·node",),)), + predicate_objects: [TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "a", + },)), + object: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "b", + },)), + },] + .into(), + } + ); + } + + #[test] + fn labeled_bnode_using_proper_rules_test4() { + let s = "_:b-jöhn :a :b"; + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + TurtleValue::Statement { + subject: Box::new(TurtleValue::BNode(BlankNode::Labeled("b-jöhn",),)), + predicate_objects: [TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "a", + },)), + object: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "b", + },)), + },] + .into(), + } + ); + } + + #[test] + fn labeled_bnode_using_proper_rules_test5() { + let s = "_:b_undertie‿node :a :b"; + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + TurtleValue::Statement { + subject: Box::new(TurtleValue::BNode(BlankNode::Labeled("b_undertie‿node",),)), + predicate_objects: [TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "a", + },)), + object: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "", + local_name: "b", + },)), + },] + .into(), + } + ); + } #[test] fn collection_test() { let s = r#":a :b ( "apple" "banana" ) ."#;