From c57b18ea2c45488f723486f60d90c02fc0192fe3 Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Fri, 22 Nov 2024 18:16:20 +0100 Subject: [PATCH] fixing parsing issues --- Cargo.toml | 3 + src/triple_common_parser.rs | 24 +++--- src/turtle/turtle_doc.rs | 57 +++++++++++++- src/turtle/turtle_parser.rs | 149 ++++++++++++++++++++++++++++++++++-- tests/complex.ttl | 60 +++++++++++++++ tests/expected_complex.ttl | 50 ++++++++++++ 6 files changed, 322 insertions(+), 21 deletions(-) create mode 100644 tests/complex.ttl create mode 100644 tests/expected_complex.ttl diff --git a/Cargo.toml b/Cargo.toml index 350c502..ab048a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,3 +26,6 @@ codegen-units = 1 panic = 'abort' strip = "symbols" lto = "thin" + +[dev-dependencies] +serial_test = "3.1.1" diff --git a/src/triple_common_parser.rs b/src/triple_common_parser.rs index 223c9d5..5a6b235 100644 --- a/src/triple_common_parser.rs +++ b/src/triple_common_parser.rs @@ -274,7 +274,10 @@ pub(crate) mod triple { F2: FnMut(Vec) -> T, { map( - separated_list1(char(','), object_extractor), + separated_list1( + preceded(multispace0, terminated(char(','), multispace0)), + object_extractor, + ), move |mut list| { if list.len() > 1 { map_list(list) @@ -327,18 +330,15 @@ pub(crate) mod triple { where F: Fn(&'a str) -> ParserResult<'a, T>, { - preceded( - multispace0, - map( - preceded( - paren_open, - terminated( - separated_list0(multispace1, object_extractor), - preceded(multispace0, cut(paren_close)), - ), + map( + preceded( + paren_open, + terminated( + separated_list0(multispace1, object_extractor), + preceded(multispace0, cut(paren_close)), ), - VecDeque::from, ), + VecDeque::from, ) } pub(crate) fn anon_bnode<'a, F, T>(anon_parser: F) -> impl FnMut(&'a str) -> ParserResult<'a, T> @@ -378,7 +378,7 @@ pub(crate) fn comments(s: &str) -> ParserResult> { } pub(crate) fn paren_close(s: &str) -> ParserResult<&str> { - tag_no_space(")")(s) + preceded(multispace0, tag(")"))(s) } pub(crate) fn paren_open(s: &str) -> ParserResult<&str> { tag_no_space("(")(s) diff --git a/src/turtle/turtle_doc.rs b/src/turtle/turtle_doc.rs index 179de76..c161a84 100644 --- a/src/turtle/turtle_doc.rs +++ b/src/turtle/turtle_doc.rs @@ -21,7 +21,22 @@ use std::ops::Add; use std::path::PathBuf; use std::str::ParseBoolError; use std::sync::Arc; -use uuid::Uuid; + +#[cfg(test)] +static FAKE_UUID_GEN: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); + +#[cfg(not(test))] +fn get_uuid() -> String { + uuid::Uuid::new_v4().to_string() +} +#[cfg(test)] +fn get_uuid() -> String { + FAKE_UUID_GEN.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + format!( + "{}", + FAKE_UUID_GEN.load(std::sync::atomic::Ordering::SeqCst) + ) +} struct Context<'a> { base: Option<&'a str>, @@ -606,7 +621,7 @@ impl<'a> TurtleDoc<'a> { Ok(Node::Iri(Cow::Owned(well_known_prefix.to_owned() + label))) } TurtleValue::BNode(BlankNode::Unlabeled) => { - let uuid = Uuid::new_v4().to_string(); + let uuid = get_uuid(); Ok(Node::Iri(Cow::Owned(format!("{well_known_prefix}{uuid}")))) } TurtleValue::Statement { @@ -1036,8 +1051,9 @@ impl<'a> TryFrom<&'a RdfJsonTriple> for Statement<'a> { mod test { use crate::{ shared::XSD_STRING, - turtle::turtle_doc::{Literal, Node, RdfJsonTriple, Statement, TurtleDoc}, + turtle::turtle_doc::{Literal, Node, RdfJsonTriple, Statement, TurtleDoc, FAKE_UUID_GEN}, }; + use serial_test::serial; use std::borrow::Cow; use Cow::Borrowed; use Node::Iri; @@ -1045,6 +1061,7 @@ mod test { use super::RdfJsonNodeResult; #[test] + #[serial] fn turtle_doc_test() { let doc = include_str!("example/input.ttl"); let expected = include_str!("example/output.ttl"); @@ -1057,6 +1074,7 @@ mod test { assert_eq!(expected_turtle.to_string(), turtle.to_string()); } #[test] + #[serial] fn turtle_doc_bnode_test() { let doc = r#" @prefix foaf: . @@ -1075,6 +1093,7 @@ mod test { } #[test] + #[serial] fn turtle_doc_collection_test() { let s = r#" @prefix : . @@ -1084,6 +1103,7 @@ mod test { assert_eq!(5, turtle.statements.len()); } #[test] + #[serial] fn turtle_doc_add_test() { let doc1 = r#" @prefix : . @@ -1116,7 +1136,9 @@ mod test { let turtle4 = turtle + turtle3; assert_eq!(14, turtle4.statements.len()); } + #[test] + #[serial] fn turtle_doc_list_statements_test() { let doc = r#" @prefix foaf: . @@ -1139,6 +1161,7 @@ mod test { } #[test] + #[serial] fn parse_test() { let triple = r#" # this is a comment @@ -1166,7 +1189,9 @@ mod test { let triples: TurtleDoc = (triple, None).try_into().unwrap(); assert_eq!(triples.len(), 12); } + #[test] + #[serial] fn test_multi_comments() { let triples = r#" # the entire line is commented . @@ -1188,7 +1213,21 @@ mod test { assert_eq!(9, triples.len()); } + + #[test] + #[serial] + fn complex_test() { + FAKE_UUID_GEN.store(0, std::sync::atomic::Ordering::SeqCst); + let mut buf_c = String::new(); + let mut buf_e = String::new(); + + let turtle_c = TurtleDoc::from_file("tests/complex.ttl", None, &mut buf_c).unwrap(); + let turtle_expected = + TurtleDoc::from_file("tests/expected_complex.ttl", None, &mut buf_e).unwrap(); + assert_eq!(turtle_c.difference(&turtle_expected).unwrap().len(), 0); + } #[test] + #[serial] fn turtle_doc_could_not_parse_completely() { let mut buf_c = String::new(); let mut buf_f = String::new(); @@ -1201,6 +1240,7 @@ mod test { } #[test] + #[serial] fn turtle_doc_diff_buggy() { let mut buf_a = String::new(); let mut buf_b = String::new(); @@ -1214,7 +1254,9 @@ mod test { assert!(!diff.to_string().is_empty()); assert_eq!(diff, expected); } + #[test] + #[serial] fn turtle_doc_diff_test() { let mut buf_a = String::new(); let mut buf_b = String::new(); @@ -1229,7 +1271,9 @@ mod test { dbg!(&expected); assert_eq!(diff, expected); } + #[test] + #[serial] fn turtle_doc_to_json_test() { let doc = r#" @prefix foaf: . @@ -1248,7 +1292,9 @@ mod test { assert_eq!(json_triples.len(), turtle.len()); println!("{}", serde_json::to_string_pretty(&json_triples).unwrap()); } + #[test] + #[serial] fn test_convert_rdf_triple_to_doc() { let triple = RdfJsonTriple { subject: RdfJsonNodeResult::SingleNode(super::RdfJsonNode { @@ -1287,6 +1333,7 @@ mod test { } #[test] + #[serial] fn turtle_doc_to_json_bug_test() { let doc = r#" @prefix foaf: . @@ -1308,6 +1355,7 @@ mod test { } #[test] + #[serial] fn parse_date_test() { let examples = [ "2000-01-12T12:13:14Z", @@ -1339,6 +1387,7 @@ mod test { } #[test] + #[serial] fn parse_time_test() { let examples = ["00:00:00", "18:00"]; for example in examples { @@ -1356,7 +1405,9 @@ mod test { assert!(doc.is_ok()); } } + #[test] + #[serial] fn test_complex_str() { let s = include_str!("../../tests/49468c90-530b-11ee-8801-054ea2d949db.ttl"); let doc = TurtleDoc::try_from((s, None)).unwrap(); diff --git a/src/turtle/turtle_parser.rs b/src/turtle/turtle_parser.rs index e715385..be8d9d1 100644 --- a/src/turtle/turtle_parser.rs +++ b/src/turtle/turtle_parser.rs @@ -91,12 +91,15 @@ pub(crate) fn predicate(s: &str) -> ParserResult { } pub(crate) fn object(s: &str) -> ParserResult { - alt((iri_turtle, blank_node, collection_turtle, literal_turtle))(s) + preceded( + multispace0, + alt((iri_turtle, blank_node, collection_turtle, literal_turtle)), + )(s) } fn triples(s: &str) -> ParserResult { terminated( - predicate_lists(subject), + alt((predicate_lists(subject), anon_bnode_turtle)), preceded(multispace0, terminated(alt((tag("."), eof)), multispace0)), )(s) } @@ -117,12 +120,15 @@ pub fn statements(s: &str) -> IResult<&str, Vec> { #[cfg(test)] mod test { + use std::borrow::Cow; + use std::collections::VecDeque; + use crate::triple_common_parser::iri::prefixed_iri; use crate::triple_common_parser::prologue::{ base_sparql, base_turtle, prefix_sparql, prefix_turtle, }; - use crate::triple_common_parser::{BlankNode, Iri}; - use crate::turtle::turtle_parser::{labeled_bnode, triples}; + use crate::triple_common_parser::{BlankNode, Iri, Literal}; + use crate::turtle::turtle_parser::{labeled_bnode, statements, triples, TurtleValue}; #[test] fn base_test() { @@ -229,6 +235,31 @@ mod test { dbg!(res); } + #[test] + fn with_unlabeled_type_bnode_test() { + let s = r#" + [foaf:name "Bob"] . + "#; + let (rest, res) = statements(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + vec![TurtleValue::Statement { + subject: Box::new(TurtleValue::BNode(BlankNode::Unlabeled,)), + predicate_objects: vec![TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "foaf", + local_name: "name", + },)), + object: Box::new(TurtleValue::Literal(Literal::Quoted { + datatype: Some(Iri::Enclosed("http://www.w3.org/2001/XMLSchema#string",),), + value: Cow::Borrowed("Bob"), + lang: None, + },)), + },], + },] + ); + } #[test] fn unlabeled_nested_bnode() { let s = r#" @@ -240,14 +271,120 @@ mod test { foaf:mbox ] . "#; - let (_, res) = triples(s).unwrap(); + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); dbg!(res); let s = r#"[] foaf:knows [foaf:name "Bob"] ."#; - let (_, res) = triples(s).unwrap(); + let (rest, res) = triples(s).unwrap(); + assert!(rest.trim().is_empty()); + dbg!(res); } + #[test] + fn test_object_list() { + let s = r#" + @prefix ex: . + ex:ComplexResource ex:hasValue 42 , "forty-two"@en . + "#; + let (rest, s) = statements(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + s, + vec![ + TurtleValue::Prefix(("ex", Iri::Enclosed("http://example.org/ns#",),),), + TurtleValue::Statement { + subject: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "ex", + local_name: "ComplexResource", + },)), + predicate_objects: vec![TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "ex", + local_name: "hasValue", + },)), + object: Box::new(TurtleValue::ObjectList(vec![ + TurtleValue::Literal(Literal::Integer(42,),), + TurtleValue::Literal(Literal::Quoted { + datatype: None, + value: Cow::Borrowed("forty-two"), + lang: Some("en",), + },), + ],)), + },], + }, + ] + ); + } + + #[test] + fn list_with_nested_collection() { + let s = r#" + # RDF List with nested collections + ex:ComplexList ex:hasList ( + "First Item" + ( + "Nested Item 1" + "Nested Item 2" + ) + "Second Item" + ) . + "#; + let (rest, res) = statements(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + vec![TurtleValue::Statement { + subject: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "ex", + local_name: "ComplexList", + },)), + predicate_objects: vec![TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Prefixed { + prefix: "ex", + local_name: "hasList", + },)), + object: Box::new(TurtleValue::Collection(VecDeque::from([ + TurtleValue::Literal(Literal::Quoted { + datatype: Some(Iri::Enclosed( + "http://www.w3.org/2001/XMLSchema#string", + ),), + value: Cow::Borrowed("First Item"), + lang: None, + },), + TurtleValue::Collection( + [ + TurtleValue::Literal(Literal::Quoted { + datatype: Some(Iri::Enclosed( + "http://www.w3.org/2001/XMLSchema#string", + ),), + value: Cow::Borrowed("Nested Item 1"), + lang: None, + },), + TurtleValue::Literal(Literal::Quoted { + datatype: Some(Iri::Enclosed( + "http://www.w3.org/2001/XMLSchema#string", + ),), + value: "Nested Item 2".into(), + lang: None, + },), + ] + .into(), + ), + TurtleValue::Literal(Literal::Quoted { + datatype: Some(Iri::Enclosed( + "http://www.w3.org/2001/XMLSchema#string", + ),), + value: "Second Item".into(), + lang: None, + },), + ],))), + },], + },] + ); + } + #[test] fn collection_test() { let s = r#":a :b ( "apple" "banana" ) ."#; diff --git a/tests/complex.ttl b/tests/complex.ttl new file mode 100644 index 0000000..35eac8c --- /dev/null +++ b/tests/complex.ttl @@ -0,0 +1,60 @@ +@prefix ex: . +@prefix foaf: . +@prefix dc: . +@prefix xsd: . +@prefix owl: . +@prefix skos: . +@prefix rdf: . +@prefix rdfs: . +# A complex resource with multiple properties +ex:ComplexResource a ex:Type1 , ex:Type2 ; + foaf:name "Complex Resource" ; + dc:created "2024-11-22T12:34:56Z"^^xsd:dateTime ; + ex:hasValue 42 , "forty-two"@en ; + ex:hasNestedObject [ + a ex:NestedType ; + ex:nestedProperty "Nested Value" ; + ex:linksTo + ] ; + ex:hasCollection ( "Item1" "Item2" [ ex:innerProperty "Inner Value" ] ) ; + skos:note "This resource demonstrates a variety of RDF features."@en ; + owl:sameAs ex:AliasResource . + +# An example of OWL class hierarchy +ex:Type1 a owl:Class ; + rdfs:label "Type 1"@en ; + rdfs:subClassOf ex:SuperType . + +ex:SuperType a owl:Class ; + rdfs:label "Super Type"@en ; + rdfs:comment "A superclass for demonstration purposes."@en . + +# Blank node example +[] a ex:AnonymousType ; + ex:anonymousProperty "I am a blank node." . + +# Multilingual literal example +ex:MultilingualResource dc:title "Título en Español"@es , "Title in English"@en , "Titre en Français"@fr . + +# Datatype examples +ex:DataTypedLiterals ex:integerValue "123"^^xsd:integer ; + ex:decimalValue "123.45"^^xsd:decimal ; + ex:booleanValue "true"^^xsd:boolean ; + ex:customValue "custom-datatype"^^ex:CustomDatatype . + +# Reified statement example +[ a rdf:Statement ; + rdf:subject ex:ComplexResource ; + rdf:predicate dc:creator ; + rdf:object "Author Name"@en +] . + +# RDF List with nested collections +ex:ComplexList ex:hasList ( + "First Item" + ( + "Nested Item 1" + "Nested Item 2" + ) + "Second Item" +) . diff --git a/tests/expected_complex.ttl b/tests/expected_complex.ttl new file mode 100644 index 0000000..21ea35d --- /dev/null +++ b/tests/expected_complex.ttl @@ -0,0 +1,50 @@ + . + . + "Complex Resource"^^. + "2024-11-22T12:34:56Z"^^. + "42"^^. + "forty-two"@en. + . + "Nested Value"^^. + . + . + "Item1"^^. + "Item2"^^. + "Inner Value"^^. + . + . + . + . + . + "This resource demonstrates a variety of RDF features."@en. + . + . + "Type 1"@en. + . + . + "Super Type"@en. + "A superclass for demonstration purposes."@en. + . + "I am a blank node."^^. + "Título en Español"@es. + "Title in English"@en. + "Titre en Français"@fr. + "123"^^. + "123.45"^^. + "true"^^. + "custom-datatype"^^. + . + . + . + "Author Name"@en. + "First Item"^^. + "Nested Item 1"^^. + "Nested Item 2"^^. + . + . + . + "Second Item"^^. + . + . + . + .