From 420b41215d0c663b5fc013b667ef801fdfcc0c88 Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Sat, 23 Nov 2024 09:59:36 +0100 Subject: [PATCH] use blanknodes --- src/shared.rs | 1 + src/triple_common_parser.rs | 2 +- src/turtle/turtle_doc.rs | 80 ++++++++++++++++++++------ src/turtle/turtle_parser.rs | 27 ++++++++- tests/expected_complex.ttl | 58 +++++++++---------- tests/expected_complex2.ttl | 110 ++++++++++++++++++------------------ tests/expected_other.ttl | 90 ++++++++++++++--------------- tests/labeled_bnode_err.ttl | 40 +++++++++++++ 8 files changed, 259 insertions(+), 149 deletions(-) create mode 100644 tests/labeled_bnode_err.ttl diff --git a/src/shared.rs b/src/shared.rs index b41eda3..ecf980b 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -12,6 +12,7 @@ pub const NS_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; pub const RDF_NIL: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"; pub const RDF_FIRST: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"; pub const RDF_REST: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"; +#[allow(unused)] pub const DEFAULT_WELL_KNOWN_PREFIX: &str = "http://example.org/.well-known/genid#"; pub static DATE_FORMATS: [&str; 9] = [ "%Y-%m-%dT%H:%M:%S%.3f%Z", diff --git a/src/triple_common_parser.rs b/src/triple_common_parser.rs index 17624f2..cc3f799 100644 --- a/src/triple_common_parser.rs +++ b/src/triple_common_parser.rs @@ -356,7 +356,7 @@ pub(crate) mod triple { pub(crate) fn labeled_bnode(s: &str) -> ParserResult { let parse_label = delimited( tag(BLANK_NODE_LABEL), - take_while(|s: char| !s.is_whitespace()), + take_while(|s: char| !s.is_whitespace() && s != '.' && s != ';' && s != '<'), space0, ); diff --git a/src/turtle/turtle_doc.rs b/src/turtle/turtle_doc.rs index 08216be..51a82f8 100644 --- a/src/turtle/turtle_doc.rs +++ b/src/turtle/turtle_doc.rs @@ -1,7 +1,8 @@ +use crate::grammar::BLANK_NODE_LABEL; use crate::shared::{ - DATE_FORMATS, DEFAULT_DATE_FORMAT, DEFAULT_DATE_TIME_FORMAT, DEFAULT_TIME_FORMAT, - DEFAULT_WELL_KNOWN_PREFIX, RDF_FIRST, RDF_NIL, RDF_REST, TIME_FORMATS, XSD_BOOLEAN, XSD_DATE, - XSD_DATE_TIME, XSD_DECIMAL, XSD_DOUBLE, XSD_INTEGER, XSD_TIME, + DATE_FORMATS, DEFAULT_DATE_FORMAT, DEFAULT_DATE_TIME_FORMAT, DEFAULT_TIME_FORMAT, RDF_FIRST, + RDF_NIL, RDF_REST, TIME_FORMATS, XSD_BOOLEAN, XSD_DATE, XSD_DATE_TIME, XSD_DECIMAL, XSD_DOUBLE, + XSD_INTEGER, XSD_TIME, }; use crate::triple_common_parser::{comments, Literal as ASTLiteral}; use crate::triple_common_parser::{BlankNode, Iri}; @@ -40,7 +41,7 @@ fn get_uuid() -> String { struct Context<'a> { base: Option<&'a str>, - well_known_prefix: String, + well_known_prefix: Option, prefixes: BTreeMap<&'a str, &'a str>, } #[derive(Serialize, PartialEq, Deserialize, Clone, Debug)] @@ -88,6 +89,7 @@ pub enum Node<'a> { Literal(Literal<'a>), Ref(Arc>), List(Vec>), + LabeledBlankNode(String), } #[derive(PartialEq, PartialOrd, Debug, Clone)] pub struct Statement<'a> { @@ -97,7 +99,7 @@ pub struct Statement<'a> { } #[derive(PartialEq, PartialOrd, Debug, Default)] pub struct TurtleDoc<'a> { - well_known_prefix: String, + well_known_prefix: Option, base: Option<&'a str>, prefixes: BTreeMap, Cow<'a, str>>, statements: Vec>, @@ -139,20 +141,35 @@ impl<'a> PartialEq for Node<'a> { match (self, other) { (Node::Iri(n1), Node::Iri(n2)) => n1.eq(n2), (Node::Literal(n1), Node::Literal(n2)) => n1 == n2, - (l @ Node::Literal(_) | l @ Node::Iri(_) | l @ Node::List(_), Node::Ref(n2)) => { - n2.as_ref().eq(l) - } - - (Node::Ref(n1), r @ Node::Iri(_) | r @ Node::Literal(_) | r @ Node::List(_)) => { - n1.as_ref().eq(r) - } + ( + l @ Node::Literal(_) + | l @ Node::Iri(_) + | l @ Node::List(_) + | l @ Node::LabeledBlankNode(_), + Node::Ref(n2), + ) => n2.as_ref().eq(l), + + ( + Node::Ref(n1), + r @ Node::Iri(_) + | r @ Node::Literal(_) + | r @ Node::List(_) + | r @ Node::LabeledBlankNode(_), + ) => n1.as_ref().eq(r), (Node::Ref(n1), Node::Ref(n2)) => n1 == n2, (Node::List(n1), Node::List(n2)) => n1 == n2, + (Node::LabeledBlankNode(n1), Node::LabeledBlankNode(n2)) => n1 == n2, (Node::Iri(_), Node::Literal(_)) + | (Node::Iri(_), Node::LabeledBlankNode(_)) | (Node::Iri(_), Node::List(_)) + | (Node::LabeledBlankNode(_), Node::Iri(_)) + | (Node::LabeledBlankNode(_), Node::Literal(_)) + | (Node::LabeledBlankNode(_), Node::List(_)) | (Node::Literal(_), Node::Iri(_)) + | (Node::Literal(_), Node::LabeledBlankNode(_)) | (Node::Literal(_), Node::List(_)) | (Node::List(_), Node::Iri(_)) + | (Node::List(_), Node::LabeledBlankNode(_)) | (Node::List(_), Node::Literal(_)) => false, } } @@ -407,8 +424,8 @@ impl<'a> TurtleDoc<'a> { turtle_values: Vec>, well_known_prefix: Option, ) -> Result { - let well_known_prefix = - well_known_prefix.unwrap_or_else(|| DEFAULT_WELL_KNOWN_PREFIX.to_string()); + // let well_known_prefix = + // well_known_prefix.unwrap_or_else(|| DEFAULT_WELL_KNOWN_PREFIX.to_string()); let mut context = Context { base: None, well_known_prefix, @@ -606,7 +623,6 @@ impl<'a> TurtleDoc<'a> { ctx: &'x Context, statements: &'x mut Vec>, ) -> Result, TurtleDocError> { - let well_known_prefix = ctx.well_known_prefix.as_str(); match value { v @ TurtleValue::Iri(_) | v @ TurtleValue::Literal(_) => { let prefixes: BTreeMap, Cow> = ctx @@ -618,11 +634,19 @@ impl<'a> TurtleDoc<'a> { Self::simple_turtle_value_to_node(v, base, prefixes, true) } TurtleValue::BNode(BlankNode::Labeled(label)) => { - Ok(Node::Iri(Cow::Owned(well_known_prefix.to_owned() + label))) + if let Some(well_known_prefix) = ctx.well_known_prefix.as_ref() { + Ok(Node::Iri(Cow::Owned(well_known_prefix.to_owned() + label))) + } else { + Ok(Node::LabeledBlankNode(label.into())) + } } TurtleValue::BNode(BlankNode::Unlabeled) => { let uuid = get_uuid(); - Ok(Node::Iri(Cow::Owned(format!("{well_known_prefix}{uuid}")))) + if let Some(well_known_prefix) = ctx.well_known_prefix.as_ref() { + Ok(Node::Iri(Cow::Owned(format!("{well_known_prefix}{uuid}")))) + } else { + Ok(Node::LabeledBlankNode(uuid.replace("-", ""))) + } } TurtleValue::Statement { subject, @@ -786,7 +810,14 @@ impl From<&Node<'_>> for RdfJsonNodeResult { fn from(value: &Node<'_>) -> Self { let typ_uri = "uri".into(); let typ_literal = "literal".into(); + let typ_bnode = "bnode".into(); match value { + Node::LabeledBlankNode(bnode) => RdfJsonNodeResult::SingleNode(RdfJsonNode { + typ: typ_bnode, + datatype: None, + lang: None, + value: bnode.into(), + }), Node::Iri(iri) => RdfJsonNodeResult::SingleNode(RdfJsonNode { typ: typ_uri, datatype: None, @@ -916,6 +947,9 @@ impl Display for Node<'_> { Node::Literal(Literal::Time(d)) => { write!(f, r#""{}"^^<{}>"#, d.format(DEFAULT_TIME_FORMAT), XSD_TIME) } + Node::LabeledBlankNode(bnode) => { + write!(f, "{BLANK_NODE_LABEL}{bnode}") + } Node::List(list) => { panic!("encountered node list where we shouldn't {list:?}"); } @@ -1214,6 +1248,15 @@ mod test { assert_eq!(9, triples.len()); } + #[test] + #[serial] + fn test_labeled_bnode_err() { + FAKE_UUID_GEN.store(0, std::sync::atomic::Ordering::SeqCst); + let mut buf_c = String::new(); + let turtle_c = + TurtleDoc::from_file("tests/labeled_bnode_err.ttl", None, &mut buf_c).unwrap(); + assert_eq!(turtle_c.len(), 40) + } #[test] #[serial] fn other_test() { @@ -1228,9 +1271,10 @@ mod test { println!("{}", turtle_expected.difference(&turtle_c).unwrap()); assert_eq!(turtle_c.difference(&turtle_expected).unwrap().len(), 0); } + #[test] #[serial] - fn complex_test() { + fn complex_test_with_bnode() { FAKE_UUID_GEN.store(0, std::sync::atomic::Ordering::SeqCst); let mut buf_c = String::new(); let mut buf_e = String::new(); diff --git a/src/turtle/turtle_parser.rs b/src/turtle/turtle_parser.rs index 00cfbb6..afe6825 100644 --- a/src/turtle/turtle_parser.rs +++ b/src/turtle/turtle_parser.rs @@ -261,7 +261,32 @@ mod test { ); } - #[test] // NORDINE + #[test] + fn predicate_labeled_bnode_test() { + let s = r#" + _:1. + + "#; + let (rest, res) = statements(s).unwrap(); + assert!(rest.trim().is_empty()); + assert_eq!( + res, + vec![TurtleValue::Statement { + subject: Box::new(TurtleValue::Iri(Iri::Enclosed( + "http://example.org/ns#ComplexResource", + ),)), + predicate_objects: [TurtleValue::PredicateObject { + predicate: Box::new(TurtleValue::Iri(Iri::Enclosed( + "http://example.org/ns#hasNestedObject", + ),)), + object: Box::new(TurtleValue::BNode(BlankNode::Labeled("1",),)), + },] + .into(), + },] + ); + } + + #[test] fn collection_test_from_other() { let s = r#" ex:techProducts ex:hasProducts ( ex:product1 ex:product2 )."#; diff --git a/tests/expected_complex.ttl b/tests/expected_complex.ttl index 21ea35d..4f1ec79 100644 --- a/tests/expected_complex.ttl +++ b/tests/expected_complex.ttl @@ -4,18 +4,18 @@ "2024-11-22T12:34:56Z"^^. "42"^^. "forty-two"@en. - . - "Nested Value"^^. - . - . - "Item1"^^. - "Item2"^^. - "Inner Value"^^. - . - . - . - . - . +_:1 . +_:1 "Nested Value"^^. +_:1 . + _:1. +_:2 "Item1"^^. +_:3 "Item2"^^. +_:5 "Inner Value"^^. +_:4 _:5. +_:4 . +_:3 _:4. +_:2 _:3. + _:2. "This resource demonstrates a variety of RDF features."@en. . . @@ -24,8 +24,8 @@ . "Super Type"@en. "A superclass for demonstration purposes."@en. - . - "I am a blank node."^^. +_:6 . +_:6 "I am a blank node."^^. "Título en Español"@es. "Title in English"@en. "Titre en Français"@fr. @@ -33,18 +33,18 @@ "123.45"^^. "true"^^. "custom-datatype"^^. - . - . - . - "Author Name"@en. - "First Item"^^. - "Nested Item 1"^^. - "Nested Item 2"^^. - . - . - . - "Second Item"^^. - . - . - . - . +_:7 . +_:7 . +_:7 . +_:7 "Author Name"@en. +_:8 "First Item"^^. +_:10 "Nested Item 1"^^. +_:11 "Nested Item 2"^^. +_:11 . +_:10 _:11. +_:9 _:10. +_:12 "Second Item"^^. +_:12 . +_:9 _:12. +_:8 _:9. + _:8. diff --git a/tests/expected_complex2.ttl b/tests/expected_complex2.ttl index 48bb115..66af529 100644 --- a/tests/expected_complex2.ttl +++ b/tests/expected_complex2.ttl @@ -5,17 +5,17 @@ . "International Standard Book Number"@en . . - . + _:17 . . - . - . - "Alice Smith" . - . +_:15 _:16 . +_:15 . +_:15 "Alice Smith" . +_:15 . . . "978-3-16-148410-0" . "2024-11-22"^^ . - . + _:1 . "Complex RDF Document Design"@en . . . @@ -24,63 +24,63 @@ . . . - "Tech Reviews Inc." . - "John Reviewer" . - . - . - "RDF" . - . - . - . - "Semantic Web" . - . - . - . +_:3 "Tech Reviews Inc." . +_:3 "John Reviewer" . +_:3 . +_:4 _:5 . +_:4 "RDF" . +_:8 _:10 . +_:8 _:9 . +_:6 . +_:6 "Semantic Web" . +_:12 . +_:12 _:13 . + _:7 . . - . + _:2 . "Advanced RDF Features"@en . . - . - . - . - "Advanced Topics"@en . - . - . - . - . - . - . - . +_:19 . +_:19 . +_:19 . +_:13 "Advanced Topics"@en . +_:7 . +_:7 . +_:7 . +_:10 _:12 . +_:10 _:11 . + _:14 . + _:8 . "Apprendre RDF avec des exemples"@fr . "Learning RDF with Examples"@en . . - . - . - . - . - . - . - "RDF Publishers" . - "Introduction to RDF"@en . - . - "Jane Doe" . - . - . - . - . - "Turtle" . - . +_:20 . +_:20 _:21 . +_:21 . +_:21 . +_:21 . +_:14 . +_:14 "RDF Publishers" . +_:9 "Introduction to RDF"@en . +_:16 . +_:16 "Jane Doe" . +_:16 . +_:17 _:18 . +_:17 . +_:5 _:6 . +_:5 "Turtle" . + _:15 . . . . "On the Formal Semantics of RDF" . . - . - . - "0.98"^^ . - . - "Jane Doe" . - . - "Complex Data Structures"@en . - . - . +_:2 _:4 . +_:2 _:3 . +_:2 "0.98"^^ . +_:1 . +_:1 "Jane Doe" . +_:1 . +_:11 "Complex Data Structures"@en . +_:18 _:20 . +_:18 _:19 . diff --git a/tests/expected_other.ttl b/tests/expected_other.ttl index 9b27f3e..ed6a91f 100644 --- a/tests/expected_other.ttl +++ b/tests/expected_other.ttl @@ -1,19 +1,19 @@ - "Charlie" . - . - "EMP003" . +_:2 "Charlie" . +_:2 . +_:2 "EMP003" . "Living Being" . . . - "94086" . - "Sunnyvale" . - "123 Main St." . - . - . - . - . - "1"^^ . - . - . +_:5 "94086" . +_:5 "Sunnyvale" . +_:5 "123 Main St." . +_:7 _:8 . +_:7 . +_:9 _:10 . +_:9 . +_:3 "1"^^ . +_:12 . +_:12 . . . . @@ -26,8 +26,8 @@ "Employee" . . . - . - . +_:10 . +_:10 . "Age of the person." . . . @@ -58,28 +58,28 @@ . . . - . - . - . - "EMP003" . - "Charlie" . - . - "Human Person" . - . - . +_:4 _:5 . +_:4 . +_:4 . +_:4 "EMP003" . +_:4 "Charlie" . +_:4 . +_:1 "Human Person" . +_:1 . +_:1 . . . . . "Company" . . - . - . - . - "Silicon Valley" . - "2024-06-15"^^ . - "Product Launch" . - . +_:11 . +_:11 . +_:11 . +_:11 "Silicon Valley" . +_:11 "2024-06-15"^^ . +_:11 "Product Launch" . +_:11 . . . . @@ -102,13 +102,13 @@ . . . - . + _:6 . . - "1"^^ . - "1"^^ . - . - . - . +_:12 "1"^^ . +_:12 "1"^^ . +_:3 . +_:3 . + _:2 . . "EMP002" . . @@ -120,7 +120,7 @@ . . . - . + _:9 . . . . @@ -131,12 +131,12 @@ "Silicon Valley" . "Tech Innovations Ltd." . . - . - . - . - . - . +_:8 . +_:8 . +_:6 _:7 . +_:6 . + _:1 . . - . - . + _:3 . + _:12 . . diff --git a/tests/labeled_bnode_err.ttl b/tests/labeled_bnode_err.ttl new file mode 100644 index 0000000..5f21cdb --- /dev/null +++ b/tests/labeled_bnode_err.ttl @@ -0,0 +1,40 @@ + . + . + "Complex Resource"^^. + "2024-11-22T12:34:56Z"^^. + "42"^^. + "forty-two"@en. +_:1 . +_:1 "Nested Value"^^. +_:1 . +_:2 "Item1"^^. +_:3 "Item2"^^. +_:5 "Inner Value"^^. +_:4 . + "This resource demonstrates a variety of RDF features."@en. + . + . + "Type 1"@en. + . + . + "Super Type"@en. + "A superclass for demonstration purposes."@en. +_:6 . +_:6 "I am a blank node."^^. + "Título en Español"@es. + "Title in English"@en. + "Titre en Français"@fr. + "123"^^. + "123.45"^^. + "true"^^. + "custom-datatype"^^. +_:7 . +_:7 . +_:7 . +_:7 "Author Name"@en. +_:8 "First Item"^^. +_:10 "Nested Item 1"^^. +_:11 "Nested Item 2"^^. +_:11 . +_:12 "Second Item"^^. +_:12 .