Skip to content

Commit

Permalink
refactor tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nbittich committed Nov 23, 2024
1 parent 18ff897 commit c2cc34d
Show file tree
Hide file tree
Showing 24 changed files with 7,424 additions and 95 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ lto = "thin"

[dev-dependencies]
serial_test = "3.1.1"
test-case = "3.3.1"

1 change: 1 addition & 0 deletions tests/modelE.ttl → examples/turtle_doc/diff/0004.ttl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@



<http://mu.semte.ch/streams/ldes-mow-register> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://w3id.org/ldes#EventStream> .

<http://mu.semte.ch/streams/ldes-mow-register> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://w3id.org/tree#Collection> .
Expand Down
7 changes: 7 additions & 0 deletions examples/turtle_doc/diff/0005.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@prefix : <http://xmlns.com/foaf/0.1/> .

<mailto:[email protected]>
a :Person ;
:name "Anne Example-Person2" ;
:interest <http://www.foaf-project.org/> ,
<http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/> .
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions tests/modelC.ttl → examples/turtle_doc/input/0003.ttl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@


<http://mu.semte.ch/streams/ldes-mow-register> a <http://w3id.org/ldes#EventStream>, <https://w3id.org/tree#Collection>;
<https://w3id.org/tree#view> <./1>;
<http://w3id.org/ldes#timestampPath> <http://www.w3.org/ns/prov#generatedAtTime>;
Expand Down
1 change: 1 addition & 0 deletions tests/modelD.ttl → examples/turtle_doc/input/0004.ttl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@


<http://mu.semte.ch/streams/ldes-mow-register> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://w3id.org/ldes#EventStream> .

<http://mu.semte.ch/streams/ldes-mow-register> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://w3id.org/tree#Collection> .
Expand Down
7 changes: 7 additions & 0 deletions examples/turtle_doc/input/0005.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

@prefix : <http://xmlns.com/foaf/0.1/> .

<mailto:[email protected]>
a :Person ;
:name "Anne Example-Person" ;
:interest <http://www.foaf-project.org/> , <http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/> .
13 changes: 13 additions & 0 deletions examples/turtle_doc/input/0006.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@prefix ex: <http://example.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

ex:Resource1 rdfs:comment "
This is a long comment
that spans multiple lines.
\n
It contains spaces, special characters like %, &, and @, and
even new lines. This format ensures the string is
properly encapsulated without breaking syntax rules.
\t\t
You can add as much text as needed here.
" .
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

<http://example.org/Agent1> <http://example.org/apiVersion> "1.0.0" .
<http://example.org/Agent1> <http://xmlns.com/foaf/0.1/name> "Semantic Generator Bot" .
<http://example.org/Agent1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Agent> .
Expand Down
3 changes: 2 additions & 1 deletion tests/modelF.ttl → examples/turtle_doc/output/0003.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@
<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/3> <http://www.w3.org/2004/02/skos/core#inScheme> <http://data.vlaanderen.be/id/conceptscheme/Verkeersbordconcept>.
<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/3> <http://mu.semte.ch/vocabularies/ext/zonality> <http://lblod.data.gift/concepts/b651931b-923c-477c-8da9-fc7dd841fdcc>.
<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/3> <http://purl.org/dc/terms/isVersionOf> <http://data.vlaanderen.be/id/concept/Verkeersbordconcept/f9312556b1bdfbb278ec04033417152abbb254466df0069ff1894d0ea7a55482>.
<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/3> <http://www.w3.org/ns/prov#generatedAtTime> "2024-11-22T07:12:35.575Z"^^<http://www.w3.org/2001/XMLSchema#dateTime>.
<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/3> <http://www.w3.org/ns/prov#generatedAtTime> "2024-11-22T07:12:35.575Z"^^<http://www.w3.org/2001/XMLSchema#dateTime>.

Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/b975c846-170e-4569-a488-722b70a546d8> <http://www.w3.org/ns/prov#generatedAtTime> "2024-11-21T13:35:50.038Z"^^<http://www.w3.org/2001/XMLSchema#dateTime>.
<http://mu.semte.ch/streams/ldes-mow-register> <https://w3id.org/tree#member> <http://mu.semte.ch/services/ldes-time-fragmenter/versioned/486bfa18-6bfd-461e-9688-c01e5fb9b49c>.
<http://mu.semte.ch/services/ldes-time-fragmenter/versioned/486bfa18-6bfd-461e-9688-c01e5fb9b49c> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.cidoc-crm.org/cidoc-crm/E54_Dimension>.
Expand Down
2 changes: 2 additions & 0 deletions examples/turtle_doc/output/0005.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

<mailto:[email protected]> <http://xmlns.com/foaf/0.1/name> "Anne Example-Person"^^<http://www.w3.org/2001/XMLSchema#string>
11 changes: 11 additions & 0 deletions examples/turtle_doc/output/0006.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<http://example.org/Resource1> <http://www.w3.org/2000/01/rdf-schema#comment> "
This is a long comment
that spans multiple lines.


It contains spaces, special characters like %, &, and @, and
even new lines. This format ensures the string is
properly encapsulated without breaking syntax rules.

You can add as much text as needed here.
"^^<http://www.w3.org/2001/XMLSchema#string>.
7,221 changes: 7,221 additions & 0 deletions examples/turtle_doc/output/0007.ttl

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod triple_common_parser;
pub mod turtle;

pub mod prelude {
use nom::error::VerboseError;
pub use nom::{
branch::alt,
bytes::complete::{
Expand All @@ -26,7 +27,7 @@ pub mod prelude {
sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
AsChar, IResult, InputIter, ParseTo, Parser,
};
pub type ParserResult<'a, T> = IResult<&'a str, T>;
pub type ParserResult<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
}

pub mod grammar {
Expand All @@ -39,3 +40,6 @@ pub mod grammar {
pub const LANGTAG: &str = "@";
pub const BLANK_NODE_LABEL: &str = "_:";
}

#[cfg(test)]
mod tests;
54 changes: 50 additions & 4 deletions src/string_parser.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
use std::borrow::Cow;

/// copied from https://github.com/rust-bakery/nom/blob/7.1.3/examples/string.rs
use nom::branch::alt;
use nom::bytes::streaming::{is_not, take_while_m_n};
use nom::character::complete::anychar;
use nom::character::streaming::{char, multispace1};
use nom::combinator::{map, map_opt, map_res, value, verify};
use nom::error::{FromExternalError, ParseError};
use nom::multi::fold_many0;
use nom::sequence::{delimited, preceded};
use nom::IResult;

use crate::prelude::ParserResult;

/// copied from https://github.com/rust-bakery/nom/blob/7.1.3/examples/string.rs
// parser combinators are constructed from the bottom up:
// first we write parsers for the smallest elements (escaped characters),
// then combine them into larger parsers.
Expand Down Expand Up @@ -83,6 +82,9 @@ fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>(
) -> IResult<&'a str, &'a str, E> {
preceded(char('\\'), multispace1)(input)
}
fn parse_escaped_anychar<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, char, E> {
preceded(char('\\'), anychar)(input)
}

/// Parse a non-empty block of text that doesn't include \ or "
fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
Expand All @@ -105,6 +107,7 @@ enum StringFragment<'a> {
Literal(&'a str),
EscapedChar(char),
EscapedWS,
EscapedAnychar(char),
}

/// Combine parse_literal, parse_escaped_char, parse_escaped_whitespace, and parse_escaped_anychar
Expand All @@ -119,12 +122,16 @@ where
map(parse_literal, StringFragment::Literal),
map(parse_escaped_char, StringFragment::EscapedChar),
value(StringFragment::EscapedWS, parse_escaped_whitespace),
map(parse_escaped_anychar, StringFragment::EscapedAnychar),
))(input)
}

/// Parse a string. Use a loop of parse_fragment and push all of the fragments
/// into an output string.
pub(crate) fn parse_escaped_string(input: &str) -> ParserResult<Cow<'_, str>> {
pub(crate) fn parse_escaped_string<'a, E>(input: &'a str) -> IResult<&'a str, Cow<'a, str>, E>
where
E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
// fold_many0 is the equivalent of iterator::fold. It runs a parser in a loop,
// and for each output value, calls a folding function on each output value.
let build_string = || {
Expand All @@ -140,6 +147,10 @@ pub(crate) fn parse_escaped_string(input: &str) -> ParserResult<Cow<'_, str>> {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
StringFragment::EscapedWS => {}
StringFragment::EscapedAnychar(c) => {
string.push('\\');
string.push(c)
}
}
string
},
Expand All @@ -153,3 +164,38 @@ pub(crate) fn parse_escaped_string(input: &str) -> ParserResult<Cow<'_, str>> {

map(build_string(), Cow::Owned)(input)
}

#[cfg(test)]
mod test {
    use std::borrow::Cow;

    use nom::{bytes::complete::tag, sequence::delimited};

    use crate::prelude::ParserResult;

    use super::parse_escaped_string;

    /// A double-quoted, multi-line string containing raw newlines and tabs must be
    /// returned verbatim (without the surrounding quotes).
    #[test]
    fn parse_escaped_string_with_tab() {
        // The text we expect back from the parser.
        let expected = concat!(
            "\n",
            "This is a long comment\n",
            "that spans multiple lines.\n\n",
            "It contains spaces, special characters like %, &, and @, and \n",
            "even new lines. This format ensures the string is \n",
            "properly encapsulated without breaking syntax rules.\n",
            "\t\t\n",
            "You can add as much text as needed here.",
        );
        // The parser input is that same text wrapped in double quotes.
        let input = format!("\"{expected}\"");

        let parsed: ParserResult<Cow<'_, str>> =
            delimited(tag("\""), parse_escaped_string, tag("\""))(input.as_str());
        let (_, actual) = parsed.unwrap();
        assert_eq!(actual, expected);
    }
}
67 changes: 67 additions & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use std::{path::PathBuf, str::FromStr};

use crate::turtle::turtle_doc::TurtleDoc;

/// Load the `input/<test_name>.ttl` and `output/<test_name>.ttl` fixtures found under
/// `directory` and check them against each other.
///
/// * `test_name` - file stem shared by the input and output fixtures.
/// * `diff_file` - when `Some`, the stem of a fixture under `diff/`. That document is
///   subtracted from the input first; the remainder must be non-empty and must have
///   no statement missing from the output.
///   When `None`, input and output must have the same length and the difference
///   `input - output` must be empty; both documents are printed with whitespace made
///   visible before asserting.
/// * `directory` - root folder holding the `input`, `output` and `diff` sub-folders.
/// * `well_known_prefix` - forwarded unchanged to every `TurtleDoc::from_file` call.
///
/// Panics (failing the test) if a fixture cannot be loaded, a difference cannot be
/// computed, or one of the assertions does not hold.
fn cmp_input_file(
    test_name: &str,
    diff_file: Option<&str>,
    directory: &str,
    well_known_prefix: Option<String>,
) {
    println!("running {test_name}");

    // Builds `<directory>/<kind>/<stem>.ttl`.
    let fixture = |kind: &str, stem: &str| -> PathBuf {
        PathBuf::from_str(directory)
            .map(|root| root.join(kind).join(format!("{stem}.ttl")))
            .unwrap()
    };
    // Makes whitespace visible so mismatches are readable in the test log.
    let visible_ws = |rendered: String| -> String {
        rendered
            .replace("\n", "<NEWLINE>")
            .replace("\t", "<TAB>")
            .replace(" ", "<SPACE>")
    };

    let mut input_buf = String::new();
    let mut output_buf = String::new();

    let input = TurtleDoc::from_file(
        fixture("input", test_name),
        well_known_prefix.clone(),
        &mut input_buf,
    )
    .unwrap();
    let output = TurtleDoc::from_file(
        fixture("output", test_name),
        well_known_prefix.clone(),
        &mut output_buf,
    )
    .unwrap();

    match diff_file {
        Some(diff_stem) => {
            let mut diff_buf = String::new();
            let to_subtract = TurtleDoc::from_file(
                fixture("diff", diff_stem),
                well_known_prefix,
                &mut diff_buf,
            )
            .unwrap();

            let remaining = input.difference(&to_subtract).unwrap();
            assert!(!remaining.is_empty());

            let leftover = remaining.difference(&output).unwrap();
            assert_eq!(leftover.len(), 0);
        }
        None => {
            let leftover = input.difference(&output).unwrap();

            println!("{}", visible_ws(input.to_string()));
            println!("===");
            println!("{}", visible_ws(output.to_string()));

            assert_eq!(input.len(), output.len());
            assert_eq!(leftover.len(), 0);
        }
    }
}
mod turtle_doc_test;
19 changes: 19 additions & 0 deletions src/tests/turtle_doc_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use serial_test::serial;
use test_case::test_case;

use crate::{tests::cmp_input_file, turtle::turtle_doc::reset_fake_uuid_gen};

// Root folder of the Turtle fixtures. `cmp_input_file` resolves `input/`, `output/`
// and (when a diff is requested) `diff/` sub-folders beneath it.
const INPUT_DIR: &str = "examples/turtle_doc";

// One test is generated per `#[test_case]` line below:
//   * 1st argument: fixture stem, i.e. `input/<stem>.ttl` is compared with `output/<stem>.ttl`;
//   * 2nd argument: optional stem of a `diff/<stem>.ttl` fixture, forwarded as `diff_file`;
//   * text after `;`: the display name of the generated case.
#[test_case("0001", None ; "EQ: complex document with blank nodes, nested objects, etc")]
#[test_case("0002", None ; "EQ: another complex document")]
#[test_case("0003", None ; "EQ: could not parse completely")]
#[test_case("0004", Some("0004") ; "DIFF: diff is buggy")]
#[test_case("0005", Some("0005") ; "DIFF: simple diff")]
#[test_case("0006", None ; "EQ: complex string with spaces")]
#[test_case("0007", None ; "EQ: complex string with spaces but more complex")]
// NOTE(review): `#[serial]` is presumably required because `reset_fake_uuid_gen`
// touches state shared between cases — confirm against `turtle::turtle_doc`.
#[serial]
fn test_turtle_doc(test_name: &str, diff_file: Option<&str>) {
// Reset the fake UUID generator before every case — presumably so that generated
// identifiers are reproducible from one case to the next; TODO confirm.
reset_fake_uuid_gen();
// No well-known prefix is supplied for these fixtures.
cmp_input_file(test_name, diff_file, INPUT_DIR, None);
}
4 changes: 3 additions & 1 deletion src/triple_common_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ pub(crate) mod literal {
}
pub(crate) mod triple {

use nom::error::{ParseError, VerboseError};

use crate::grammar::BLANK_NODE_LABEL;
use crate::prelude::*;
use crate::shared::NS_TYPE;
Expand Down Expand Up @@ -382,7 +384,7 @@ pub(crate) mod triple {
.is_some()
|| bnode.chars().take(1).any(allowed_but_not_as_first)
{
let err: Error<&str> = make_error(s, ErrorKind::IsNot);
let err = VerboseError::from_error_kind(s, ErrorKind::IsNot);
return Err(nom::Err::Error(err));
}
let rest = &s[idx_bnode..];
Expand Down
Loading

0 comments on commit c2cc34d

Please sign in to comment.