Skip to content

Commit

Permalink
IRI proper implementation: ipv4 & ipv6
Browse files Browse the repository at this point in the history
  • Loading branch information
nbittich committed Nov 24, 2024
1 parent 13a60e4 commit 081391a
Show file tree
Hide file tree
Showing 6 changed files with 225 additions and 3 deletions.
24 changes: 24 additions & 0 deletions examples/turtle_doc/input/0028.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# A triple with all absolute IRIs
<http://one.example/subject1> <http://one.example/predicate1> <http://one.example/object1> .

@base <http://one.example/> .
<subject2> <predicate2> <object2> . # relative IRIs, e.g. http://one.example/subject2

BASE <http://one.example/>
<subject2> <predicate2> <object2> . # relative IRIs, e.g. http://one.example/subject2

@prefix p: <http://two.example/> .
p:subject3 p:predicate3 p:object3 . # prefixed name, e.g. http://two.example/subject3

PREFIX p: <http://two.example/>
p:subject3 p:predicate3 p:object3 . # prefixed name, e.g. http://two.example/subject3

@prefix p: <path/> . # prefix p: now stands for http://one.example/path/
p:subject4 p:predicate4 p:object4 . # prefixed name, e.g. http://one.example/path/subject4

@prefix : <http://another.example/> . # empty prefix
:subject5 :predicate5 :object5 . # prefixed name, e.g. http://another.example/subject5

:subject6 a :subject7 . # same as :subject6 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> :subject7 .

<http://伝言.example/?user=أكرم&amp;channel=R%26D> a :subject8 . # a multi-script subject IRI .
7 changes: 7 additions & 0 deletions examples/turtle_doc/output/0028.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<http://two.example/subject3> <http://two.example/predicate3> <http://two.example/object3> .
<http://another.example/subject5> <http://another.example/predicate5> <http://another.example/object5> .
<http://one.example/path/subject4> <http://one.example/path/predicate4> <http://one.example/path/object4> .
<http://one.example/subject2> <http://one.example/predicate2> <http://one.example/object2> .
<http://another.example/subject6> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://another.example/subject7> .
<http://one.example/subject1> <http://one.example/predicate1> <http://one.example/object1> .
<http://伝言.example/?user=أكرم&amp;channel=R%26D> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://another.example/subject8> .
190 changes: 190 additions & 0 deletions src/iri.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
#![allow(unused)]

use std::{collections::VecDeque, ops::RangeBounds};

use chrono::ParseResult;
use nom::{
bytes::complete::take_while_m_n,
character::complete::one_of,
combinator::{success, verify},
error::{ParseError, VerboseError},
multi::{many1, many_m_n},
};

use crate::prelude::*;

/// One lexical piece of a textual IPv6 address, as produced while scanning it
/// left to right.
///
/// The derives make the type printable in assertion/diagnostic output and
/// comparable in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Segment {
    /// A single 16-bit group written as 1 to 4 hexadecimal digits.
    Hextet(u16),
    /// The `::` marker standing in for a run of all-zero groups.
    Compressed,
    /// The octets of a dotted-quad IPv4 address embedded in the IPv6 text.
    /// The parser in this file only ever stores exactly four octets here.
    IpV4(Vec<u8>),
}
fn parse_ip_v6(s: &str) -> ParserResult<Vec<u16>> {
fn hex_to_u16(input: &str) -> Result<u16, std::num::ParseIntError> {
u16::from_str_radix(input, 16)
}
fn recognize_hexadecimal(input: &str) -> ParserResult<&str> {
recognize(take_while_m_n(1, 4, |c: char| c.is_ascii_hexdigit()))(input)
}
fn hextet(s: &str) -> ParserResult<u16> {
map_res(recognize_hexadecimal, hex_to_u16)(s)
};
fn segment(s: &str) -> ParserResult<Segment> {
alt((
map(tag("::"), |_| Segment::Compressed),
preceded(tag(":"), map(parse_ip_v4, Segment::IpV4)),
preceded(opt(tag(":")), map(hextet, Segment::Hextet)),
))(s)
}
let mut ipv6: Vec<u16> = vec![];
let (rest, list) = verify(many_m_n(1, 8, segment), |l: &[Segment]| {
l.iter()
.filter(|seg| matches!(seg, Segment::Compressed))
.count()
<= 1
&& l.iter()
.filter(|seg| matches!(seg, Segment::IpV4(_)))
.count()
<= 1
})(s)?;

let mut compression_pos = None;
for (idx, segment) in list.into_iter().enumerate() {
match segment {
Segment::Hextet(v) => ipv6.push(v),
Segment::Compressed => {
compression_pos = Some(idx);
}
Segment::IpV4(l) => {
ipv6.push((l[0] as u16) << 8 | l[1] as u16);
ipv6.push((l[2] as u16) << 8 | l[3] as u16);
}
}
}
if let Some(idx) = compression_pos {
let len = ipv6.len();
while ipv6.len() < 8 {
ipv6.insert(idx, 0x0);
}
}

Ok((rest, ipv6))
}
/// Parses a dotted-quad IPv4 address (`a.b.c.d`) at the start of `s`.
///
/// Each octet is a run of ASCII decimal digits that must be consumed in full
/// by the `u8` parser, so values above 255 are rejected by that parser itself
/// and no separate range check is needed. On success returns the unconsumed
/// remainder of `s` and the four octets in textual order.
///
/// # Errors
/// Fails when the input does not start with a digit, when an octet does not
/// fit in a `u8`, or when the dot-separated list does not contain exactly
/// four octets (for example `192.168.0` or `192.168..1`).
fn parse_ip_v4(s: &str) -> ParserResult<Vec<u8>> {
    verify(
        separated_list1(
            tag("."),
            // Only ASCII digits can ever be parsed as a `u8`; restricting the
            // run to them keeps other Unicode numerics out of the octet.
            map_parser(take_while1(|c: char| c.is_ascii_digit()), all_consuming(U8)),
        ),
        |octets: &[u8]| octets.len() == 4,
    )(s)
}

#[cfg(test)]
mod test {
    use crate::iri::{parse_ip_v4, parse_ip_v6};

    /// Dotted quads must parse completely into four octets; out-of-range,
    /// short and empty-octet inputs must be rejected.
    #[test]
    fn parse_ip_v4_test() {
        let accepted: [(&str, [u8; 4]); 4] = [
            ("192.168.0.1", [192, 168, 0, 1]),
            ("127.0.0.1", [127, 0, 0, 1]),
            ("8.8.8.8", [8, 8, 8, 8]),
            ("255.255.255.255", [255, 255, 255, 255]),
        ];
        for (input, octets) in accepted {
            assert_eq!(parse_ip_v4(input).unwrap(), ("", octets.to_vec()));
        }

        for input in ["256.1.1.1", "192.168.0", "192.168..1"] {
            assert!(parse_ip_v4(input).is_err());
        }
    }

    /// Full, zero-compressed and IPv4-mapped spellings must all expand to the
    /// same eight-group form with nothing left unparsed; a doubled `::` must
    /// be rejected.
    #[test]
    fn parse_ip_v6_test() {
        let accepted: [(&str, [u16; 8]); 12] = [
            (
                "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
                [0x2001, 0x0db8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334],
            ),
            (
                "2001:0db8:0000:0000:0000:0000:0000:0001",
                [0x2001, 0x0db8, 0, 0, 0, 0, 0, 1],
            ),
            (
                "2001:0db8:0000:0000:0000:ff00:0042:8329",
                [0x2001, 0x0db8, 0, 0, 0, 0xff00, 0x42, 0x8329],
            ),
            (
                "2001:db8:0:0:0:ff00:42:8329",
                [0x2001, 0x0db8, 0, 0, 0, 0xff00, 0x42, 0x8329],
            ),
            (
                "::ffff:192.0.2.128",
                [0, 0, 0, 0, 0, 0xffff, 0xc000, 0x280],
            ),
            (
                "2001:db8:85a3::8a2e:370:7334",
                [0x2001, 0xdb8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334],
            ),
            (
                "2001:db8:85a3:0:0:8a2e:0370:7334",
                [0x2001, 0xdb8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334],
            ),
            (
                "2001:db8::370:7334",
                [0x2001, 0xdb8, 0, 0, 0, 0, 0x370, 0x7334],
            ),
            (
                "fe80::1ff:fe23:4567:890a",
                [0xfe80, 0, 0, 0, 0x1ff, 0xfe23, 0x4567, 0x890a],
            ),
            ("0:0:0:0:0:0:0:0", [0; 8]),
            ("0:0:0:0:0:0:0:1", [0, 0, 0, 0, 0, 0, 0, 1]),
            ("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", [0xffff; 8]),
        ];
        for (input, groups) in accepted {
            let parsed = parse_ip_v6(input).unwrap();
            assert_eq!(parsed, ("", groups.to_vec()));
        }

        assert!(parse_ip_v6("2001:db8::::ff00:42:8329").is_err());
    }
}
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
pub mod iri;
mod shared;
mod string_parser;
mod triple_common_parser;
pub mod turtle;

pub mod prelude {
use nom::error::VerboseError;
pub use nom::{
Expand All @@ -14,7 +14,7 @@ pub mod prelude {
character::{
complete::{
alphanumeric1, char, i64 as I64, line_ending, multispace0, multispace1, space0,
space1, u32 as U32,
space1, u16 as U16, u32 as U32, u8 as U8,
},
is_alphanumeric, is_space,
},
Expand Down
2 changes: 1 addition & 1 deletion src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ fn cmp_input_file(
.replace(" ", "<SPACE>"),
);
}
assert_eq!(input.len(), output.len());
if !diff.is_empty() {
println!("========== Differences ==========");
println!("{diff}");
println!("========== Differences ==========");
}
assert_eq!(diff.len(), 0);
assert_eq!(input.len(), output.len());
}
}
mod triple_common_parser_test_misc;
Expand Down
1 change: 1 addition & 0 deletions src/tests/turtle_doc_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const INPUT_DIR: &str = "examples/turtle_doc";
#[test_case("0025", None , false ; "EQ: test date 20/09/2012")]
#[test_case("0026", None , false ; "EQ: test date 2023-08-30T10:31:00.080Z")]
#[test_case("0027", None , true ; "JSON: test simple json result with bnode")]
// #[test_case("0028", None , false ; "The following Turtle document contains examples of all the different ways of writing IRIs in Turtle.")]
#[serial]
fn test_turtle_doc(test_name: &str, diff_file: Option<&str>, output_json: bool) {
reset_fake_uuid_gen();
Expand Down

0 comments on commit 081391a

Please sign in to comment.