From d6bb56b846fb5b196f7dc71055c75ec71e180d78 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Sat, 7 Oct 2023 00:18:28 +0200 Subject: [PATCH 1/8] feat(validator): improve uri and curie validation Improve URI and CURIE validation using regular expressions based on the corresponding official specifications. URI and CURIE validation mechanisms were reporting valid URIs (like a URN 'urn:abc:123') as invalid. This patch fixes this issue. Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/metamodelcore.py | 23 +- linkml_runtime/utils/uri_validator.py | 369 ++++++++++++++++++++++++++ 2 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 linkml_runtime/utils/uri_validator.py diff --git a/linkml_runtime/utils/metamodelcore.py b/linkml_runtime/utils/metamodelcore.py index a4c60f41..1df4e325 100644 --- a/linkml_runtime/utils/metamodelcore.py +++ b/linkml_runtime/utils/metamodelcore.py @@ -13,6 +13,10 @@ from linkml_runtime.utils.namespaces import Namespaces from linkml_runtime.utils.strictness import is_strict +from linkml_runtime.utils.uri_validator import validate_uri +from linkml_runtime.utils.uri_validator import validate_uri_reference +from linkml_runtime.utils.uri_validator import validate_curie + # Reference Decimal to make sure it stays in the imports _z = Decimal(1) @@ -105,10 +109,12 @@ def is_valid(cls, v: Union[str, URIRef, "Curie", "URIorCURIE"]) -> bool: if not isinstance(v, (str, URIRef, Curie, URIorCURIE)): return False v = str(v) - if ':' in v and '://' not in v: - return URIorCURIE.is_curie(v) + if validate_uri(v): + return True + elif validate_uri_reference(v): + return True else: - return URI.is_valid(v) + return URIorCURIE.is_curie(v) @staticmethod def is_absolute(v: str) -> bool: @@ -116,6 +122,8 @@ def is_absolute(v: str) -> bool: @staticmethod def is_curie(v: str, nsm: Optional[Namespaces] = None) -> bool: + if not validate_curie(v): + return False if ':' in v and '://' not in v: ns, ln = v.split(':', 1) 
return len(ns) == 0 or (NCName.is_valid(ns) and @@ -142,7 +150,12 @@ def __init__(self, v: str) -> None: @classmethod def is_valid(cls, v: str) -> bool: - return v is not None and not URIorCURIE.is_curie(v) and cls.uri_re.match(v) + if validate_uri(v): + return True + elif validate_uri_reference(v): + return True + else: + return False class Curie(URIorCURIE): @@ -174,6 +187,8 @@ def ns_ln(cls, v: str) -> Optional[Tuple[str, str]]: @classmethod def is_valid(cls, v: str) -> bool: + if not validate_curie(v): + return False pnln = cls.ns_ln(v) #return pnln is not None and (not pnln[0] or isinstance(pnln[0], PN_PREFIX)) return pnln is not None diff --git a/linkml_runtime/utils/uri_validator.py b/linkml_runtime/utils/uri_validator.py new file mode 100644 index 00000000..13154068 --- /dev/null +++ b/linkml_runtime/utils/uri_validator.py @@ -0,0 +1,369 @@ +# Copyright Siemens 2023 +# SPDX-License-Identifier: CC0-1.0 + +import re + +""" +Regular-expression-based URI and CURIE validation functions + +These regex are directly derived from the official sources mentioned in each +section. + +They should be processed with re.VERBOSE. + +Python named regular expression groups are being used to better understand the +URI/CURIE parsing. +""" + + +# ----------------------------------------------------------------------------- +# +### BASICS + +# Define DIGIT according RFC2234 section 3.4: +# https://datatracker.ietf.org/doc/html/rfc2234/#section-3.4 +DIGIT = r"[0-9]" + +# Define ALPHA according RFC2234 section 6.1: +# https://datatracker.ietf.org/doc/html/rfc2234/#section-6.1 +ALPHA = r"[A-Za-z]" + +# Define HEXDIG according RFC2234 section 6.1: +# https://datatracker.ietf.org/doc/html/rfc2234/#section-6.1 +HEXDIG = r"[0-9A-Fa-f]" + +# pct-encoded = "%" HEXDIG HEXDIG +pct_encoded = rf"% {HEXDIG} {HEXDIG}" + +# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +unreserved = rf"(?: {ALPHA} | {DIGIT} | \- | \. | _ | ~ )" + +# gen-delims = ":" / "/" / "?" 
/ "#" / "[" / "]" / "@" +gen_delims = r"(?: : | / | \? | \# | \[ | \] | @ )" + +# sub-delims = "!" / "$" / "&" / "'" / "(" +sub_delims = r"(?: ! | \$ | & | ' | \( | \) | \* | \+ | , | ; | = )" + +# pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +pchar = rf"(?: {unreserved} | {pct_encoded} | {sub_delims} | : | @ )" + +# reserved = gen-delims / sub-delims +reserved = rf"(?: {gen_delims} | {sub_delims} )" + + +### required for Authority + +# dec-octet = DIGIT ; 0-9 +# / %x31-39 DIGIT ; 10-99 +# / "1" 2DIGIT ; 100-199 +# / "2" %x30-34 DIGIT ; 200-249 +# / "25" %x30-35 ; 250-255 +dec_octet = rf"""(?: {DIGIT} | + [1-9] {DIGIT} | + 1 {DIGIT}{{2}} | + 2 [0-4] {DIGIT} | + 25 [0-5] + ) +""" + +# IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +IPv4address = rf"{dec_octet} \. {dec_octet} \. {dec_octet} \. {dec_octet}" + +# h16 = 1*4HEXDIG +h16 = rf"(?: {HEXDIG} ){{1,4}}" + +# ls32 = ( h16 ":" h16 ) / IPv4address +ls32 = rf"(?: (?: {h16} : {h16} ) | {IPv4address} )" + +# IPv6address = 6( h16 ":" ) ls32 +# / "::" 5( h16 ":" ) ls32 +# / [ h16 ] "::" 4( h16 ":" ) ls32 +# / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +# / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +# / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +# / [ *4( h16 ":" ) h16 ] "::" ls32 +# / [ *5( h16 ":" ) h16 ] "::" h16 +# / [ *6( h16 ":" ) h16 ] "::" +IPv6address = rf"""(?: (?: {h16} : ){{6}} {ls32} | + :: (?: {h16} : ){{5}} {ls32} | + (?: {h16} )? :: (?: {h16} : ){{4}} {ls32} | + (?: (?: {h16} : ) {h16} )? :: (?: {h16} : ){{3}} {ls32} | + (?: (?: {h16} : ){{1,2}} {h16} )? :: (?: {h16} : ){{2}} {ls32} | + (?: (?: {h16} : ){{1,3}} {h16} )? :: {h16} : {ls32} | + (?: (?: {h16} : ){{1,4}} {h16} )? :: {ls32} | + (?: (?: {h16} : ){{1,5}} {h16} )? :: {h16} | + (?: (?: {h16} : ){{1,6}} {h16} )? :: + ) +""" + +# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +IPvFuture = rf"v {HEXDIG}+ \. 
(?: {unreserved} | {sub_delims} | : )+" + +# IP-literal = "[" ( IPv6address / IPvFuture ) "]" +IP_literal = rf"\[ (?: {IPv6address} | {IPvFuture} ) \]" + +# reg-name = *( unreserved / pct-encoded / sub-delims ) +reg_name = rf"(?: {unreserved} | {pct_encoded} | {sub_delims} )*" + + +### required for Path + +# segment = *pchar +segment = rf"{pchar}*" + +# segment-nz = 1*pchar +segment_nz = rf"{pchar}+" + +# segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +segment_nz_nc = rf"(?: {unreserved} | {pct_encoded} | {sub_delims} | @ )+" + +# ----------------------------------------------------------------------------- +# +# Define SCHEME according RFC3986 section 3.1: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.1 +# + +# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +scheme = rf"(?P {ALPHA} (?: {ALPHA} | {DIGIT} | \+ | \- | \. )* )" + + +# ----------------------------------------------------------------------------- +# +# Define AUTHORITY according RFC3986 section 3.2: + +# Define USER INFORMATION according RFC3986 section 3.2.1: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.1 + +# userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +userinfo = rf"""(?P + (?: {unreserved} | {pct_encoded} | {sub_delims} | : )* + ) +""" + +# Define HOST according RFC3986 section 3.2.2: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 + +# host = IP-literal / IPv4address / reg-name +host = rf"(?P {IP_literal} | {IPv4address} | {reg_name} )" + +# Define PORT according RFC3986 section 3.2.3: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.3 + +# port = *DIGIT +port = rf"(?P ( {DIGIT} )* )" + +# Define AUTHORITY according RFC3986 section 3.2: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2 +# + +# authority = [ userinfo "@" ] host [ ":" port ] +#authority = rf"""(?: (?P {userinfo} ) @)? +authority = rf"""(?P + (?: {userinfo} @)? + {host} + (?: : {port} )? 
+ ) +""" + + +# ----------------------------------------------------------------------------- +# +# Define different PATHs according RFC3986 section 3.3: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.3 +# + +# path-abempty = *( "/" segment ) +path_abempty = rf"( / {segment} )*" + +# path-absolute = "/" [ segment-nz *( "/" segment ) ] +path_absolute = rf"( / (?: {segment_nz} (?: / {segment} )* )? )" + +# path-noscheme = segment-nz-nc *( "/" segment ) +path_noscheme = rf"( {segment_nz_nc} (?: / {segment} )* )" + +# path-rootless = segment-nz *( "/" segment ) +path_rootless = rf"( {segment_nz} (?: / {segment} )* )" + +# path-empty = 0 +path_empty = r"" + +# path = path-abempty ; begins with "/" or is empty +# / path-absolute ; begins with "/" but not "//" +# / path-noscheme ; begins with a non-colon segment +# / path-rootless ; begins with a segment +# / path-empty ; zero characters +path = rf"""(?: + {path_abempty} | + {path_absolute} | + {path_noscheme} | + {path_rootless} | + {path_empty} + ) +""" + + +# ----------------------------------------------------------------------------- +# +# Define QUERY according RFC3986 section 3.4: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.4 +# + +# query = *( pchar / "/" / "?" ) +query = rf"(?P (?: {pchar} | / | \? )* )" + + +# ----------------------------------------------------------------------------- +# +# Define FRAGMENT according RFC3986 section 3.5: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3.5 +# + +# fragment = *( pchar / "/" / "?" ) +fragment = rf"(?P (?: {pchar} | / | \? 
)* )" + + +# ----------------------------------------------------------------------------- +# +# Define URI and HIERARCHICAL PATH according RFC3986 section 3: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-3 +# + +# hier-part = "//" authority path-abempty +# / path-absolute +# / path-rootless +# / path-empty +hier_part = rf"""(?P + (?: // {authority} {path_abempty} ) | + {path_absolute} | + {path_rootless} | + {path_empty} + ) +""" + + +# URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +URI = rf"""(?P + {scheme} : {hier_part} (?: \? {query} )? (?: \# {fragment} )? + ) +""" + + +# ----------------------------------------------------------------------------- +# +# Define RELATIVE REFERENCE according RFC3986 section 4.2: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-4.2 +# + +# relative-part = "//" authority path-abempty +# / path-absolute +# / path-noscheme +# / path-empty +# relative-ref = relative-part [ "?" query ] [ "#" fragment ] +relative_ref = rf"""(?P + (?: + (?: // + {authority} + (?P {path_abempty} ) + ) | + (?P {path_absolute} ) | + (?P {path_noscheme} ) | + (?P {path_empty} ) + ) + (?: \? {query} )? + (?: \# {fragment} )? + ) +""" + +# ----------------------------------------------------------------------------- +# +# Define ABSOLUTE URI according RFC3986 section 4.3: +# https://datatracker.ietf.org/doc/html/rfc3986/#section-4.3 +# + +# absolute-URI = scheme ":" hier-part [ "?" query ] +absolute_URI = rf"(?P {scheme} : {hier_part} (?: \? {query} )? )" + + +# ----------------------------------------------------------------------------- +# +# Define CURIE according W3C CURIE Syntax 1.0 +# https://www.w3.org/TR/curie/#s_syntax +# + +# NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | Extender +# !! IMPORTANT NOTE !! +# As of now this module doesn't support NCNameChar IRI, but +# relative-refs as defined in URI, +# NCNameChar ::= Letter | Digit | '.' | '-' | '_' +NCNameChar = rf"(?: {ALPHA} | {DIGIT} | \. 
| \- | _ )" + +# prefix := NCName +# NCName := (Letter | '_') (NCNameChar)* +prefix = rf"(?: {ALPHA} | _ ) (?: {NCNameChar} )*" + +# reference := irelative-ref (as defined in IRI) +# !! IMPORTANT NOTE !! +# As of now this module don't support irelative-refs as defined in IRI, but +# relative-refs as defined in URI +# curie := [ [ prefix ] ':' ] reference +# reference := relative-ref (as defined in URI) +CURIE = rf"""(?P + (?: (?P {prefix} )? : )? + {relative_ref} + ) +""" + +# safe_curie := '[' curie ']' +safe_CURIE = rf"""(?P + \[ {CURIE} \] + ) +""" + + +# ----------------------------------------------------------------------------- +# +### Compile the regular expressions for better performance + +uri_validator = re.compile("^{}$".format(URI), re.VERBOSE) + +#uri_ref_validator = re.compile("^{}$".format(URI_reference), re.VERBOSE) + +uri_relative_ref_validator = re.compile("^{}$".format(relative_ref), re.VERBOSE) + +abs_uri_validator = re.compile("^{}$".format(absolute_URI), re.VERBOSE) + +curie_validator = re.compile("^{}$".format(CURIE), re.VERBOSE) + +safe_curie_validator = re.compile("^{}$".format(safe_CURIE), re.VERBOSE) + +# ----------------------------------------------------------------------------- +# +### FUNCTIONS + + +def validate_uri(input): + return uri_validator.match(input) + + +def validate_uri_reference(input): + # ----------------------------------------------------------------------------- + # + # Define URI REFERENCE according RFC3986 section 4.1: + # https://datatracker.ietf.org/doc/html/rfc3986/#section-4.1 + # + + # URI-reference = URI / relative-ref + return uri_validator.match(input) or uri_relative_ref_validator.match(input) + + +def validate_absolute_uri(input): + return abs_uri_validator.match(input) + + +def validate_curie(input): + # print(CURIE) + return curie_validator.match(input) + + +def validate_safe_curie(input): + return safe_curie_validator.match(input) From 4db7c1cb393f5ac65b264db94c6fe5da4be12e53 Mon Sep 17 00:00:00 2001 
From: Silvano Cirujano Cuesta Date: Wed, 11 Oct 2023 09:56:08 +0200 Subject: [PATCH 2/8] fix: invalid curie being used in example 'meaning' is declared to be Uriorcurie according to [1], but at least one example being used for testing wasn't compliant (neither '[' nor ']' is allowed in the reference part of a curie) and therefore the introduced stricter URI and CURIE validation was failing. This patch fixes the issue. [1]: https://linkml.io/linkml-model/latest/docs/meaning/ Signed-off-by: Silvano Cirujano Cuesta --- tests/test_loaders_dumpers/input/phenopackets/constants.yaml | 4 ++-- tests/test_loaders_dumpers/models/phenopackets.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_loaders_dumpers/input/phenopackets/constants.yaml b/tests/test_loaders_dumpers/input/phenopackets/constants.yaml index b17077a3..e5fb97a4 100644 --- a/tests/test_loaders_dumpers/input/phenopackets/constants.yaml +++ b/tests/test_loaders_dumpers/input/phenopackets/constants.yaml @@ -322,7 +322,7 @@ enums: meaning: UCUM:degree DIOPTER: description: diopter - meaning: UCUM:[diop] + meaning: UCUM:%5Bdiop%5D GRAM: description: gram meaning: UCUM:g @@ -373,7 +373,7 @@ enums: meaning: UCUM:mm MILLIMETRES_OF_MERCURY: description: millimetres of mercury - meaning: UCUM:mm[Hg] + meaning: UCUM:mm%5BHg%5D MILLIMOLE: description: millimole meaning: UCUM:mmol diff --git a/tests/test_loaders_dumpers/models/phenopackets.py b/tests/test_loaders_dumpers/models/phenopackets.py index 5826c9d6..255a4578 100644 --- a/tests/test_loaders_dumpers/models/phenopackets.py +++ b/tests/test_loaders_dumpers/models/phenopackets.py @@ -3006,7 +3006,7 @@ class UnitTerms(EnumDefinitionImpl): meaning=UCUM.degree) DIOPTER = PermissibleValue(text="DIOPTER", description="diopter", - meaning=UCUM["[diop]"]) + meaning=UCUM["%5Bdiop%5D"]) GRAM = PermissibleValue(text="GRAM", description="gram", meaning=UCUM.g) @@ -3057,7 +3057,7 @@ class UnitTerms(EnumDefinitionImpl): meaning=UCUM.mm) 
MILLIMETRES_OF_MERCURY = PermissibleValue(text="MILLIMETRES_OF_MERCURY", description="millimetres of mercury", - meaning=UCUM["mm[Hg]"]) + meaning=UCUM["mm%5BHg%5D"]) MILLIMOLE = PermissibleValue(text="MILLIMOLE", description="millimole", meaning=UCUM.mmol) From 4416460bea92eac5eace2539d174ef2c7d3186fb Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 11 Oct 2023 17:16:54 +0200 Subject: [PATCH 3/8] fix: invalid curie being used in example 'source_nodes' is declared to be Uriorcurie according to [1], but at least one example being used for testing wasn't compliant (' ! unit' is not allowed in the reference part of a curie) and therefore the introduced stricter URI and CURIE validation was failing. This patch fixes the issue. [1]: https://linkml.io/linkml-model/latest/docs/source_nodes/ Signed-off-by: Silvano Cirujano Cuesta --- tests/test_loaders_dumpers/input/phenopackets/cv_terms.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_loaders_dumpers/input/phenopackets/cv_terms.yaml b/tests/test_loaders_dumpers/input/phenopackets/cv_terms.yaml index 28c8bc93..a539d7df 100644 --- a/tests/test_loaders_dumpers/input/phenopackets/cv_terms.yaml +++ b/tests/test_loaders_dumpers/input/phenopackets/cv_terms.yaml @@ -110,7 +110,7 @@ enums: reachable_from: source_ontology: bioregistry:uo source_nodes: - - UO:0000000 ! unit + - UO:0000000 is_direct: false include_self: false relationship_types: From 100a71eaabd263008b9c8c3b46cc6711a893d5e6 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 11 Oct 2023 21:58:17 +0200 Subject: [PATCH 4/8] fix: validator accepts same-document reference uri Empty URIs are so-called same-document references and are legal URIs, therefore fixing the wrong test expectation (derived from the previous implementation, which did not conform to the standard). 
Signed-off-by: Silvano Cirujano Cuesta --- tests/test_processing/test_referencevalidator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_processing/test_referencevalidator.py b/tests/test_processing/test_referencevalidator.py index 5c541959..7f813799 100644 --- a/tests/test_processing/test_referencevalidator.py +++ b/tests/test_processing/test_referencevalidator.py @@ -1062,7 +1062,7 @@ def test_08_normalize_types(self): "uriorcurie": [ ("X:1", [], [], "X:1"), ("http://example.org", [], [], "http://example.org"), - ("", [], [ConstraintType.TypeConstraint], ""), + ("", [], [], ""), ("a b", [], [ConstraintType.TypeConstraint], "a b"), (None, [], [], None), ], From fad9cd4fa71cdadd8d44db9ca175815d26d886fa Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 11 Oct 2023 10:02:03 +0200 Subject: [PATCH 5/8] test(metamodel): fix testing Rename test for URIorCURIE types and also focus only on testing that class, moving tests applying to the URI class to the corresponding test. Also add/modify the following tests: * 'abc:[123]' is an invalid CURIE. 
* ':123' is an invalid URI (but a valid CURIE) * '' is a valid URI (a same-document reference) * 'rdf:type' is a valid URI (also a valid CURIE) * 'urn:abc:123' is a valid URI (since it's a valid URN) Signed-off-by: Silvano Cirujano Cuesta --- tests/test_utils/test_metamodelcore.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/test_utils/test_metamodelcore.py b/tests/test_utils/test_metamodelcore.py index cad39f86..4fdc7bc3 100644 --- a/tests/test_utils/test_metamodelcore.py +++ b/tests/test_utils/test_metamodelcore.py @@ -31,17 +31,16 @@ def test_ncname(self): with self.assertRaises(ValueError): NCName('A12!') - def test_uris(self): + def test_uriorcuries(self): """ Test the URI and URIorCURIE types """ str1 = "https://google.com/test#file?abc=1&def=4" self.assertEqual(str1, URIorCURIE(str1)) - self.assertEqual(str1, URI(str1)) str2 = "abc:123" self.assertEqual(str2, URIorCURIE(str2)) str3 = ":123" self.assertEqual(str3, URIorCURIE(str3)) with self.assertRaises(ValueError): - URI(str2) + URIorCURIE("abc:[def]") with self.assertRaises(ValueError): URIorCURIE("1abc:def") with self.assertRaises(ValueError): @@ -51,7 +50,6 @@ def test_uris(self): #with self.assertRaises(ValueError): # URIorCURIE("_") lax() - URI(str2) URIorCURIE("1abc:def") URIorCURIE("1:def") @@ -79,22 +77,25 @@ def test_curie(self): def test_uri(self): """ Test the URI data type """ + str1 = "https://google.com/test#file?abc=1&def=4" + self.assertEqual(str1, URI(str1)) self.assertEqual("http://foo.org/bargles", URI("http://foo.org/bargles")) - with self.assertRaises(ValueError): - URI("rdf:type") with self.assertRaises(ValueError): URI(":") + with self.assertRaises(ValueError): + URI(":123") # imports range is uriorcurie, so we allow file paths #URI("1") - URI("foo.bar") - URI("../a/b") + self.assertTrue(URI.is_valid("foo.bar")) + self.assertTrue(URI.is_valid("../a/b")) + self.assertTrue(URI.is_valid("abc:123")) #with 
self.assertRaises(ValueError): # URI("x1") - with self.assertRaises(ValueError): - URI("") - lax() + # an empty URI is a valid same-document URI reference + self.assertTrue(URI.is_valid("")) x = URI("rdf:type") - self.assertFalse(URI.is_valid(x)) + self.assertTrue(URI.is_valid(x)) + self.assertTrue(URI.is_valid("urn:abc:123")) def test_bool(self): self.assertTrue(Bool(True)) From 9e268e5054427792d1a448edaa5554db52533457 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Fri, 13 Oct 2023 15:08:10 +0200 Subject: [PATCH 6/8] tests(uri-validator): add additional tests Adding all URIs shown in the URI Wikipedia page [1] to the tests. [1]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Example_URIs Signed-off-by: Silvano Cirujano Cuesta --- tests/test_utils/test_metamodelcore.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_utils/test_metamodelcore.py b/tests/test_utils/test_metamodelcore.py index 4fdc7bc3..3d0fe8b2 100644 --- a/tests/test_utils/test_metamodelcore.py +++ b/tests/test_utils/test_metamodelcore.py @@ -96,6 +96,15 @@ def test_uri(self): x = URI("rdf:type") self.assertTrue(URI.is_valid(x)) self.assertTrue(URI.is_valid("urn:abc:123")) + self.assertTrue(URI.is_valid("https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top")) + self.assertTrue(URI.is_valid("ldap://[2001:db8::7]/c=GB?objectClass?one")) + self.assertTrue(URI.is_valid("ldap://[2001:db8::7]/c=GB?objectClass?one")) + self.assertTrue(URI.is_valid("mailto:John.Doe@example.com")) + self.assertTrue(URI.is_valid("news:comp.infosystems.www.servers.unix")) + self.assertTrue(URI.is_valid("tel:+1-816-555-1212")) + self.assertTrue(URI.is_valid("telnet://192.0.2.16:80/")) + self.assertTrue(URI.is_valid("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")) + self.assertTrue(URI.is_valid("file:///home/user/")) def test_bool(self): self.assertTrue(Bool(True)) From d9e688ccc9e048b1244acbead6977799b4d8c248 Mon Sep 17 00:00:00 2001 From: 
Silvano Cirujano Cuesta Date: Fri, 13 Oct 2023 21:43:56 +0200 Subject: [PATCH 7/8] style: clean-up dead-code Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/metamodelcore.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/linkml_runtime/utils/metamodelcore.py b/linkml_runtime/utils/metamodelcore.py index 1df4e325..957a1269 100644 --- a/linkml_runtime/utils/metamodelcore.py +++ b/linkml_runtime/utils/metamodelcore.py @@ -144,10 +144,6 @@ def __init__(self, v: str) -> None: raise ValueError(f"'{v}': is not a valid URI") super().__init__(v) - # this is more inclusive than the W3C specification - #uri_re = re.compile("^[A-Za-z]\\S*$") - uri_re = re.compile("^\\S+$") - @classmethod def is_valid(cls, v: str) -> bool: if validate_uri(v): From b86c1fa8e353474f282b377db2b51eba0ffabace Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Fri, 13 Oct 2023 21:44:25 +0200 Subject: [PATCH 8/8] test: improve test coverage Add new tests to improve test coverage. Also remove unused (and therefore not tested) functionality. 
Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/uri_validator.py | 7 ------- tests/test_utils/test_metamodelcore.py | 7 +++++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/linkml_runtime/utils/uri_validator.py b/linkml_runtime/utils/uri_validator.py index 13154068..70203d27 100644 --- a/linkml_runtime/utils/uri_validator.py +++ b/linkml_runtime/utils/uri_validator.py @@ -356,14 +356,7 @@ def validate_uri_reference(input): return uri_validator.match(input) or uri_relative_ref_validator.match(input) -def validate_absolute_uri(input): - return abs_uri_validator.match(input) - - def validate_curie(input): # print(CURIE) return curie_validator.match(input) - -def validate_safe_curie(input): - return safe_curie_validator.match(input) diff --git a/tests/test_utils/test_metamodelcore.py b/tests/test_utils/test_metamodelcore.py index 3d0fe8b2..2d74e6b1 100644 --- a/tests/test_utils/test_metamodelcore.py +++ b/tests/test_utils/test_metamodelcore.py @@ -47,11 +47,14 @@ def test_uriorcuries(self): URIorCURIE("1:def") with self.assertRaises(ValueError): URIorCURIE(" ") - #with self.assertRaises(ValueError): - # URIorCURIE("_") + with self.assertRaises(ValueError): + URIorCURIE("[") lax() URIorCURIE("1abc:def") URIorCURIE("1:def") + self.assertFalse(URIorCURIE.is_valid(123)) + URIorCURIE.is_curie("abc:123") + self.assertFalse(URIorCURIE.is_curie("http://example.org/path")) def test_curie(self): """ Test the CURIE type """