Skip to content

Commit

Permalink
fix(validation): additional URN validation adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker committed Nov 27, 2024
1 parent f9cc67d commit 7a67a18
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 11 deletions.
13 changes: 10 additions & 3 deletions docs/what/urn.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,18 @@ urn:li:dataset:(urn:li:dataPlatform:hdfs,PageViewEvent,EI)

## Restrictions

There are a few restrictions when creating an urn:
There are a few restrictions when creating an URN:

The following characters are not allowed anywhere in the URN

1. Commas are reserved characters in URN fields: `,`
2. Parentheses are reserved characters in URN fields: `(` or `)`
3. Colons are reserved characters in URN fields: `:`
4. Urn separator UTF-8 character `␟`
3. URN separator UTF-8 character `␟`

The following characters are allowed within an URN tuple only.

1. Colons are allowed only inside a multi-part URN tuple, not in a single-part URN key: `:`

Example: `urn:li:dashboard:(looker,dashboards.thelook::cohort_data_tool)`

Please do not use these characters when creating or generating urns. One approach is to use URL encoding for the characters.
4 changes: 4 additions & 0 deletions metadata-io/metadata-io-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ dependencies {
testImplementation externalDependency.lombok
testAnnotationProcessor externalDependency.lombok
}

// Run this module's tests with the STRICT_URN_VALIDATION_ENABLED environment variable set to 'true'.
test {
  environment('STRICT_URN_VALIDATION_ENABLED', 'true')
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public class ValidationApiUtils {
public static final String URN_DELIMITER_SEPARATOR = "␟";
// https://datahubproject.io/docs/what/urn/#restrictions
public static final Set<String> ILLEGAL_URN_COMPONENT_CHARACTERS = Set.of(":", "(", ")", ",");
public static final String URN_TUPLE_ALLOWED_CHARACTERS_REGEX = "[:]";

/**
* Validates a {@link RecordTemplate} and throws {@link ValidationException} if validation fails.
Expand Down Expand Up @@ -86,9 +87,10 @@ public static void validateUrn(
"Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character");
}

int totalParts = urn.getEntityKey().getParts().size();
List<String> illegalComponents =
urn.getEntityKey().getParts().stream()
.flatMap(ValidationApiUtils::processUrnPartRecursively)
.flatMap(part -> processUrnPartRecursively(part, totalParts))
.filter(
urnPart -> ILLEGAL_URN_COMPONENT_CHARACTERS.stream().anyMatch(urnPart::contains))
.collect(Collectors.toList());
Expand All @@ -114,15 +116,19 @@ public static void validateUrn(
}

/** Recursively process URN parts with URL decoding */
private static Stream<String> processUrnPartRecursively(String urnPart) {
private static Stream<String> processUrnPartRecursively(String urnPart, int totalParts) {
String decodedPart =
URLDecoder.decode(URLEncodingFixer.fixURLEncoding(urnPart), StandardCharsets.UTF_8);
if (decodedPart.startsWith("urn:li:")) {
// Recursively process nested URN after decoding
int nestedParts = UrnUtils.getUrn(decodedPart).getEntityKey().getParts().size();
return UrnUtils.getUrn(decodedPart).getEntityKey().getParts().stream()
.flatMap(ValidationApiUtils::processUrnPartRecursively);
.flatMap(part -> processUrnPartRecursively(part, nestedParts));
}
return Stream.of(decodedPart);
if (totalParts > 1) {
return Stream.of(urnPart.replaceAll(URN_TUPLE_ALLOWED_CHARACTERS_REGEX, "%3A"));
}
return Stream.of(urnPart);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,20 @@ public void testValidComplexUrn() {
UrnUtils.getUrn(
"urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.dataset.table,PROD)");

ValidationApiUtils.validateUrn(entityRegistry, validUrn);
ValidationApiUtils.validateUrn(entityRegistry, validUrn, true);
// If no exception is thrown, test passes
}

@Test(expectedExceptions = NullPointerException.class)
public void testUrnNull() {
  // A null URN is expected to be rejected with a NullPointerException.
  ValidationApiUtils.validateUrn(entityRegistry, null, true);
}

@Test
public void testValidPartialUrlEncode() {
  Urn validUrn = UrnUtils.getUrn("urn:li:assertion:123=-%28__% weekly__%29");

  // A partially URL-encoded key (encoded parentheses plus a bare '%') is expected to validate.
  ValidationApiUtils.validateUrn(entityRegistry, validUrn, true);
  // If no exception is thrown, test passes
}

Expand All @@ -106,7 +106,28 @@ public void testValidPartialUrlEncode2() {
UrnUtils.getUrn(
"urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts%prog_maintenance%2CPROD%29,PROD)");

ValidationApiUtils.validateUrn(entityRegistry, validUrn);
ValidationApiUtils.validateUrn(entityRegistry, validUrn, true);
// If no exception is thrown, test passes
}

@Test
public void testValidColon() {
  // A "::" inside a multi-part (tuple) key is expected to pass validation without throwing.
  final String dashboardUrn = "urn:li:dashboard:(looker,dashboards.thelook::cohort_data_tool)";
  ValidationApiUtils.validateUrn(entityRegistry, UrnUtils.getUrn(dashboardUrn), true);
}

@Test(expectedExceptions = IllegalArgumentException.class)
public void testNoTupleColon() {
  // Colons in a single-part (non-tuple) key are expected to be rejected.
  final String rawUrn = "urn:li:corpuser::";
  ValidationApiUtils.validateUrn(entityRegistry, UrnUtils.getUrn(rawUrn), true);
}

@Test(expectedExceptions = IllegalArgumentException.class)
public void testNoTupleComma() {
  // A comma in a single-part (non-tuple) key is expected to be rejected.
  final String rawUrn = "urn:li:corpuser:,";
  ValidationApiUtils.validateUrn(entityRegistry, UrnUtils.getUrn(rawUrn), true);
}
}

0 comments on commit 7a67a18

Please sign in to comment.