Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding IP6 characters into valid character list for URI parsing #1516

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/cpp/src/parse_uri.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ constexpr bool is_valid_character(char ch, bool alphanum_only)
if (ch >= 'A' && ch <= 'Z') return true; // A-Z
if (ch >= 'a' && ch <= 'z') return true; // a-z
} else {
if (ch >= '!' && ch <= ';' && ch != '"') return true; // 0-9 and !#%&'()*+,-./
if (ch >= '=' && ch <= 'Z' && ch != '>') return true; // A-Z and =?@
if (ch >= '!' && ch <= ':' && ch != '"') return true; // 0-9 and !#%&'()*+,-./:
if (ch >= '=' && ch <= ']' && ch != '>') return true; // A-Z and =?@[]
hyperbolic2346 marked this conversation as resolved.
Show resolved Hide resolved
if (ch >= '_' && ch <= 'z' && ch != '`') return true; // a-z and _
}
return false;
Expand Down
47 changes: 47 additions & 0 deletions src/main/cpp/tests/parse_uri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,52 @@ TEST_F(ParseURIProtocolTests, SparkEdges)
"https"},
{1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
}

// Protocol extraction for URIs whose host is a bracketed IPv6 literal:
// compressed and full forms, lower- and upper-case hex digits, and one
// entry that also carries a port after the closing bracket.
TEST_F(ParseURIProtocolTests, IP6)
{
  cudf::test::strings_column_wrapper uris({
    "https://[fe80::]",
    "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
    "https://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]",
    "https://[2001:db8::1:0]",
    "http://[2001:db8::2:1]",
    "https://[::1]",
    "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443",
  });

  // One expected protocol per input row, in the same order as `uris`.
  cudf::test::strings_column_wrapper expected_protocols({
    "https",
    "https",
    "https",
    "https",
    "http",
    "https",
    "https",
  });

  cudf::strings_column_view uris_view{uris};
  auto protocols = spark_rapids_jni::parse_uri_to_protocol(uris_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), expected_protocols);
}

// Protocol extraction for URIs whose host looks like a dotted IPv4 address.
// The inputs include a host with a port, a host with five components, a host
// with three components, and a host whose first component is 280; every row
// is expected to yield "https".
TEST_F(ParseURIProtocolTests, IP4)
{
  cudf::test::strings_column_wrapper uris({
    "https://192.168.1.100/",
    "https://192.168.1.100:8443/",
    "https://192.168.1.100.5/",
    "https://192.168.1/",
    "https://280.100.1.1/",
  });

  // One expected protocol per input row, in the same order as `uris`.
  cudf::test::strings_column_wrapper expected_protocols({
    "https",
    "https",
    "https",
    "https",
    "https",
  });

  cudf::strings_column_view uris_view{uris};
  auto protocols = spark_rapids_jni::parse_uri_to_protocol(uris_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), expected_protocols);
}

// Protocol extraction for URIs that contain percent-encoded octets, once in
// the path and once in the host portion.
TEST_F(ParseURIProtocolTests, UTF8)
{
  cudf::test::strings_column_wrapper uris({
    "https://nvidia.com/%4EV%49%44%49%41",
    "http://%77%77%77.%4EV%49%44%49%41.com",
  });

  // One expected protocol per input row, in the same order as `uris`.
  cudf::test::strings_column_wrapper expected_protocols({
    "https",
    "http",
  });

  cudf::strings_column_view uris_view{uris};
  auto protocols = spark_rapids_jni::parse_uri_to_protocol(uris_view);

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(protocols->view(), expected_protocols);
}
85 changes: 84 additions & 1 deletion src/test/java/com/nvidia/spark/rapids/jni/ParseURITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

public class ParseURITest {
@Test
void parseURIToProtocolTest() {
void parseURIToProtocolSparkTest() {
String[] testData = {"https://nvidia.com/https&#://nvidia.com",
"https://http://www.nvidia.com",
"filesystemmagicthing://bob.yaml",
Expand All @@ -50,6 +50,89 @@ void parseURIToProtocolTest() {
"http//www.nvidia.com/q",
"",
null};

String[] expectedStrings = new String[testData.length];
for (int i=0; i<testData.length; i++) {
String scheme = null;
try {
URI uri = new URI(testData[i]);
scheme = uri.getScheme();
} catch (URISyntaxException ex) {
// leave the scheme null if URI is invalid
} catch (NullPointerException ex) {
// leave the scheme null if URI is null
}
expectedStrings[i] = scheme;
}
try (ColumnVector v0 = ColumnVector.fromStrings(testData);
ColumnVector expected = ColumnVector.fromStrings(expectedStrings);
ColumnVector result = ParseURI.parseURIProtocol(v0)) {
AssertUtils.assertColumnsAreEqual(expected, result);
}
}

@Test
void parseURIToProtocolUTF8Test() {
String[] testData = {"https://nvidia.com/%4EV%49%44%49%41",
"http://%77%77%77.%4EV%49%44%49%41.com"};

String[] expectedStrings = new String[testData.length];
for (int i=0; i<testData.length; i++) {
String scheme = null;
try {
URI uri = new URI(testData[i]);
scheme = uri.getScheme();
} catch (URISyntaxException ex) {
// leave the scheme null if URI is invalid
} catch (NullPointerException ex) {
// leave the scheme null if URI is null
}
expectedStrings[i] = scheme;
}
try (ColumnVector v0 = ColumnVector.fromStrings(testData);
ColumnVector expected = ColumnVector.fromStrings(expectedStrings);
ColumnVector result = ParseURI.parseURIProtocol(v0)) {
AssertUtils.assertColumnsAreEqual(expected, result);
}
hyperbolic2346 marked this conversation as resolved.
Show resolved Hide resolved
}

@Test
void parseURIToProtocolIP4Test() {
  // Hosts shaped like dotted IPv4 addresses: plain, with a port, with five
  // components, with three components, and with a first component of 280.
  String[] inputs = {
      "https://192.168.1.100/",
      "https://192.168.1.100:8443/",
      "https://192.168.1.100.5/",
      "https://192.168.1/",
      "https://280.100.1.1/"};

  // Build the reference answers with Java's URI class: the scheme it reports,
  // or null when the URI constructor rejects the input.
  String[] referenceSchemes = new String[inputs.length];
  int slot = 0;
  for (String candidate : inputs) {
    String parsedScheme;
    try {
      parsedScheme = new URI(candidate).getScheme();
    } catch (URISyntaxException | NullPointerException ex) {
      // invalid or null input: the reference scheme stays null
      parsedScheme = null;
    }
    referenceSchemes[slot++] = parsedScheme;
  }

  // try-with-resources closes all three columns when the block exits.
  try (ColumnVector inputColumn = ColumnVector.fromStrings(inputs);
       ColumnVector expectedColumn = ColumnVector.fromStrings(referenceSchemes);
       ColumnVector actualColumn = ParseURI.parseURIProtocol(inputColumn)) {
    AssertUtils.assertColumnsAreEqual(expectedColumn, actualColumn);
  }
}

@Test
void parseURIToProtocolIP6Test() {
String[] testData = {"https://[fe80::]",
"https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
"https://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]",
"https://[2001:db8::1:0]",
"http://[2001:db8::2:1]",
"https://[::1]",
"https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443"};

String[] expectedStrings = new String[testData.length];
for (int i=0; i<testData.length; i++) {
Expand Down