From 422ee477d94637f54435ef79eb2914e879b2cc68 Mon Sep 17 00:00:00 2001
From: James McMullan
Date: Wed, 5 Jun 2024 09:55:34 -0400
Subject: [PATCH] HPCC4J-605 Connection: Improve Invalid URL Error Message (#712)

Signed-off-by: James McMullan James.McMullan@lexisnexis.com
---
 .../ws/client/utils/Connection.java           | 25 ++++++++++++++-
 .../ws/client/utils/ConnectionTest.java       | 32 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/wsclient/src/main/java/org/hpccsystems/ws/client/utils/Connection.java b/wsclient/src/main/java/org/hpccsystems/ws/client/utils/Connection.java
index a28da183d..b7e4ec50c 100644
--- a/wsclient/src/main/java/org/hpccsystems/ws/client/utils/Connection.java
+++ b/wsclient/src/main/java/org/hpccsystems/ws/client/utils/Connection.java
@@ -12,6 +12,8 @@
 import java.util.Base64;
 import java.util.Base64.Decoder;
 import java.util.Base64.Encoder;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -205,6 +207,9 @@ public int hashCode()
     private StringBuffer baseUrl;
     private StringBuffer uriAndParams;
 
+    // Note: this pattern is very basic and is only meant to extract hostnames from URLs
+    public final static Pattern URL_HOSTNAME_PATTERN = Pattern.compile("((https?|ftp|file):\\/\\/)?(?<hostname>([\\da-z\\.\\-_]+)(\\.[a-z\\.]{2,6})?)(:\\d{2,6})?.*");
+
     /** Constant CONNECT_TIMEOUT_PARAM="connecttimeoutmillis" */
     final static public String CONNECT_TIMEOUT_PARAM = "connecttimeoutmillis";
     /** Constant READ_TIMEOUT_PARAM="readtimeoutmillis" */
@@ -287,7 +292,25 @@ public static boolean isSslProtocol(String protocol)
      */
     public Connection(String connectionstring) throws MalformedURLException
     {
-        URL theurl = new URL(connectionstring);
+        URL theurl;
+        try
+        {
+            theurl = new URL(connectionstring);
+        }
+        catch (MalformedURLException e)
+        {
+            // Report underscores in the hostname explicitly; a null input is matched as "" so it cannot match
+            Matcher matcher = URL_HOSTNAME_PATTERN.matcher(connectionstring == null ? "" : connectionstring);
+            if (matcher.matches() && matcher.group("hostname").contains("_"))
+            {
+                MalformedURLException detailed = new MalformedURLException("Invalid URL: Hostname contains invalid underscores: '" + connectionstring + "': " + e.getMessage());
+                detailed.initCause(e);
+                throw detailed;
+            }
+
+            // Always rethrow: swallowing the failure would leave theurl unassigned for the code below
+            throw e;
+        }
 
         setProtocol(theurl.getProtocol());
 
diff --git a/wsclient/src/test/java/org/hpccsystems/ws/client/utils/ConnectionTest.java b/wsclient/src/test/java/org/hpccsystems/ws/client/utils/ConnectionTest.java
index edfc9d640..278b40540 100644
--- a/wsclient/src/test/java/org/hpccsystems/ws/client/utils/ConnectionTest.java
+++ b/wsclient/src/test/java/org/hpccsystems/ws/client/utils/ConnectionTest.java
@@ -19,6 +19,7 @@
 import static org.junit.Assert.*;
 
 import java.net.MalformedURLException;
+import java.util.regex.Matcher;
 
 import org.junit.Test;
 
@@ -107,4 +108,35 @@ public void testInvalidProtHostPort() throws MalformedURLException
         assertFalse(con.getIsHttps());
         assertEquals(con.getProtocol(), http);
     }
+
+    @Test
+    public void hostNamePatternTest() throws MalformedURLException
+    {
+        // Note: we want to test improved error messaging with underscores, but not all versions
+        // of Java throw an exception for underscores in hostnames.
+        // So we are testing the pattern instead
+        String[] urls = {
+            "https://invalid_host_name.test:8010?params",
+            "https://invalid_host_name.test:8010",
+            "http://invalid_host_name.test:8010",
+            "invalid_host_name.test:8010",
+            "invalid_host_name.test"
+        };
+
+        String hostName = "invalid_host_name.test";
+        for (String url : urls)
+        {
+            Matcher matcher = Connection.URL_HOSTNAME_PATTERN.matcher(url);
+            assertTrue(matcher.matches());
+            assertEquals(hostName, matcher.group("hostname"));
+        }
+    }
+
+    @Test(expected = MalformedURLException.class)
+    public void invalidUrlWithoutUnderscoreTest() throws MalformedURLException
+    {
+        // The unknown protocol makes java.net.URL reject this string, and the extracted hostname has
+        // no underscore, so the original MalformedURLException must reach the caller
+        new Connection("foo://hostname:8010");
+    }
 }