From c26095a55c30e21865a536f4ae08ddcec17c16c9 Mon Sep 17 00:00:00 2001 From: Pascal Essiembre Date: Wed, 24 Jul 2019 16:50:37 -0400 Subject: [PATCH] HttpURL#toAbsolute(String, String) will no longer try to make absolute URLs without the two slashes after colon (e.g., tel:12345). --- norconex-commons-lang/src/changes/changes.xml | 6 +- .../norconex/commons/lang/url/HttpURL.java | 70 +++++++++---------- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/norconex-commons-lang/src/changes/changes.xml b/norconex-commons-lang/src/changes/changes.xml index a7715517..98773eb0 100644 --- a/norconex-commons-lang/src/changes/changes.xml +++ b/norconex-commons-lang/src/changes/changes.xml @@ -7,7 +7,7 @@ - + Added new normalization rules to URLNormalizer: removeQueryString, lowerCase, lowerCasePath, lowerCaseQuery, lowerCaseQueryParameterNames, @@ -17,6 +17,10 @@ CachedInputStream no longer truncates last character under certain conditions. + + HttpURL#toAbsolute(String, String) will no longer try to make absolute + URLs without the two slashes after colon (e.g., tel:12345). + diff --git a/norconex-commons-lang/src/main/java/com/norconex/commons/lang/url/HttpURL.java b/norconex-commons-lang/src/main/java/com/norconex/commons/lang/url/HttpURL.java index 59578864..37fabbfa 100644 --- a/norconex-commons-lang/src/main/java/com/norconex/commons/lang/url/HttpURL.java +++ b/norconex-commons-lang/src/main/java/com/norconex/commons/lang/url/HttpURL.java @@ -29,27 +29,27 @@ /** * This class act as a mutable URL, which could be a replacement - * or "wrapper" to the {@link URL} class. It can also be used as a safer way - * to build a {@link URL} or a {@link URI} instance as it will properly escape + * or "wrapper" to the {@link URL} class. It can also be used as a safer way + * to build a {@link URL} or a {@link URI} instance as it will properly escape * appropriate characters before creating those. - * + * * @author Pascal Essiembre */ //TODO rename MutableURL public class HttpURL implements Serializable { private static final long serialVersionUID = -8886393027925815099L; - + /** Default URL HTTP Port. */ public static final int DEFAULT_HTTP_PORT = 80; /** Default Secure URL HTTP Port. */ public static final int DEFAULT_HTTPS_PORT = 443; - + /** Constant for "http" protocol. */ public static final String PROTOCOL_HTTP = "http"; /** Constant for "https" protocol. */ public static final String PROTOCOL_HTTPS = "https"; - + private QueryString queryString; private String host; private int port = -1; @@ -57,7 +57,7 @@ public class HttpURL implements Serializable { private String protocol; private final String encoding; private String fragment; - + /** * Creates a blank HttpURL using UTF-8 for URL encoding. */ @@ -103,8 +103,8 @@ public HttpURL(String url, String encoding) { } else { this.encoding = encoding; } - - String u = StringUtils.trimToEmpty(url); + + String u = StringUtils.trimToEmpty(url); if (u.matches("[a-zA-Z][a-zA-Z0-9\\+\\-\\.]*:.*")) { URL urlwrap; try { @@ -131,14 +131,14 @@ public HttpURL(String url, String encoding) { fragment = u.replaceFirst("^(.*?)(\\#)(.*)", "$3"); } } - + // Parameters if (StringUtils.contains(u, "?")) { queryString = new QueryString(u, encoding); } } - + /** * Gets the character encoding. Default is UTF-8. * @return character encoding @@ -177,7 +177,7 @@ public QueryString getQueryString() { public void setQueryString(QueryString queryString) { this.queryString = queryString; } - + /** * Gets the host portion of the URL. * @return the host portion of the URL @@ -216,8 +216,8 @@ public boolean isSecure() { } /** - * Gets the URL port. If the protocol is other than - * http or https, the port is -1 when + * Gets the URL port. If the protocol is other than + * http or https, the port is -1 when * not specified. * @return the URL port */ @@ -231,7 +231,7 @@ public int getPort() { public void setPort(int port) { this.port = port; } - + /** * Gets the URL fragment. * @return the fragment @@ -251,7 +251,7 @@ public void setFragment(String fragment) { /** * Gets the last URL path segment without the query string. - * If there are segment to return, + * If there are segment to return, * an empty string will be returned instead. * @return the last URL path segment */ @@ -264,7 +264,7 @@ public String getLastPathSegment() { return segment; } /** - * Converts this HttpURL to a regular {@link URL}, making sure + * Converts this HttpURL to a regular {@link URL}, making sure * appropriate characters are escaped properly. * @return a URL * @throws URLException when URL is malformed @@ -277,9 +277,9 @@ public URL toURL() { throw new URLException("Cannot convert to URL: " + url, e); } } - + /** - * Gets the root of this HttpUrl. That is the left part of a URL up to + * Gets the root of this HttpUrl. That is the left part of a URL up to * and including the host name. A null or empty string returns * a null document root. * @return left part of a URL up to (and including the host name @@ -289,9 +289,9 @@ public URL toURL() { public String getRoot() { return getRoot(toString()); } - + /** - * Converts this HttpURL to a {@link URI}, making sure + * Converts this HttpURL to a {@link URI}, making sure * appropriate characters are escaped properly. * @return a URI * @since 1.7.0 @@ -307,7 +307,7 @@ public URI toURI() { } /** *

- * Converts the supplied URL to a {@link URL}, making sure + * Converts the supplied URL to a {@link URL}, making sure * appropriate characters are encoded properly using UTF-8. This method * is a short form of:
* new HttpURL("http://example.com").toURL(); @@ -321,7 +321,7 @@ public static URL toURL(String url) { return new HttpURL(url).toURL(); } /** - *

Converts the supplied URL to a {@link URI}, making sure + *

Converts the supplied URL to a {@link URI}, making sure * appropriate characters are encoded properly using UTF-8. This method * is a short form of:
* new HttpURL("http://example.com").toURI(); @@ -336,7 +336,7 @@ public static URI toURI(String url) { } /** - *

Gets the root of a URL. That is the left part of a URL up to and + *

Gets the root of a URL. That is the left part of a URL up to and * including the host name. A null or empty string returns * a null document root. * This method is a short form of:
@@ -352,7 +352,7 @@ public static String getRoot(String url) { } return StringUtils.replacePattern(url, "(.*?://.*?)([/?#].*)", "$1"); } - + /** * Returns a string representation of this URL, properly encoded. * @return URL as a string @@ -390,7 +390,7 @@ public String toString() { } /** - * Whether this URL uses the default port for the protocol. The default + * Whether this URL uses the default port for the protocol. The default * port is 80 for "http" protocol, and 443 for "https". Other protocols * are not supported and this method will always return false * for them. @@ -398,14 +398,14 @@ public String toString() { * @since 1.8.0 */ public boolean isPortDefault() { - return PROTOCOL_HTTPS.equalsIgnoreCase(protocol) + return PROTOCOL_HTTPS.equalsIgnoreCase(protocol) && port == DEFAULT_HTTPS_PORT || PROTOCOL_HTTP.equalsIgnoreCase(protocol) && port == DEFAULT_HTTP_PORT; } - + /** - *

URL-Encodes the query string portion of a URL. The entire + *

URL-Encodes the query string portion of a URL. The entire * string supplied is assumed to be a query string. * @param queryString URL query string * @return encoded path @@ -417,7 +417,7 @@ public static String encodeQueryString(String queryString) { } return new QueryString(queryString).toString(); } - + /** *

URL-Encodes a URL path. The entire string supplied is assumed * to be a URL path. Unsafe characters are percent-encoded using UTF-8 @@ -427,7 +427,7 @@ public static String encodeQueryString(String queryString) { * @since 1.7.0 */ public static String encodePath(String path) { - // Any characters that are not one of the following are + // Any characters that are not one of the following are // percent-encoded (including spaces): // a-z A-Z 0-9 . - _ ~ ! $ & ' ( ) * + , ; = : @ / % if (StringUtils.isBlank(path)) { @@ -448,10 +448,10 @@ public static String encodePath(String path) { bytes = Character.toString(ch).getBytes(StandardCharsets.UTF_8); for (byte b : bytes) { sb.append('%'); - int upper = (((int) b) >> 4) & 0xf; + int upper = ((b) >> 4) & 0xf; sb.append(Integer.toHexString( upper).toUpperCase(Locale.US)); - int lower = ((int) b) & 0xf; + int lower = (b) & 0xf; sb.append(Integer.toHexString( lower).toUpperCase(Locale.US)); } @@ -487,7 +487,7 @@ public static String toAbsolute(String baseURL, String relativeURL) { } // Relative to last directory/segment - if (!relURL.contains("://")) { + if (!relURL.contains(":")) { String base = baseURL.replaceFirst("(.*?)([\\?\\#])(.*)", "$1"); if (StringUtils.countMatches(base, '/') > 2) { base = base.replaceFirst("(.*/)(.*)", "$1"); @@ -503,7 +503,7 @@ public static String toAbsolute(String baseURL, String relativeURL) { // Not detected as relative, so return as is return relURL; } - + @Override public int hashCode() { return new HashCodeBuilder()