Skip to content

Commit

Permalink
HttpURL#toAbsolute(String, String) will no longer try to make absolute
Browse files Browse the repository at this point in the history
URLs without the two slashes after colon (e.g., tel:12345).
  • Loading branch information
essiembre committed Jul 24, 2019
1 parent b31862c commit c26095a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 36 deletions.
6 changes: 5 additions & 1 deletion norconex-commons-lang/src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</properties>
<body>

<release version="1.15.1-SNAPSHOT" date="2019-??-??" description="Bugfix release">
<release version="1.15.1-SNAPSHOT" date="2019-??-??" description="Maintenance release">
<action dev="essiembre" type="add">
Added new normalization rules to URLNormalizer: removeQueryString,
lowerCase, lowerCasePath, lowerCaseQuery, lowerCaseQueryParameterNames,
Expand All @@ -17,6 +17,10 @@
CachedInputStream no longer truncates last character under certain
conditions.
</action>
<action dev="essiembre" type="fix">
HttpURL#toAbsolute(String, String) will no longer try to make absolute
URLs without the two slashes after colon (e.g., tel:12345).
</action>
</release>

<release version="1.15.0" date="2018-06-03" description="Feature release">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,35 @@

/**
* This class acts as a mutable URL, which could be a replacement
* or "wrapper" to the {@link URL} class. It can also be used as a safer way
* to build a {@link URL} or a {@link URI} instance as it will properly escape
* or "wrapper" to the {@link URL} class. It can also be used as a safer way
* to build a {@link URL} or a {@link URI} instance as it will properly escape
* appropriate characters before creating those.
*
*
* @author Pascal Essiembre
*/
//TODO rename MutableURL
public class HttpURL implements Serializable {

private static final long serialVersionUID = -8886393027925815099L;

/** Default URL HTTP Port. */
public static final int DEFAULT_HTTP_PORT = 80;
/** Default Secure URL HTTP Port. */
public static final int DEFAULT_HTTPS_PORT = 443;

/** Constant for "http" protocol. */
public static final String PROTOCOL_HTTP = "http";
/** Constant for "https" protocol. */
public static final String PROTOCOL_HTTPS = "https";

private QueryString queryString;
private String host;
private int port = -1;
private String path;
private String protocol;
private final String encoding;
private String fragment;

/**
* Creates a blank HttpURL using UTF-8 for URL encoding.
*/
Expand Down Expand Up @@ -103,8 +103,8 @@ public HttpURL(String url, String encoding) {
} else {
this.encoding = encoding;
}
String u = StringUtils.trimToEmpty(url);

String u = StringUtils.trimToEmpty(url);
if (u.matches("[a-zA-Z][a-zA-Z0-9\\+\\-\\.]*:.*")) {
URL urlwrap;
try {
Expand All @@ -131,14 +131,14 @@ public HttpURL(String url, String encoding) {
fragment = u.replaceFirst("^(.*?)(\\#)(.*)", "$3");
}
}

// Parameters
if (StringUtils.contains(u, "?")) {
queryString = new QueryString(u, encoding);
}
}


/**
* Gets the character encoding. Default is UTF-8.
* @return character encoding
Expand Down Expand Up @@ -177,7 +177,7 @@ public QueryString getQueryString() {
public void setQueryString(QueryString queryString) {
this.queryString = queryString;
}

/**
* Gets the host portion of the URL.
* @return the host portion of the URL
Expand Down Expand Up @@ -216,8 +216,8 @@ public boolean isSecure() {
}

/**
* Gets the URL port. If the protocol is other than
* <code>http</code> or <code>https</code>, the port is -1 when
* Gets the URL port. If the protocol is other than
* <code>http</code> or <code>https</code>, the port is -1 when
* not specified.
* @return the URL port
*/
Expand All @@ -231,7 +231,7 @@ public int getPort() {
public void setPort(int port) {
this.port = port;
}

/**
* Gets the URL fragment.
* @return the fragment
Expand All @@ -251,7 +251,7 @@ public void setFragment(String fragment) {

/**
* Gets the last URL path segment without the query string.
* If there is no segment to return,
* If there is no segment to return,
* an empty string will be returned instead.
* @return the last URL path segment
*/
Expand All @@ -264,7 +264,7 @@ public String getLastPathSegment() {
return segment;
}
/**
* Converts this HttpURL to a regular {@link URL}, making sure
* Converts this HttpURL to a regular {@link URL}, making sure
* appropriate characters are escaped properly.
* @return a URL
* @throws URLException when URL is malformed
Expand All @@ -277,9 +277,9 @@ public URL toURL() {
throw new URLException("Cannot convert to URL: " + url, e);
}
}

/**
* Gets the root of this HttpUrl. That is the left part of a URL up to
* Gets the root of this HttpUrl. That is the left part of a URL up to
* and including the host name. A <code>null</code> or empty string returns
* a <code>null</code> document root.
* @return left part of a URL up to (and including) the host name
Expand All @@ -289,9 +289,9 @@ public URL toURL() {
public String getRoot() {
return getRoot(toString());
}

/**
* Converts this HttpURL to a {@link URI}, making sure
* Converts this HttpURL to a {@link URI}, making sure
* appropriate characters are escaped properly.
* @return a URI
* @since 1.7.0
Expand All @@ -307,7 +307,7 @@ public URI toURI() {
}
/**
* <p>
* Converts the supplied URL to a {@link URL}, making sure
* Converts the supplied URL to a {@link URL}, making sure
* appropriate characters are encoded properly using UTF-8. This method
* is a short form of:<br>
* <code>new HttpURL("http://example.com").toURL();</code>
Expand All @@ -321,7 +321,7 @@ public static URL toURL(String url) {
return new HttpURL(url).toURL();
}
/**
* <p>Converts the supplied URL to a {@link URI}, making sure
* <p>Converts the supplied URL to a {@link URI}, making sure
* appropriate characters are encoded properly using UTF-8. This method
* is a short form of:<br>
* <code>new HttpURL("http://example.com").toURI();</code>
Expand All @@ -336,7 +336,7 @@ public static URI toURI(String url) {
}

/**
* <p>Gets the root of a URL. That is the left part of a URL up to and
* <p>Gets the root of a URL. That is the left part of a URL up to and
* including the host name. A <code>null</code> or empty string returns
* a <code>null</code> document root.
* This method is a short form of:<br>
Expand All @@ -352,7 +352,7 @@ public static String getRoot(String url) {
}
return StringUtils.replacePattern(url, "(.*?://.*?)([/?#].*)", "$1");
}

/**
* Returns a string representation of this URL, properly encoded.
* @return URL as a string
Expand Down Expand Up @@ -390,22 +390,22 @@ public String toString() {
}

/**
* Whether this URL uses the default port for the protocol. The default
* Whether this URL uses the default port for the protocol. The default
* port is 80 for "http" protocol, and 443 for "https". Other protocols
* are not supported and this method will always return false
* for them.
* @return <code>true</code> if the URL is using the default port.
* @since 1.8.0
*/
public boolean isPortDefault() {
return PROTOCOL_HTTPS.equalsIgnoreCase(protocol)
return PROTOCOL_HTTPS.equalsIgnoreCase(protocol)
&& port == DEFAULT_HTTPS_PORT
|| PROTOCOL_HTTP.equalsIgnoreCase(protocol)
&& port == DEFAULT_HTTP_PORT;
}

/**
* <p>URL-Encodes the query string portion of a URL. The entire
* <p>URL-Encodes the query string portion of a URL. The entire
* string supplied is assumed to be a query string.
* @param queryString URL query string
* @return encoded query string
Expand All @@ -417,7 +417,7 @@ public static String encodeQueryString(String queryString) {
}
return new QueryString(queryString).toString();
}

/**
* <p>URL-Encodes a URL path. The entire string supplied is assumed
* to be a URL path. Unsafe characters are percent-encoded using UTF-8
Expand All @@ -427,7 +427,7 @@ public static String encodeQueryString(String queryString) {
* @since 1.7.0
*/
public static String encodePath(String path) {
// Any characters that are not one of the following are
// Any characters that are not one of the following are
// percent-encoded (including spaces):
// a-z A-Z 0-9 . - _ ~ ! $ &amp; ' ( ) * + , ; = : @ / %
if (StringUtils.isBlank(path)) {
Expand All @@ -448,10 +448,10 @@ public static String encodePath(String path) {
bytes = Character.toString(ch).getBytes(StandardCharsets.UTF_8);
for (byte b : bytes) {
sb.append('%');
int upper = (((int) b) >> 4) & 0xf;
int upper = ((b) >> 4) & 0xf;
sb.append(Integer.toHexString(
upper).toUpperCase(Locale.US));
int lower = ((int) b) & 0xf;
int lower = (b) & 0xf;
sb.append(Integer.toHexString(
lower).toUpperCase(Locale.US));
}
Expand Down Expand Up @@ -487,7 +487,7 @@ public static String toAbsolute(String baseURL, String relativeURL) {
}

// Relative to last directory/segment
if (!relURL.contains("://")) {
if (!relURL.contains(":")) {
String base = baseURL.replaceFirst("(.*?)([\\?\\#])(.*)", "$1");
if (StringUtils.countMatches(base, '/') > 2) {
base = base.replaceFirst("(.*/)(.*)", "$1");
Expand All @@ -503,7 +503,7 @@ public static String toAbsolute(String baseURL, String relativeURL) {
// Not detected as relative, so return as is
return relURL;
}

@Override
public int hashCode() {
return new HashCodeBuilder()
Expand Down

0 comments on commit c26095a

Please sign in to comment.