diff --git a/norconex-commons-lang/pom.xml b/norconex-commons-lang/pom.xml
index 691016f8..92d1c802 100644
--- a/norconex-commons-lang/pom.xml
+++ b/norconex-commons-lang/pom.xml
@@ -19,7 +19,7 @@
+ /**
+ * Adds a trailing slash (/) right after the domain for URLs with no
+ * path, before any fragment (#) or query string (?).
+ *
+ * Please Note: Adding a trailing slash to a URL could
+ * potentially break its semantic equivalence.
+ *
+ * http://www.example.com →
+ * http://www.example.com/
+ * @return this instance
+ * @since 1.12.0
+ */
+ public URLNormalizer addDomainTrailingSlash() {
+     // Resolve the root first, then inspect the path of the parsed URL.
+     final String root = HttpURL.getRoot(url);
+     final boolean pathPresent = StringUtils.isNotBlank(toURL().getPath());
+     if (!pathPresent) {
+         // No path: replace the first occurrence of the root with root + "/".
+         url = StringUtils.replaceOnce(url, root, root + "/");
+     }
+     return this;
+ }
/**
* Adds a trailing slash (/) to a URL ending with a directory. A URL is diff --git a/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/HttpURLTest.java b/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/HttpURLTest.java index 2acc7c60..3020dc5c 100644 --- a/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/HttpURLTest.java +++ b/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/HttpURLTest.java @@ -48,6 +48,13 @@ public void tearDown() throws Exception { t = null; } + @Test + public void testKeepProtocolUpperCase() { + s = "HTTP://www.example.com"; + t = "HTTP://www.example.com"; + assertEquals(t, new HttpURL(s).toString()); + } + @Test public void testToAbsoluteRelativeToProtocol() { s = "//www.relative.com/e/f.html"; diff --git a/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/URLNormalizerTest.java b/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/URLNormalizerTest.java index 17cb1006..9965b5c3 100644 --- a/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/URLNormalizerTest.java +++ b/norconex-commons-lang/src/test/java/com/norconex/commons/lang/url/URLNormalizerTest.java @@ -86,6 +86,41 @@ public void testAllAtOnce() { assertEquals(t, n.toURI().toString()); } + @Test + public void testAddDomainTrailingSlash() { + s = "http://www.example.com"; + t = "http://www.example.com/"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com/"; + t = "http://www.example.com/"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com/blah"; + t = "http://www.example.com/blah"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com/blah/path"; + t = "http://www.example.com/blah/path"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com?param1=value1&param2=value2"; + t = 
"http://www.example.com/?param1=value1&param2=value2"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com/?param1=value1&param2=value2"; + t = "http://www.example.com/?param1=value1&param2=value2"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com#hash"; + t = "http://www.example.com/#hash"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + + s = "http://www.example.com/#hash"; + t = "http://www.example.com/#hash"; + assertEquals(t, n(s).addDomainTrailingSlash().toString()); + } + @Test public void testEncodeUTF8Characters() { @@ -99,6 +134,12 @@ public void testEncodeNonURICharacters() { s = "http://www.example.com/^a [b]/c?d e="; t = "http://www.example.com/%5Ea%20%5Bb%5D/c?d+e="; assertEquals(t, n(s).encodeNonURICharacters().toString()); + + //Test for https://github.com/Norconex/collector-http/issues/294 + //Was failing when HTTP was uppercase + s = "HTTP://www.Example.com/"; + t = "HTTP://www.Example.com/"; + assertEquals(t, n(s).encodeNonURICharacters().toString()); } @Test