From 3d8061ec9842f192f187a79e5d2cd9225bc6fdae Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Thu, 1 Aug 2024 09:31:43 +1200 Subject: [PATCH] Use own copy of PercentCodec for URI path encoding (#1109) * Use own copy of PercentCodec for URI path encoding Adapted from Apache HttpComponents HttpCore v5's https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java Signed-off-by: Thomas Farr * Refactor PercentCodec a bit Signed-off-by: Thomas Farr * Add change log Signed-off-by: Thomas Farr * Switch to system property Signed-off-by: Thomas Farr * spotless Signed-off-by: Thomas Farr * Add UPGRADING note Signed-off-by: Thomas Farr --------- Signed-off-by: Thomas Farr --- CHANGELOG.md | 1 + UPGRADING.md | 5 +- .../transport/endpoints/SimpleEndpoint.java | 2 +- .../opensearch/client/util/PathEncoder.java | 72 +++---- .../opensearch/client/util/PercentCodec.java | 180 ++++++++++++++++++ .../client/opensearch/model/EndpointTest.java | 28 +-- .../client/util/PathEncoderTest.java | 44 ----- .../client/util/PercentCodecTest.java | 64 +++++++ 8 files changed, 281 insertions(+), 115 deletions(-) create mode 100644 java-client/src/main/java/org/opensearch/client/util/PercentCodec.java delete mode 100644 java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java create mode 100644 java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index f91dd824b5..fdaefd86b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ This section is for maintaining a changelog for all breaking changes for the cli ### Dependencies ### Changed +- Changed URL path encoding to own implementation adapted from Apache HTTP Client 5's ([#1109](https://github.com/opensearch-project/opensearch-java/pull/1109)) ### Deprecated diff --git a/UPGRADING.md b/UPGRADING.md index 4e437fa770..1b1a91c7c6 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -1,6 +1,9 @@ # UPGRADING -## [UPGRADING 2.x to 3.0] +## Upgrading 2.x to 3.0 +### URL Path Encoding +- The default URL path encoding has been changed to be more conservative. Previously the `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `@` and `:` characters were left un-encoded, they will now be percent-encoded. If you require the previous behavior you can specify the `org.opensearch.path.encoding=HTTP_CLIENT_V4_EQUIV` system property. + ### SearchAfter of SearchRequest type - Changed SearchAfter of SearchRequest type to FieldValue instead of String ([#769](https://github.com/opensearch-project/opensearch-java/pull/769)) - Consider using `FieldValue.of` to make string type values compatible. diff --git a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java index 6409c845bc..f886c12ee7 100644 --- a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java +++ b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java @@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) { } public static void pathEncode(String src, StringBuilder dest) { - dest.append(PathEncoder.encode(src)); + PathEncoder.encode(dest, src); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index c9345a003f..5d43b7c19a 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -8,59 +8,43 @@ package org.opensearch.client.util; -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.Collections; +import java.util.Optional; public class PathEncoder { - private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils"; - private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils"; - private final static MethodHandle FORMAT_SEGMENTS_MH; + private enum Encoding { + RFC3986_PATH(PercentCodec.RFC3986_PATH), + HTTP_CLIENT_V4_EQUIV(PercentCodec.RFC3986_PATH), + + RFC3986_UNRESERVED(PercentCodec.RFC3986_UNRESERVED), + HTTP_CLIENT_V5_EQUIV(PercentCodec.RFC3986_UNRESERVED); - static { - Class clazz = null; - try { - // Try Apache HttpClient5 first since this is a default one - clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS); - } catch (final ClassNotFoundException ex) { + private final PercentCodec percentCodec; + + Encoding(PercentCodec percentCodec) { + this.percentCodec = percentCodec; + } + + static Optional get(String name) { try { - // Fallback to Apache HttpClient4 - clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS); - } catch (final ClassNotFoundException ex1) { - clazz = null; + return Optional.of(Encoding.valueOf(name.toUpperCase())); + } catch (Exception ignored) { + return Optional.empty(); } } + } - if (clazz == null) { - throw new IllegalStateException( - "Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath" - ); - } + private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding"; + private static final Encoding ENCODING_DEFAULT = Encoding.HTTP_CLIENT_V5_EQUIV; - try { - FORMAT_SEGMENTS_MH = MethodHandles.lookup() - .findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class)); - } catch (final NoSuchMethodException | IllegalAccessException ex) { - throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class"); - } + private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY)) + .flatMap(Encoding::get) + .orElse(ENCODING_DEFAULT); + + public static String encode(String pathSegment) { + return ENCODING.percentCodec.encode(pathSegment); } - public static String encode(String uri) { - try { - return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1); - } catch (final Throwable ex) { - throw new RuntimeException("Unable to encode URI: " + uri, ex); - } + public static void encode(StringBuilder dest, CharSequence pathSegment) { + ENCODING.percentCodec.encode(dest, pathSegment); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java new file mode 100644 index 0000000000..9fb50637f9 --- /dev/null +++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.client.util; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.BitSet; + +/** + * Percent-encoding. + *

+ * Adapted from Apache HttpComponents HttpCore v5's PercentCodec.java + *

+ */ +class PercentCodec { + private static class Chars { + private final BitSet set = new BitSet(256); + + public void add(char... chars) { + for (char c : chars) { + set.set(c); + } + } + + public void addRange(char start, char end) { + set.set(start, end + 1); + } + + public void add(Chars set) { + this.set.or(set.set); + } + + public boolean contains(int c) { + return set.get(c); + } + } + + private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars() { + { + add(':', '/', '?', '#', '[', ']', '@'); + } + }; + private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars() { + { + add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='); + } + }; + private static final Chars RFC3986_UNRESERVED_CHARS = new Chars() { + { + addRange('a', 'z'); + addRange('A', 'Z'); + addRange('0', '9'); + add('-', '.', '_', '~'); + } + }; + private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars() { + { + add(RFC3986_UNRESERVED_CHARS); + add(RFC3986_SUB_DELIMS_CHARS); + add('@'); + } + }; + private static final Chars RFC3986_PATH_CHARS = new Chars() { + { + add(RFC3986_PATH_NO_COLON_CHARS); + add(':'); + } + }; + private static final Chars RFC3986_URIC_CHARS = new Chars() { + { + add(RFC3986_SUB_DELIMS_CHARS); + add(RFC3986_UNRESERVED_CHARS); + } + }; + + private static final Chars RFC5987_UNRESERVED_CHARS = new Chars() { + { + addRange('a', 'z'); + addRange('A', 'Z'); + addRange('0', '9'); + // Additional characters as per RFC 5987 attr-char + add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~'); + } + }; + + private static final int RADIX = 16; + + private static void encode( + final StringBuilder buf, + final CharSequence content, + final Charset charset, + final Chars safeChars, + final boolean blankAsPlus + ) { + if (content == null) { + return; + } + final CharBuffer cb = CharBuffer.wrap(content); + final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb); + while (bb.hasRemaining()) { + final int b = bb.get() & 0xff; + if (safeChars.contains(b)) { + buf.append((char) b); + } else if (blankAsPlus && b == ' ') { + buf.append("+"); + } else { + buf.append("%"); + final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); + final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); + buf.append(hex1); + buf.append(hex2); + } + } + } + + private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) { + if (content == null) { + return null; + } + final ByteBuffer bb = ByteBuffer.allocate(content.length()); + final CharBuffer cb = CharBuffer.wrap(content); + while (cb.hasRemaining()) { + final char c = cb.get(); + if (c == '%' && cb.remaining() >= 2) { + final char uc = cb.get(); + final char lc = cb.get(); + final int u = Character.digit(uc, RADIX); + final int l = Character.digit(lc, RADIX); + if (u != -1 && l != -1) { + bb.put((byte) ((u << 4) + l)); + } else { + bb.put((byte) '%'); + bb.put((byte) uc); + bb.put((byte) lc); + } + } else if (plusAsBlank && c == '+') { + bb.put((byte) ' '); + } else { + bb.put((byte) c); + } + } + bb.flip(); + return (charset != null ? charset : StandardCharsets.UTF_8).decode(bb).toString(); + } + + public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS); + public static final PercentCodec RFC3986_PATH = new PercentCodec(RFC3986_PATH_CHARS); + public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS); + + private final Chars unreserved; + + private PercentCodec(final Chars unreserved) { + this.unreserved = unreserved; + } + + public void encode(final StringBuilder buf, final CharSequence content) { + encode(buf, content, StandardCharsets.UTF_8, unreserved, false); + } + + public String encode(final CharSequence content) { + if (content == null) { + return null; + } + final StringBuilder buf = new StringBuilder(); + encode(buf, content, StandardCharsets.UTF_8, unreserved, false); + return buf.toString(); + } + + public String decode(final CharSequence content) { + return decode(content, StandardCharsets.UTF_8, false); + } +} diff --git a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java index b37f98622a..680f8ebe6d 100644 --- a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java +++ b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java @@ -57,19 +57,10 @@ public void testArrayPathParameter() { assertEquals("/a/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a", "b")); - if (isHttpClient5Present()) { - assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - - } else { - assertEquals("/a,b/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a", "b", "c")); - if (isHttpClient5Present()) { - assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } else { - assertEquals("/a,b,c/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); } @Test @@ -80,11 +71,7 @@ public void testPathEncoding() { assertEquals("/a%2Fb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a/b", "c/d")); - if (isHttpClient5Present()) { - assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } else { - assertEquals("/a%2Fb,c%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); } @@ -103,13 +90,4 @@ public void testArrayQueryParameter() { req = RefreshRequest.of(b -> b.expandWildcards(ExpandWildcard.All, ExpandWildcard.Closed)); assertEquals("all,closed", RefreshRequest._ENDPOINT.queryParameters(req).get("expand_wildcards")); } - - private static boolean isHttpClient5Present() { - try { - Class.forName("org.apache.hc.core5.net.URLEncodedUtils"); - return true; - } catch (ClassNotFoundException e) { - return false; - } - } } diff --git a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java b/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java deleted file mode 100644 index 0e0a5f8c15..0000000000 --- a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.client.util; - -import static org.junit.Assert.assertEquals; - -import org.junit.Test; - -public class PathEncoderTest { - - @Test - public void testEncode() { - // Test with a simple string - String simpleString = "test"; - String encodedSimpleString = PathEncoder.encode(simpleString); - assertEquals(simpleString, encodedSimpleString); - - // Test with a string that contains special characters - String specialString = "a/b"; - String encodedSpecialString = PathEncoder.encode(specialString); - assertEquals("a%2Fb", encodedSpecialString); - - // Test with a string that contains alphanumeric characters - String alphanumericString = "abc123"; - String encodedAlphanumericString = PathEncoder.encode(alphanumericString); - assertEquals("abc123", encodedAlphanumericString); - - // Test with a string that contains multiple segments - String multiSegmentString = "a/b/c/_refresh"; - String encodedMultiSegmentString = PathEncoder.encode(multiSegmentString); - assertEquals("a%2Fb%2Fc%2F_refresh", encodedMultiSegmentString); - - // Test with a string that contains colon segment - String colonSegmentString = "a:b:c::2.0"; - String encodedColonSegmentString = PathEncoder.encode(colonSegmentString); - assertEquals("a%3Ab%3Ac%3A%3A2.0", encodedColonSegmentString); - } -} diff --git a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java new file mode 100644 index 0000000000..7a13f69c58 --- /dev/null +++ b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.client.util; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class PercentCodecTest { + @Parameterized.Parameters + public static Collection testData() { + return Arrays.asList( + new Object[][] { + // , , + { "test", "test", "test" }, + { "abc123", "abc123", "abc123" }, + { "a/b", "a%2Fb", "a%2Fb" }, + { "a/b/c/_refresh", "a%2Fb%2Fc%2F_refresh", "a%2Fb%2Fc%2F_refresh" }, + { "a:b:c:d:e::1.0", "a%3Ab%3Ac%3Ad%3Ae%3A%3A1.0", "a:b:c:d:e::1.0" }, + { "a,b,c", "a%2Cb%2Cc", "a,b,c" } } + ); + } + + private final String decoded; + private final String encodedRFC3986Unreserved; + private final String encodedRFC3986Path; + + public PercentCodecTest(String decoded, String encodedRFC3986Unreserved, String encodedRFC3986Path) { + this.decoded = decoded; + this.encodedRFC3986Unreserved = encodedRFC3986Unreserved; + this.encodedRFC3986Path = encodedRFC3986Path; + } + + @Test + public void test_RFC3986_UNRESERVED_encoding() { + assertEquals(this.encodedRFC3986Unreserved, PercentCodec.RFC3986_UNRESERVED.encode(this.decoded)); + } + + @Test + public void test_RFC3986_UNRESERVED_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_UNRESERVED.decode(this.encodedRFC3986Unreserved)); + } + + @Test + public void test_RFC3986_PATH_encoding() { + assertEquals(this.encodedRFC3986Path, PercentCodec.RFC3986_PATH.encode(this.decoded)); + } + + @Test + public void test_RFC3986_PATH_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_PATH.decode(this.encodedRFC3986Path)); + } +}