From b5901c97c59ea04a980d74e73549e0cd6b4bc788 Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Thu, 25 Jul 2024 17:02:56 +1200 Subject: [PATCH 1/6] Use own copy of PercentCodec for URI path encoding Adapted from Apache HttpComponents HttpCore v5's https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java Signed-off-by: Thomas Farr --- .../transport/endpoints/SimpleEndpoint.java | 2 +- .../opensearch/client/util/PathEncoder.java | 69 ++----- .../opensearch/client/util/PercentCodec.java | 193 ++++++++++++++++++ .../client/opensearch/model/EndpointTest.java | 28 +-- .../client/util/PathEncoderTest.java | 44 ---- .../client/util/PercentCodecTest.java | 64 ++++++ 6 files changed, 282 insertions(+), 118 deletions(-) create mode 100644 java-client/src/main/java/org/opensearch/client/util/PercentCodec.java delete mode 100644 java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java create mode 100644 java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java diff --git a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java index 6409c845bc..f886c12ee7 100644 --- a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java +++ b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java @@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) { } public static void pathEncode(String src, StringBuilder dest) { - dest.append(PathEncoder.encode(src)); + PathEncoder.encode(dest, src); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index c9345a003f..0d1aa35703 100644 --- 
a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -8,59 +8,32 @@ package org.opensearch.client.util; -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.Collections; - public class PathEncoder { - private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils"; - private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils"; - private final static MethodHandle FORMAT_SEGMENTS_MH; - - static { - Class clazz = null; - try { - // Try Apache HttpClient5 first since this is a default one - clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS); - } catch (final ClassNotFoundException ex) { - try { - // Fallback to Apache HttpClient4 - clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS); - } catch (final ClassNotFoundException ex1) { - clazz = null; - } + public static final PercentCodec DEFAULT_CODEC = PercentCodec.RFC3986_UNRESERVED; + /** + * Percent encoding codec that matches Apache HTTP Client 4's path segment encoding. 
+ */ + @Deprecated + public static final PercentCodec APACHE_HTTP_CLIENT_4_COMPAT = PercentCodec.RFC3986_PATHSAFE; + private static PercentCodec codec; + + public static PercentCodec getCodec() { + if (codec == null) { + codec = DEFAULT_CODEC; } + return codec; + } - if (clazz == null) { - throw new IllegalStateException( - "Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath" - ); - } + public static void setCodec(PercentCodec codec) { + PathEncoder.codec = codec; + } - try { - FORMAT_SEGMENTS_MH = MethodHandles.lookup() - .findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class)); - } catch (final NoSuchMethodException | IllegalAccessException ex) { - throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class"); - } + + public static String encode(String pathSegment) { + return getCodec().encode(pathSegment); } - public static String encode(String uri) { - try { - return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1); - } catch (final Throwable ex) { - throw new RuntimeException("Unable to encode URI: " + uri, ex); - } + public static void encode(StringBuilder dest, CharSequence pathSegment) { + getCodec().encode(dest, pathSegment); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java new file mode 100644 index 0000000000..f0f6c47813 --- /dev/null +++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java @@ -0,0 +1,193 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.util; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.BitSet; + +/** + * Percent-encoding. + *

+ * Adapted from Apache HttpComponents HttpCore v5's PercentCodec.java + *

+ */ +public class PercentCodec { + private static final BitSet RFC3986_GEN_DELIMS_CHARS = new BitSet(256); + private static final BitSet RFC3986_SUB_DELIMS_CHARS = new BitSet(256); + private static final BitSet RFC3986_UNRESERVED_CHARS = new BitSet(256); + private static final BitSet RFC3986_PATHSAFE_NC_CHARS = new BitSet(256); + private static final BitSet RFC3986_PATHSAFE_CHARS = new BitSet(256); + private static final BitSet RFC3986_URIC_CHARS = new BitSet(256); + + static { + RFC3986_GEN_DELIMS_CHARS.set(':'); + RFC3986_GEN_DELIMS_CHARS.set('/'); + RFC3986_GEN_DELIMS_CHARS.set('?'); + RFC3986_GEN_DELIMS_CHARS.set('#'); + RFC3986_GEN_DELIMS_CHARS.set('['); + RFC3986_GEN_DELIMS_CHARS.set(']'); + RFC3986_GEN_DELIMS_CHARS.set('@'); + + RFC3986_SUB_DELIMS_CHARS.set('!'); + RFC3986_SUB_DELIMS_CHARS.set('$'); + RFC3986_SUB_DELIMS_CHARS.set('&'); + RFC3986_SUB_DELIMS_CHARS.set('\''); + RFC3986_SUB_DELIMS_CHARS.set('('); + RFC3986_SUB_DELIMS_CHARS.set(')'); + RFC3986_SUB_DELIMS_CHARS.set('*'); + RFC3986_SUB_DELIMS_CHARS.set('+'); + RFC3986_SUB_DELIMS_CHARS.set(','); + RFC3986_SUB_DELIMS_CHARS.set(';'); + RFC3986_SUB_DELIMS_CHARS.set('='); + + for (int i = 'a'; i <= 'z'; i++) { + RFC3986_UNRESERVED_CHARS.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + RFC3986_UNRESERVED_CHARS.set(i); + } + // numeric characters + for (int i = '0'; i <= '9'; i++) { + RFC3986_UNRESERVED_CHARS.set(i); + } + RFC3986_UNRESERVED_CHARS.set('-'); + RFC3986_UNRESERVED_CHARS.set('.'); + RFC3986_UNRESERVED_CHARS.set('_'); + RFC3986_UNRESERVED_CHARS.set('~'); + + RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_UNRESERVED_CHARS); + RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_SUB_DELIMS_CHARS); + RFC3986_PATHSAFE_NC_CHARS.set('@'); + + RFC3986_PATHSAFE_CHARS.or(RFC3986_PATHSAFE_NC_CHARS); + RFC3986_PATHSAFE_CHARS.set(':'); + + RFC3986_URIC_CHARS.or(RFC3986_SUB_DELIMS_CHARS); + RFC3986_URIC_CHARS.or(RFC3986_UNRESERVED_CHARS); + } + + private static final BitSet RFC5987_UNRESERVED_CHARS = new BitSet(256); + + static 
{ + // Alphanumeric characters + for (int i = 'a'; i <= 'z'; i++) { + RFC5987_UNRESERVED_CHARS.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + RFC5987_UNRESERVED_CHARS.set(i); + } + for (int i = '0'; i <= '9'; i++) { + RFC5987_UNRESERVED_CHARS.set(i); + } + + // Additional characters as per RFC 5987 attr-char + RFC5987_UNRESERVED_CHARS.set('!'); + RFC5987_UNRESERVED_CHARS.set('#'); + RFC5987_UNRESERVED_CHARS.set('$'); + RFC5987_UNRESERVED_CHARS.set('&'); + RFC5987_UNRESERVED_CHARS.set('+'); + RFC5987_UNRESERVED_CHARS.set('-'); + RFC5987_UNRESERVED_CHARS.set('.'); + RFC5987_UNRESERVED_CHARS.set('^'); + RFC5987_UNRESERVED_CHARS.set('_'); + RFC5987_UNRESERVED_CHARS.set('`'); + RFC5987_UNRESERVED_CHARS.set('|'); + RFC5987_UNRESERVED_CHARS.set('~'); + } + + private static final int RADIX = 16; + + private static void encode( + final StringBuilder buf, + final CharSequence content, + final Charset charset, + final BitSet safeChars, + final boolean blankAsPlus + ) { + if (content == null) { + return; + } + final CharBuffer cb = CharBuffer.wrap(content); + final ByteBuffer bb = (charset != null ? 
charset : StandardCharsets.UTF_8).encode(cb); + while (bb.hasRemaining()) { + final int b = bb.get() & 0xff; + if (safeChars.get(b)) { + buf.append((char) b); + } else if (blankAsPlus && b == ' ') { + buf.append("+"); + } else { + buf.append("%"); + final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); + final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); + buf.append(hex1); + buf.append(hex2); + } + } + } + + private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) { + if (content == null) { + return null; + } + final ByteBuffer bb = ByteBuffer.allocate(content.length()); + final CharBuffer cb = CharBuffer.wrap(content); + while (cb.hasRemaining()) { + final char c = cb.get(); + if (c == '%' && cb.remaining() >= 2) { + final char uc = cb.get(); + final char lc = cb.get(); + final int u = Character.digit(uc, RADIX); + final int l = Character.digit(lc, RADIX); + if (u != -1 && l != -1) { + bb.put((byte) ((u << 4) + l)); + } else { + bb.put((byte) '%'); + bb.put((byte) uc); + bb.put((byte) lc); + } + } else if (plusAsBlank && c == '+') { + bb.put((byte) ' '); + } else { + bb.put((byte) c); + } + } + bb.flip(); + return (charset != null ? 
charset : StandardCharsets.UTF_8).decode(bb).toString(); + } + + public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS); + public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATHSAFE_CHARS); + public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS); + + private final BitSet unreserved; + + private PercentCodec(final BitSet unreserved) { + this.unreserved = unreserved; + } + + public void encode(final StringBuilder buf, final CharSequence content) { + encode(buf, content, StandardCharsets.UTF_8, unreserved, false); + } + + public String encode(final CharSequence content) { + if (content == null) { + return null; + } + final StringBuilder buf = new StringBuilder(); + encode(buf, content, StandardCharsets.UTF_8, unreserved, false); + return buf.toString(); + } + + public String decode(final CharSequence content) { + return decode(content, StandardCharsets.UTF_8, false); + } +} diff --git a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java index b37f98622a..680f8ebe6d 100644 --- a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java +++ b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java @@ -57,19 +57,10 @@ public void testArrayPathParameter() { assertEquals("/a/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a", "b")); - if (isHttpClient5Present()) { - assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - - } else { - assertEquals("/a,b/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a", "b", "c")); - if (isHttpClient5Present()) { - assertEquals("/a%2Cb%2Cc/_refresh", 
RefreshRequest._ENDPOINT.requestUrl(req)); - } else { - assertEquals("/a,b,c/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); } @Test @@ -80,11 +71,7 @@ public void testPathEncoding() { assertEquals("/a%2Fb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a/b", "c/d")); - if (isHttpClient5Present()) { - assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } else { - assertEquals("/a%2Fb,c%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); } @@ -103,13 +90,4 @@ public void testArrayQueryParameter() { req = RefreshRequest.of(b -> b.expandWildcards(ExpandWildcard.All, ExpandWildcard.Closed)); assertEquals("all,closed", RefreshRequest._ENDPOINT.queryParameters(req).get("expand_wildcards")); } - - private static boolean isHttpClient5Present() { - try { - Class.forName("org.apache.hc.core5.net.URLEncodedUtils"); - return true; - } catch (ClassNotFoundException e) { - return false; - } - } } diff --git a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java b/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java deleted file mode 100644 index 0e0a5f8c15..0000000000 --- a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.client.util; - -import static org.junit.Assert.assertEquals; - -import org.junit.Test; - -public class PathEncoderTest { - - @Test - public void testEncode() { - // Test with a simple string - String simpleString = "test"; - String encodedSimpleString = PathEncoder.encode(simpleString); - assertEquals(simpleString, encodedSimpleString); - - // Test with a string that contains special characters - String specialString = "a/b"; - String encodedSpecialString = PathEncoder.encode(specialString); - assertEquals("a%2Fb", encodedSpecialString); - - // Test with a string that contains alphanumeric characters - String alphanumericString = "abc123"; - String encodedAlphanumericString = PathEncoder.encode(alphanumericString); - assertEquals("abc123", encodedAlphanumericString); - - // Test with a string that contains multiple segments - String multiSegmentString = "a/b/c/_refresh"; - String encodedMultiSegmentString = PathEncoder.encode(multiSegmentString); - assertEquals("a%2Fb%2Fc%2F_refresh", encodedMultiSegmentString); - - // Test with a string that contains colon segment - String colonSegmentString = "a:b:c::2.0"; - String encodedColonSegmentString = PathEncoder.encode(colonSegmentString); - assertEquals("a%3Ab%3Ac%3A%3A2.0", encodedColonSegmentString); - } -} diff --git a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java new file mode 100644 index 0000000000..178dd3db03 --- /dev/null +++ b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.util; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class PercentCodecTest { + @Parameterized.Parameters + public static Collection testData() { + return Arrays.asList( + new Object[][] { + // , , + { "test", "test", "test" }, + { "abc123", "abc123", "abc123" }, + { "a/b", "a%2Fb", "a%2Fb" }, + { "a/b/c/_refresh", "a%2Fb%2Fc%2F_refresh", "a%2Fb%2Fc%2F_refresh" }, + { "a:b:c:d:e::1.0", "a%3Ab%3Ac%3Ad%3Ae%3A%3A1.0", "a:b:c:d:e::1.0" }, + { "a,b,c", "a%2Cb%2Cc", "a,b,c" } } + ); + } + + private final String decoded; + private final String encodedRFC3986Unreserved; + private final String encodedRFC3986PathSafe; + + public PercentCodecTest(String decoded, String encodedRFC3986Unreserved, String encodedRFC3986PathSafe) { + this.decoded = decoded; + this.encodedRFC3986Unreserved = encodedRFC3986Unreserved; + this.encodedRFC3986PathSafe = encodedRFC3986PathSafe; + } + + @Test + public void test_RFC3986_UNRESERVED_encoding() { + assertEquals(this.encodedRFC3986Unreserved, PercentCodec.RFC3986_UNRESERVED.encode(this.decoded)); + } + + @Test + public void test_RFC3986_UNRESERVED_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_UNRESERVED.decode(this.encodedRFC3986Unreserved)); + } + + @Test + public void test_RFC3986_PATHSAFE_encoding() { + assertEquals(this.encodedRFC3986PathSafe, PercentCodec.RFC3986_PATHSAFE.encode(this.decoded)); + } + + @Test + public void test_RFC3986_PATHSAFE_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_PATHSAFE.decode(this.encodedRFC3986PathSafe)); + } +} From 7fcb24d3c15a0051ec88b9a3d608f3b9cf21a84e Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Tue, 30 Jul 2024 14:18:31 +1200 Subject: [PATCH 2/6] Refactor PercentCodec a bit Signed-off-by: Thomas Farr --- 
.../opensearch/client/util/PathEncoder.java | 1 - .../opensearch/client/util/PercentCodec.java | 157 ++++++++---------- 2 files changed, 72 insertions(+), 86 deletions(-) diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index 0d1aa35703..2574aaa26a 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -28,7 +28,6 @@ public static void setCodec(PercentCodec codec) { PathEncoder.codec = codec; } - public static String encode(String pathSegment) { return getCodec().encode(pathSegment); } diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java index f0f6c47813..a672c71c38 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java +++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java @@ -21,96 +21,83 @@ *

*/ public class PercentCodec { - private static final BitSet RFC3986_GEN_DELIMS_CHARS = new BitSet(256); - private static final BitSet RFC3986_SUB_DELIMS_CHARS = new BitSet(256); - private static final BitSet RFC3986_UNRESERVED_CHARS = new BitSet(256); - private static final BitSet RFC3986_PATHSAFE_NC_CHARS = new BitSet(256); - private static final BitSet RFC3986_PATHSAFE_CHARS = new BitSet(256); - private static final BitSet RFC3986_URIC_CHARS = new BitSet(256); - - static { - RFC3986_GEN_DELIMS_CHARS.set(':'); - RFC3986_GEN_DELIMS_CHARS.set('/'); - RFC3986_GEN_DELIMS_CHARS.set('?'); - RFC3986_GEN_DELIMS_CHARS.set('#'); - RFC3986_GEN_DELIMS_CHARS.set('['); - RFC3986_GEN_DELIMS_CHARS.set(']'); - RFC3986_GEN_DELIMS_CHARS.set('@'); - - RFC3986_SUB_DELIMS_CHARS.set('!'); - RFC3986_SUB_DELIMS_CHARS.set('$'); - RFC3986_SUB_DELIMS_CHARS.set('&'); - RFC3986_SUB_DELIMS_CHARS.set('\''); - RFC3986_SUB_DELIMS_CHARS.set('('); - RFC3986_SUB_DELIMS_CHARS.set(')'); - RFC3986_SUB_DELIMS_CHARS.set('*'); - RFC3986_SUB_DELIMS_CHARS.set('+'); - RFC3986_SUB_DELIMS_CHARS.set(','); - RFC3986_SUB_DELIMS_CHARS.set(';'); - RFC3986_SUB_DELIMS_CHARS.set('='); - - for (int i = 'a'; i <= 'z'; i++) { - RFC3986_UNRESERVED_CHARS.set(i); - } - for (int i = 'A'; i <= 'Z'; i++) { - RFC3986_UNRESERVED_CHARS.set(i); - } - // numeric characters - for (int i = '0'; i <= '9'; i++) { - RFC3986_UNRESERVED_CHARS.set(i); - } - RFC3986_UNRESERVED_CHARS.set('-'); - RFC3986_UNRESERVED_CHARS.set('.'); - RFC3986_UNRESERVED_CHARS.set('_'); - RFC3986_UNRESERVED_CHARS.set('~'); - - RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_UNRESERVED_CHARS); - RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_SUB_DELIMS_CHARS); - RFC3986_PATHSAFE_NC_CHARS.set('@'); - - RFC3986_PATHSAFE_CHARS.or(RFC3986_PATHSAFE_NC_CHARS); - RFC3986_PATHSAFE_CHARS.set(':'); - - RFC3986_URIC_CHARS.or(RFC3986_SUB_DELIMS_CHARS); - RFC3986_URIC_CHARS.or(RFC3986_UNRESERVED_CHARS); - } + private static class Chars { + private final BitSet set = new BitSet(256); - private 
static final BitSet RFC5987_UNRESERVED_CHARS = new BitSet(256); - - static { - // Alphanumeric characters - for (int i = 'a'; i <= 'z'; i++) { - RFC5987_UNRESERVED_CHARS.set(i); - } - for (int i = 'A'; i <= 'Z'; i++) { - RFC5987_UNRESERVED_CHARS.set(i); - } - for (int i = '0'; i <= '9'; i++) { - RFC5987_UNRESERVED_CHARS.set(i); - } - - // Additional characters as per RFC 5987 attr-char - RFC5987_UNRESERVED_CHARS.set('!'); - RFC5987_UNRESERVED_CHARS.set('#'); - RFC5987_UNRESERVED_CHARS.set('$'); - RFC5987_UNRESERVED_CHARS.set('&'); - RFC5987_UNRESERVED_CHARS.set('+'); - RFC5987_UNRESERVED_CHARS.set('-'); - RFC5987_UNRESERVED_CHARS.set('.'); - RFC5987_UNRESERVED_CHARS.set('^'); - RFC5987_UNRESERVED_CHARS.set('_'); - RFC5987_UNRESERVED_CHARS.set('`'); - RFC5987_UNRESERVED_CHARS.set('|'); - RFC5987_UNRESERVED_CHARS.set('~'); + public void add(char... chars) { + for (char c : chars) { + set.set(c); + } + } + + public void addRange(char start, char end) { + set.set(start, end + 1); + } + + public void add(Chars set) { + this.set.or(set.set); + } + + public boolean contains(int c) { + return set.get(c); + } } + private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars() { + { + add(':', '/', '?', '#', '[', ']', '@'); + } + }; + private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars() { + { + add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='); + } + }; + private static final Chars RFC3986_UNRESERVED_CHARS = new Chars() { + { + addRange('a', 'z'); + addRange('A', 'Z'); + addRange('0', '9'); + add('-', '.', '_', '~'); + } + }; + private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars() { + { + add(RFC3986_UNRESERVED_CHARS); + add(RFC3986_SUB_DELIMS_CHARS); + add('@'); + } + }; + private static final Chars RFC3986_PATH_CHARS = new Chars() { + { + add(RFC3986_PATH_NO_COLON_CHARS); + add(':'); + } + }; + private static final Chars RFC3986_URIC_CHARS = new Chars() { + { + add(RFC3986_SUB_DELIMS_CHARS); + add(RFC3986_UNRESERVED_CHARS); + } + }; 
+ + private static final Chars RFC5987_UNRESERVED_CHARS = new Chars() { + { + addRange('a', 'z'); + addRange('A', 'Z'); + addRange('0', '9'); + // Additional characters as per RFC 5987 attr-char + add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~'); + } + }; + private static final int RADIX = 16; private static void encode( final StringBuilder buf, final CharSequence content, final Charset charset, - final BitSet safeChars, + final Chars safeChars, final boolean blankAsPlus ) { if (content == null) { @@ -120,7 +107,7 @@ private static void encode( final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb); while (bb.hasRemaining()) { final int b = bb.get() & 0xff; - if (safeChars.get(b)) { + if (safeChars.contains(b)) { buf.append((char) b); } else if (blankAsPlus && b == ' ') { buf.append("+"); @@ -165,12 +152,12 @@ private static String decode(final CharSequence content, final Charset charset, } public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS); - public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATHSAFE_CHARS); + public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATH_CHARS); public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS); - private final BitSet unreserved; + private final Chars unreserved; - private PercentCodec(final BitSet unreserved) { + private PercentCodec(final Chars unreserved) { this.unreserved = unreserved; } From 2820ff8fb8192a168689e9d1fda6494b64b1c940 Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Tue, 30 Jul 2024 14:28:46 +1200 Subject: [PATCH 3/6] Add change log Signed-off-by: Thomas Farr --- CHANGELOG.md | 1 + .../java/org/opensearch/client/util/PathEncoder.java | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6093bfe19..94fe345f44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ 
This section is for maintaining a changelog for all breaking changes for the cli ### Dependencies ### Changed +- Changed URL path encoding to own implementation adapted from Apache HTTP Client 5's ([#1109](https://github.com/opensearch-project/opensearch-java/pull/1109)) ### Deprecated diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index 2574aaa26a..04fd89e3f0 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -9,12 +9,18 @@ package org.opensearch.client.util; public class PathEncoder { - public static final PercentCodec DEFAULT_CODEC = PercentCodec.RFC3986_UNRESERVED; /** * Percent encoding codec that matches Apache HTTP Client 4's path segment encoding. */ @Deprecated - public static final PercentCodec APACHE_HTTP_CLIENT_4_COMPAT = PercentCodec.RFC3986_PATHSAFE; + public static final PercentCodec APACHE_HTTP_CLIENT_4_EQUIV_CODEC = PercentCodec.RFC3986_PATHSAFE; + /** + * Percent encoding codec that matches Apache HTTP Client 5's path segment encoding. 
+ */ + public static final PercentCodec APACHE_HTTP_CLIENT_5_EQUIV_CODEC = PercentCodec.RFC3986_UNRESERVED; + + public static final PercentCodec DEFAULT_CODEC = APACHE_HTTP_CLIENT_5_EQUIV_CODEC; + private static PercentCodec codec; public static PercentCodec getCodec() { From 75f9059abd9b8b1e2b54271bbdd6cba208245304 Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Wed, 31 Jul 2024 18:40:47 +1200 Subject: [PATCH 4/6] Switch to system property Signed-off-by: Thomas Farr --- .../opensearch/client/util/PathEncoder.java | 51 ++++++++++--------- .../opensearch/client/util/PercentCodec.java | 4 +- .../client/util/PercentCodecTest.java | 16 +++--- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index 04fd89e3f0..d9dde38df3 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -8,37 +8,42 @@ package org.opensearch.client.util; +import java.util.Optional; + public class PathEncoder { - /** - * Percent encoding codec that matches Apache HTTP Client 4's path segment encoding. - */ - @Deprecated - public static final PercentCodec APACHE_HTTP_CLIENT_4_EQUIV_CODEC = PercentCodec.RFC3986_PATHSAFE; - /** - * Percent encoding codec that matches Apache HTTP Client 5's path segment encoding. 
- */ - public static final PercentCodec APACHE_HTTP_CLIENT_5_EQUIV_CODEC = PercentCodec.RFC3986_UNRESERVED; - - public static final PercentCodec DEFAULT_CODEC = APACHE_HTTP_CLIENT_5_EQUIV_CODEC; - - private static PercentCodec codec; - - public static PercentCodec getCodec() { - if (codec == null) { - codec = DEFAULT_CODEC; + private enum Encoding { + RFC3986_PATH(PercentCodec.RFC3986_PATH), + HTTP_CLIENT_V4_EQUIV(PercentCodec.RFC3986_PATH), + + RFC3986_UNRESERVED(PercentCodec.RFC3986_UNRESERVED), + HTTP_CLIENT_V5_EQUIV(PercentCodec.RFC3986_UNRESERVED); + + private final PercentCodec percentCodec; + + Encoding(PercentCodec percentCodec) { + this.percentCodec = percentCodec; } - return codec; - } - public static void setCodec(PercentCodec codec) { - PathEncoder.codec = codec; + static Optional get(String name) { + try { + return Optional.of(Encoding.valueOf(name.toUpperCase())); + } catch (Exception ignored) { + return Optional.empty(); + } + } } + private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding"; + private static final Encoding ENCODING_DEFAULT = Encoding.HTTP_CLIENT_V5_EQUIV; + + private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY)) + .flatMap(Encoding::get) + .orElse(ENCODING_DEFAULT); public static String encode(String pathSegment) { - return getCodec().encode(pathSegment); + return ENCODING.percentCodec.encode(pathSegment); } public static void encode(StringBuilder dest, CharSequence pathSegment) { - getCodec().encode(dest, pathSegment); + ENCODING.percentCodec.encode(dest, pathSegment); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java index a672c71c38..9fb50637f9 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java +++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java @@ -20,7 +20,7 @@ * Adapted from Apache HttpComponents 
HttpCore v5's PercentCodec.java *

*/ -public class PercentCodec { +class PercentCodec { private static class Chars { private final BitSet set = new BitSet(256); @@ -152,7 +152,7 @@ private static String decode(final CharSequence content, final Charset charset, } public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS); - public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATH_CHARS); + public static final PercentCodec RFC3986_PATH = new PercentCodec(RFC3986_PATH_CHARS); public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS); private final Chars unreserved; diff --git a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java index 178dd3db03..7a13f69c58 100644 --- a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java +++ b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java @@ -22,7 +22,7 @@ public class PercentCodecTest { public static Collection testData() { return Arrays.asList( new Object[][] { - // , , + // , , { "test", "test", "test" }, { "abc123", "abc123", "abc123" }, { "a/b", "a%2Fb", "a%2Fb" }, @@ -34,12 +34,12 @@ public static Collection testData() { private final String decoded; private final String encodedRFC3986Unreserved; - private final String encodedRFC3986PathSafe; + private final String encodedRFC3986Path; - public PercentCodecTest(String decoded, String encodedRFC3986Unreserved, String encodedRFC3986PathSafe) { + public PercentCodecTest(String decoded, String encodedRFC3986Unreserved, String encodedRFC3986Path) { this.decoded = decoded; this.encodedRFC3986Unreserved = encodedRFC3986Unreserved; - this.encodedRFC3986PathSafe = encodedRFC3986PathSafe; + this.encodedRFC3986Path = encodedRFC3986Path; } @Test @@ -53,12 +53,12 @@ public void test_RFC3986_UNRESERVED_decoding() { } @Test - public void test_RFC3986_PATHSAFE_encoding() { - 
assertEquals(this.encodedRFC3986PathSafe, PercentCodec.RFC3986_PATHSAFE.encode(this.decoded)); + public void test_RFC3986_PATH_encoding() { + assertEquals(this.encodedRFC3986Path, PercentCodec.RFC3986_PATH.encode(this.decoded)); } @Test - public void test_RFC3986_PATHSAFE_decoding() { - assertEquals(this.decoded, PercentCodec.RFC3986_PATHSAFE.decode(this.encodedRFC3986PathSafe)); + public void test_RFC3986_PATH_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_PATH.decode(this.encodedRFC3986Path)); } } From 8430323d77f439e244156d0fe8d3bfdca6c701d7 Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Wed, 31 Jul 2024 18:46:19 +1200 Subject: [PATCH 5/6] spotless Signed-off-by: Thomas Farr --- .../main/java/org/opensearch/client/util/PathEncoder.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index d9dde38df3..5d43b7c19a 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -32,12 +32,13 @@ static Optional get(String name) { } } } + private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding"; private static final Encoding ENCODING_DEFAULT = Encoding.HTTP_CLIENT_V5_EQUIV; private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY)) - .flatMap(Encoding::get) - .orElse(ENCODING_DEFAULT); + .flatMap(Encoding::get) + .orElse(ENCODING_DEFAULT); public static String encode(String pathSegment) { return ENCODING.percentCodec.encode(pathSegment); From f436a3d0284a8d589bd09ad89049bbbc155bae8e Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Thu, 1 Aug 2024 08:19:11 +1200 Subject: [PATCH 6/6] Add UPGRADING note Signed-off-by: Thomas Farr --- UPGRADING.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/UPGRADING.md 
b/UPGRADING.md index 4e437fa770..1b1a91c7c6 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -1,6 +1,9 @@ # UPGRADING -## [UPGRADING 2.x to 3.0] +## Upgrading 2.x to 3.0 +### URL Path Encoding +- The default URL path encoding has been changed to be more conservative. Previously, the `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `@` and `:` characters were left un-encoded; they will now be percent-encoded. If you require the previous behavior, you can specify the `org.opensearch.path.encoding=HTTP_CLIENT_V4_EQUIV` system property. + ### SearchAfter of SearchRequest type - Changed SearchAfter of SearchRequest type to FieldValue instead of String ([#769](https://github.com/opensearch-project/opensearch-java/pull/769)) - Consider using `FieldValue.of` to make string type values compatible.