diff --git a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java index 6409c845bc..f886c12ee7 100644 --- a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java +++ b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java @@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) { } public static void pathEncode(String src, StringBuilder dest) { - dest.append(PathEncoder.encode(src)); + PathEncoder.encode(dest, src); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java index c9345a003f..0d1aa35703 100644 --- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java +++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java @@ -8,59 +8,32 @@ package org.opensearch.client.util; -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.Collections; - public class PathEncoder { - private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils"; - private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils"; - private final static MethodHandle FORMAT_SEGMENTS_MH; - - static { - Class clazz = null; - try { - // Try Apache HttpClient5 first since this is a default one - clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS); - } catch (final ClassNotFoundException ex) { - try { - // Fallback to Apache HttpClient4 - clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS); - } catch (final ClassNotFoundException ex1) { - clazz = null; - } + public static final PercentCodec DEFAULT_CODEC = PercentCodec.RFC3986_UNRESERVED; + /** + * Percent encoding codec that matches Apache HTTP Client 4's path segment encoding. 
+ */ + @Deprecated + public static final PercentCodec APACHE_HTTP_CLIENT_4_COMPAT = PercentCodec.RFC3986_PATHSAFE; + private static PercentCodec codec; + + public static PercentCodec getCodec() { + if (codec == null) { + codec = DEFAULT_CODEC; } + return codec; + } - if (clazz == null) { - throw new IllegalStateException( - "Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath" - ); - } + public static void setCodec(PercentCodec codec) { + PathEncoder.codec = codec; + } - try { - FORMAT_SEGMENTS_MH = MethodHandles.lookup() - .findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class)); - } catch (final NoSuchMethodException | IllegalAccessException ex) { - throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class"); - } + + public static String encode(String pathSegment) { + return getCodec().encode(pathSegment); } - public static String encode(String uri) { - try { - return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1); - } catch (final Throwable ex) { - throw new RuntimeException("Unable to encode URI: " + uri, ex); - } + public static void encode(StringBuilder dest, CharSequence pathSegment) { + getCodec().encode(dest, pathSegment); } } diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java new file mode 100644 index 0000000000..f0f6c47813 --- /dev/null +++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java @@ -0,0 +1,193 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.util; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.BitSet; + +/** + * Percent-encoding. + *

<p> + * Adapted from Apache HttpComponents HttpCore v5's PercentCodec.java + * </p>

+ */ +public class PercentCodec { + private static final BitSet RFC3986_GEN_DELIMS_CHARS = new BitSet(256); + private static final BitSet RFC3986_SUB_DELIMS_CHARS = new BitSet(256); + private static final BitSet RFC3986_UNRESERVED_CHARS = new BitSet(256); + private static final BitSet RFC3986_PATHSAFE_NC_CHARS = new BitSet(256); + private static final BitSet RFC3986_PATHSAFE_CHARS = new BitSet(256); + private static final BitSet RFC3986_URIC_CHARS = new BitSet(256); + + static { + RFC3986_GEN_DELIMS_CHARS.set(':'); + RFC3986_GEN_DELIMS_CHARS.set('/'); + RFC3986_GEN_DELIMS_CHARS.set('?'); + RFC3986_GEN_DELIMS_CHARS.set('#'); + RFC3986_GEN_DELIMS_CHARS.set('['); + RFC3986_GEN_DELIMS_CHARS.set(']'); + RFC3986_GEN_DELIMS_CHARS.set('@'); + + RFC3986_SUB_DELIMS_CHARS.set('!'); + RFC3986_SUB_DELIMS_CHARS.set('$'); + RFC3986_SUB_DELIMS_CHARS.set('&'); + RFC3986_SUB_DELIMS_CHARS.set('\''); + RFC3986_SUB_DELIMS_CHARS.set('('); + RFC3986_SUB_DELIMS_CHARS.set(')'); + RFC3986_SUB_DELIMS_CHARS.set('*'); + RFC3986_SUB_DELIMS_CHARS.set('+'); + RFC3986_SUB_DELIMS_CHARS.set(','); + RFC3986_SUB_DELIMS_CHARS.set(';'); + RFC3986_SUB_DELIMS_CHARS.set('='); + + for (int i = 'a'; i <= 'z'; i++) { + RFC3986_UNRESERVED_CHARS.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + RFC3986_UNRESERVED_CHARS.set(i); + } + // numeric characters + for (int i = '0'; i <= '9'; i++) { + RFC3986_UNRESERVED_CHARS.set(i); + } + RFC3986_UNRESERVED_CHARS.set('-'); + RFC3986_UNRESERVED_CHARS.set('.'); + RFC3986_UNRESERVED_CHARS.set('_'); + RFC3986_UNRESERVED_CHARS.set('~'); + + RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_UNRESERVED_CHARS); + RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_SUB_DELIMS_CHARS); + RFC3986_PATHSAFE_NC_CHARS.set('@'); + + RFC3986_PATHSAFE_CHARS.or(RFC3986_PATHSAFE_NC_CHARS); + RFC3986_PATHSAFE_CHARS.set(':'); + + RFC3986_URIC_CHARS.or(RFC3986_SUB_DELIMS_CHARS); + RFC3986_URIC_CHARS.or(RFC3986_UNRESERVED_CHARS); + } + + private static final BitSet RFC5987_UNRESERVED_CHARS = new BitSet(256); + + static 
{ + // Alphanumeric characters + for (int i = 'a'; i <= 'z'; i++) { + RFC5987_UNRESERVED_CHARS.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + RFC5987_UNRESERVED_CHARS.set(i); + } + for (int i = '0'; i <= '9'; i++) { + RFC5987_UNRESERVED_CHARS.set(i); + } + + // Additional characters as per RFC 5987 attr-char + RFC5987_UNRESERVED_CHARS.set('!'); + RFC5987_UNRESERVED_CHARS.set('#'); + RFC5987_UNRESERVED_CHARS.set('$'); + RFC5987_UNRESERVED_CHARS.set('&'); + RFC5987_UNRESERVED_CHARS.set('+'); + RFC5987_UNRESERVED_CHARS.set('-'); + RFC5987_UNRESERVED_CHARS.set('.'); + RFC5987_UNRESERVED_CHARS.set('^'); + RFC5987_UNRESERVED_CHARS.set('_'); + RFC5987_UNRESERVED_CHARS.set('`'); + RFC5987_UNRESERVED_CHARS.set('|'); + RFC5987_UNRESERVED_CHARS.set('~'); + } + + private static final int RADIX = 16; + + private static void encode( + final StringBuilder buf, + final CharSequence content, + final Charset charset, + final BitSet safeChars, + final boolean blankAsPlus + ) { + if (content == null) { + return; + } + final CharBuffer cb = CharBuffer.wrap(content); + final ByteBuffer bb = (charset != null ? 
charset : StandardCharsets.UTF_8).encode(cb); + while (bb.hasRemaining()) { + final int b = bb.get() & 0xff; + if (safeChars.get(b)) { + buf.append((char) b); + } else if (blankAsPlus && b == ' ') { + buf.append("+"); + } else { + buf.append("%"); + final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); + final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); + buf.append(hex1); + buf.append(hex2); + } + } + } + + private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) { + if (content == null) { + return null; + } + final ByteBuffer bb = ByteBuffer.allocate(content.length()); + final CharBuffer cb = CharBuffer.wrap(content); + while (cb.hasRemaining()) { + final char c = cb.get(); + if (c == '%' && cb.remaining() >= 2) { + final char uc = cb.get(); + final char lc = cb.get(); + final int u = Character.digit(uc, RADIX); + final int l = Character.digit(lc, RADIX); + if (u != -1 && l != -1) { + bb.put((byte) ((u << 4) + l)); + } else { + bb.put((byte) '%'); + bb.put((byte) uc); + bb.put((byte) lc); + } + } else if (plusAsBlank && c == '+') { + bb.put((byte) ' '); + } else { + bb.put((byte) c); + } + } + bb.flip(); + return (charset != null ? 
charset : StandardCharsets.UTF_8).decode(bb).toString(); + } + + public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS); + public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATHSAFE_CHARS); + public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS); + + private final BitSet unreserved; + + private PercentCodec(final BitSet unreserved) { + this.unreserved = unreserved; + } + + public void encode(final StringBuilder buf, final CharSequence content) { + encode(buf, content, StandardCharsets.UTF_8, unreserved, false); + } + + public String encode(final CharSequence content) { + if (content == null) { + return null; + } + final StringBuilder buf = new StringBuilder(); + encode(buf, content, StandardCharsets.UTF_8, unreserved, false); + return buf.toString(); + } + + public String decode(final CharSequence content) { + return decode(content, StandardCharsets.UTF_8, false); + } +} diff --git a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java index b37f98622a..680f8ebe6d 100644 --- a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java +++ b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java @@ -57,19 +57,10 @@ public void testArrayPathParameter() { assertEquals("/a/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a", "b")); - if (isHttpClient5Present()) { - assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - - } else { - assertEquals("/a,b/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a", "b", "c")); - if (isHttpClient5Present()) { - assertEquals("/a%2Cb%2Cc/_refresh", 
RefreshRequest._ENDPOINT.requestUrl(req)); - } else { - assertEquals("/a,b,c/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); } @Test @@ -80,11 +71,7 @@ public void testPathEncoding() { assertEquals("/a%2Fb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); req = RefreshRequest.of(b -> b.index("a/b", "c/d")); - if (isHttpClient5Present()) { - assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } else { - assertEquals("/a%2Fb,c%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); - } + assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req)); } @@ -103,13 +90,4 @@ public void testArrayQueryParameter() { req = RefreshRequest.of(b -> b.expandWildcards(ExpandWildcard.All, ExpandWildcard.Closed)); assertEquals("all,closed", RefreshRequest._ENDPOINT.queryParameters(req).get("expand_wildcards")); } - - private static boolean isHttpClient5Present() { - try { - Class.forName("org.apache.hc.core5.net.URLEncodedUtils"); - return true; - } catch (ClassNotFoundException e) { - return false; - } - } } diff --git a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java b/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java deleted file mode 100644 index 0e0a5f8c15..0000000000 --- a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.client.util; - -import static org.junit.Assert.assertEquals; - -import org.junit.Test; - -public class PathEncoderTest { - - @Test - public void testEncode() { - // Test with a simple string - String simpleString = "test"; - String encodedSimpleString = PathEncoder.encode(simpleString); - assertEquals(simpleString, encodedSimpleString); - - // Test with a string that contains special characters - String specialString = "a/b"; - String encodedSpecialString = PathEncoder.encode(specialString); - assertEquals("a%2Fb", encodedSpecialString); - - // Test with a string that contains alphanumeric characters - String alphanumericString = "abc123"; - String encodedAlphanumericString = PathEncoder.encode(alphanumericString); - assertEquals("abc123", encodedAlphanumericString); - - // Test with a string that contains multiple segments - String multiSegmentString = "a/b/c/_refresh"; - String encodedMultiSegmentString = PathEncoder.encode(multiSegmentString); - assertEquals("a%2Fb%2Fc%2F_refresh", encodedMultiSegmentString); - - // Test with a string that contains colon segment - String colonSegmentString = "a:b:c::2.0"; - String encodedColonSegmentString = PathEncoder.encode(colonSegmentString); - assertEquals("a%3Ab%3Ac%3A%3A2.0", encodedColonSegmentString); - } -} diff --git a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java new file mode 100644 index 0000000000..178dd3db03 --- /dev/null +++ b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.client.util; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class PercentCodecTest { + @Parameterized.Parameters + public static Collection testData() { + return Arrays.asList( + new Object[][] { + // decoded, encodedRFC3986Unreserved, encodedRFC3986PathSafe + { "test", "test", "test" }, + { "abc123", "abc123", "abc123" }, + { "a/b", "a%2Fb", "a%2Fb" }, + { "a/b/c/_refresh", "a%2Fb%2Fc%2F_refresh", "a%2Fb%2Fc%2F_refresh" }, + { "a:b:c:d:e::1.0", "a%3Ab%3Ac%3Ad%3Ae%3A%3A1.0", "a:b:c:d:e::1.0" }, + { "a,b,c", "a%2Cb%2Cc", "a,b,c" } } + ); + } + + private final String decoded; + private final String encodedRFC3986Unreserved; + private final String encodedRFC3986PathSafe; + + public PercentCodecTest(String decoded, String encodedRFC3986Unreserved, String encodedRFC3986PathSafe) { + this.decoded = decoded; + this.encodedRFC3986Unreserved = encodedRFC3986Unreserved; + this.encodedRFC3986PathSafe = encodedRFC3986PathSafe; + } + + @Test + public void test_RFC3986_UNRESERVED_encoding() { + assertEquals(this.encodedRFC3986Unreserved, PercentCodec.RFC3986_UNRESERVED.encode(this.decoded)); + } + + @Test + public void test_RFC3986_UNRESERVED_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_UNRESERVED.decode(this.encodedRFC3986Unreserved)); + } + + @Test + public void test_RFC3986_PATHSAFE_encoding() { + assertEquals(this.encodedRFC3986PathSafe, PercentCodec.RFC3986_PATHSAFE.encode(this.decoded)); + } + + @Test + public void test_RFC3986_PATHSAFE_decoding() { + assertEquals(this.decoded, PercentCodec.RFC3986_PATHSAFE.decode(this.encodedRFC3986PathSafe)); + } +}