From b5901c97c59ea04a980d74e73549e0cd6b4bc788 Mon Sep 17 00:00:00 2001
From: Thomas Farr
Date: Thu, 25 Jul 2024 17:02:56 +1200
Subject: [PATCH 1/6] Use own copy of PercentCodec for URI path encoding
Adapted from Apache HttpComponents HttpCore v5's https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java
Signed-off-by: Thomas Farr
---
.../transport/endpoints/SimpleEndpoint.java | 2 +-
.../opensearch/client/util/PathEncoder.java | 69 ++-----
.../opensearch/client/util/PercentCodec.java | 193 ++++++++++++++++++
.../client/opensearch/model/EndpointTest.java | 28 +--
.../client/util/PathEncoderTest.java | 44 ----
.../client/util/PercentCodecTest.java | 64 ++++++
6 files changed, 282 insertions(+), 118 deletions(-)
create mode 100644 java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
delete mode 100644 java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java
create mode 100644 java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java
diff --git a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java
index 6409c845bc..f886c12ee7 100644
--- a/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java
+++ b/java-client/src/main/java/org/opensearch/client/transport/endpoints/SimpleEndpoint.java
@@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) {
}
public static void pathEncode(String src, StringBuilder dest) {
- dest.append(PathEncoder.encode(src));
+ PathEncoder.encode(dest, src);
}
}
diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
index c9345a003f..0d1aa35703 100644
--- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
+++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
@@ -8,59 +8,32 @@
package org.opensearch.client.util;
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-import java.lang.invoke.MethodHandle;
-import java.lang.invoke.MethodHandles;
-import java.lang.invoke.MethodType;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-
public class PathEncoder {
- private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils";
- private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils";
- private final static MethodHandle FORMAT_SEGMENTS_MH;
-
- static {
- Class<?> clazz = null;
- try {
- // Try Apache HttpClient5 first since this is a default one
- clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS);
- } catch (final ClassNotFoundException ex) {
- try {
- // Fallback to Apache HttpClient4
- clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS);
- } catch (final ClassNotFoundException ex1) {
- clazz = null;
- }
+ public static final PercentCodec DEFAULT_CODEC = PercentCodec.RFC3986_UNRESERVED;
+ /**
+ * Percent encoding codec that matches Apache HTTP Client 4's path segment encoding.
+ */
+ @Deprecated
+ public static final PercentCodec APACHE_HTTP_CLIENT_4_COMPAT = PercentCodec.RFC3986_PATHSAFE;
+ private static PercentCodec codec;
+
+ public static PercentCodec getCodec() {
+ if (codec == null) {
+ codec = DEFAULT_CODEC;
}
+ return codec;
+ }
- if (clazz == null) {
- throw new IllegalStateException(
- "Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath"
- );
- }
+ public static void setCodec(PercentCodec codec) {
+ PathEncoder.codec = codec;
+ }
- try {
- FORMAT_SEGMENTS_MH = MethodHandles.lookup()
- .findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class));
- } catch (final NoSuchMethodException | IllegalAccessException ex) {
- throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class");
- }
+
+ public static String encode(String pathSegment) {
+ return getCodec().encode(pathSegment);
}
- public static String encode(String uri) {
- try {
- return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1);
- } catch (final Throwable ex) {
- throw new RuntimeException("Unable to encode URI: " + uri, ex);
- }
+ public static void encode(StringBuilder dest, CharSequence pathSegment) {
+ getCodec().encode(dest, pathSegment);
}
}
diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
new file mode 100644
index 0000000000..f0f6c47813
--- /dev/null
+++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
@@ -0,0 +1,193 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.client.util;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.BitSet;
+
+/**
+ * Percent-encoding.
+ *
+ * Adapted from Apache HttpComponents HttpCore v5's PercentCodec.java
+ *
+ */
+public class PercentCodec {
+ private static final BitSet RFC3986_GEN_DELIMS_CHARS = new BitSet(256);
+ private static final BitSet RFC3986_SUB_DELIMS_CHARS = new BitSet(256);
+ private static final BitSet RFC3986_UNRESERVED_CHARS = new BitSet(256);
+ private static final BitSet RFC3986_PATHSAFE_NC_CHARS = new BitSet(256);
+ private static final BitSet RFC3986_PATHSAFE_CHARS = new BitSet(256);
+ private static final BitSet RFC3986_URIC_CHARS = new BitSet(256);
+
+ static {
+ RFC3986_GEN_DELIMS_CHARS.set(':');
+ RFC3986_GEN_DELIMS_CHARS.set('/');
+ RFC3986_GEN_DELIMS_CHARS.set('?');
+ RFC3986_GEN_DELIMS_CHARS.set('#');
+ RFC3986_GEN_DELIMS_CHARS.set('[');
+ RFC3986_GEN_DELIMS_CHARS.set(']');
+ RFC3986_GEN_DELIMS_CHARS.set('@');
+
+ RFC3986_SUB_DELIMS_CHARS.set('!');
+ RFC3986_SUB_DELIMS_CHARS.set('$');
+ RFC3986_SUB_DELIMS_CHARS.set('&');
+ RFC3986_SUB_DELIMS_CHARS.set('\'');
+ RFC3986_SUB_DELIMS_CHARS.set('(');
+ RFC3986_SUB_DELIMS_CHARS.set(')');
+ RFC3986_SUB_DELIMS_CHARS.set('*');
+ RFC3986_SUB_DELIMS_CHARS.set('+');
+ RFC3986_SUB_DELIMS_CHARS.set(',');
+ RFC3986_SUB_DELIMS_CHARS.set(';');
+ RFC3986_SUB_DELIMS_CHARS.set('=');
+
+ for (int i = 'a'; i <= 'z'; i++) {
+ RFC3986_UNRESERVED_CHARS.set(i);
+ }
+ for (int i = 'A'; i <= 'Z'; i++) {
+ RFC3986_UNRESERVED_CHARS.set(i);
+ }
+ // numeric characters
+ for (int i = '0'; i <= '9'; i++) {
+ RFC3986_UNRESERVED_CHARS.set(i);
+ }
+ RFC3986_UNRESERVED_CHARS.set('-');
+ RFC3986_UNRESERVED_CHARS.set('.');
+ RFC3986_UNRESERVED_CHARS.set('_');
+ RFC3986_UNRESERVED_CHARS.set('~');
+
+ RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_UNRESERVED_CHARS);
+ RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_SUB_DELIMS_CHARS);
+ RFC3986_PATHSAFE_NC_CHARS.set('@');
+
+ RFC3986_PATHSAFE_CHARS.or(RFC3986_PATHSAFE_NC_CHARS);
+ RFC3986_PATHSAFE_CHARS.set(':');
+
+ RFC3986_URIC_CHARS.or(RFC3986_SUB_DELIMS_CHARS);
+ RFC3986_URIC_CHARS.or(RFC3986_UNRESERVED_CHARS);
+ }
+
+ private static final BitSet RFC5987_UNRESERVED_CHARS = new BitSet(256);
+
+ static {
+ // Alphanumeric characters
+ for (int i = 'a'; i <= 'z'; i++) {
+ RFC5987_UNRESERVED_CHARS.set(i);
+ }
+ for (int i = 'A'; i <= 'Z'; i++) {
+ RFC5987_UNRESERVED_CHARS.set(i);
+ }
+ for (int i = '0'; i <= '9'; i++) {
+ RFC5987_UNRESERVED_CHARS.set(i);
+ }
+
+ // Additional characters as per RFC 5987 attr-char
+ RFC5987_UNRESERVED_CHARS.set('!');
+ RFC5987_UNRESERVED_CHARS.set('#');
+ RFC5987_UNRESERVED_CHARS.set('$');
+ RFC5987_UNRESERVED_CHARS.set('&');
+ RFC5987_UNRESERVED_CHARS.set('+');
+ RFC5987_UNRESERVED_CHARS.set('-');
+ RFC5987_UNRESERVED_CHARS.set('.');
+ RFC5987_UNRESERVED_CHARS.set('^');
+ RFC5987_UNRESERVED_CHARS.set('_');
+ RFC5987_UNRESERVED_CHARS.set('`');
+ RFC5987_UNRESERVED_CHARS.set('|');
+ RFC5987_UNRESERVED_CHARS.set('~');
+ }
+
+ private static final int RADIX = 16;
+
+ private static void encode(
+ final StringBuilder buf,
+ final CharSequence content,
+ final Charset charset,
+ final BitSet safeChars,
+ final boolean blankAsPlus
+ ) {
+ if (content == null) {
+ return;
+ }
+ final CharBuffer cb = CharBuffer.wrap(content);
+ final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb);
+ while (bb.hasRemaining()) {
+ final int b = bb.get() & 0xff;
+ if (safeChars.get(b)) {
+ buf.append((char) b);
+ } else if (blankAsPlus && b == ' ') {
+ buf.append("+");
+ } else {
+ buf.append("%");
+ final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
+ final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
+ buf.append(hex1);
+ buf.append(hex2);
+ }
+ }
+ }
+
+ private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) {
+ if (content == null) {
+ return null;
+ }
+ final ByteBuffer bb = ByteBuffer.allocate(content.length());
+ final CharBuffer cb = CharBuffer.wrap(content);
+ while (cb.hasRemaining()) {
+ final char c = cb.get();
+ if (c == '%' && cb.remaining() >= 2) {
+ final char uc = cb.get();
+ final char lc = cb.get();
+ final int u = Character.digit(uc, RADIX);
+ final int l = Character.digit(lc, RADIX);
+ if (u != -1 && l != -1) {
+ bb.put((byte) ((u << 4) + l));
+ } else {
+ bb.put((byte) '%');
+ bb.put((byte) uc);
+ bb.put((byte) lc);
+ }
+ } else if (plusAsBlank && c == '+') {
+ bb.put((byte) ' ');
+ } else {
+ bb.put((byte) c);
+ }
+ }
+ bb.flip();
+ return (charset != null ? charset : StandardCharsets.UTF_8).decode(bb).toString();
+ }
+
+ public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
+ public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATHSAFE_CHARS);
+ public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);
+
+ private final BitSet unreserved;
+
+ private PercentCodec(final BitSet unreserved) {
+ this.unreserved = unreserved;
+ }
+
+ public void encode(final StringBuilder buf, final CharSequence content) {
+ encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
+ }
+
+ public String encode(final CharSequence content) {
+ if (content == null) {
+ return null;
+ }
+ final StringBuilder buf = new StringBuilder();
+ encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
+ return buf.toString();
+ }
+
+ public String decode(final CharSequence content) {
+ return decode(content, StandardCharsets.UTF_8, false);
+ }
+}
diff --git a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java
index b37f98622a..680f8ebe6d 100644
--- a/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java
+++ b/java-client/src/test/java/org/opensearch/client/opensearch/model/EndpointTest.java
@@ -57,19 +57,10 @@ public void testArrayPathParameter() {
assertEquals("/a/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
req = RefreshRequest.of(b -> b.index("a", "b"));
- if (isHttpClient5Present()) {
- assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
-
- } else {
- assertEquals("/a,b/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
- }
+ assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
req = RefreshRequest.of(b -> b.index("a", "b", "c"));
- if (isHttpClient5Present()) {
- assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
- } else {
- assertEquals("/a,b,c/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
- }
+ assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
@Test
@@ -80,11 +71,7 @@ public void testPathEncoding() {
assertEquals("/a%2Fb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
req = RefreshRequest.of(b -> b.index("a/b", "c/d"));
- if (isHttpClient5Present()) {
- assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
- } else {
- assertEquals("/a%2Fb,c%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
- }
+ assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
@@ -103,13 +90,4 @@ public void testArrayQueryParameter() {
req = RefreshRequest.of(b -> b.expandWildcards(ExpandWildcard.All, ExpandWildcard.Closed));
assertEquals("all,closed", RefreshRequest._ENDPOINT.queryParameters(req).get("expand_wildcards"));
}
-
- private static boolean isHttpClient5Present() {
- try {
- Class.forName("org.apache.hc.core5.net.URLEncodedUtils");
- return true;
- } catch (ClassNotFoundException e) {
- return false;
- }
- }
}
diff --git a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java b/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java
deleted file mode 100644
index 0e0a5f8c15..0000000000
--- a/java-client/src/test/java/org/opensearch/client/util/PathEncoderTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.client.util;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Test;
-
-public class PathEncoderTest {
-
- @Test
- public void testEncode() {
- // Test with a simple string
- String simpleString = "test";
- String encodedSimpleString = PathEncoder.encode(simpleString);
- assertEquals(simpleString, encodedSimpleString);
-
- // Test with a string that contains special characters
- String specialString = "a/b";
- String encodedSpecialString = PathEncoder.encode(specialString);
- assertEquals("a%2Fb", encodedSpecialString);
-
- // Test with a string that contains alphanumeric characters
- String alphanumericString = "abc123";
- String encodedAlphanumericString = PathEncoder.encode(alphanumericString);
- assertEquals("abc123", encodedAlphanumericString);
-
- // Test with a string that contains multiple segments
- String multiSegmentString = "a/b/c/_refresh";
- String encodedMultiSegmentString = PathEncoder.encode(multiSegmentString);
- assertEquals("a%2Fb%2Fc%2F_refresh", encodedMultiSegmentString);
-
- // Test with a string that contains colon segment
- String colonSegmentString = "a:b:c::2.0";
- String encodedColonSegmentString = PathEncoder.encode(colonSegmentString);
- assertEquals("a%3Ab%3Ac%3A%3A2.0", encodedColonSegmentString);
- }
-}
diff --git a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java
new file mode 100644
index 0000000000..178dd3db03
--- /dev/null
+++ b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java
@@ -0,0 +1,64 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.client.util;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.Collection;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class PercentCodecTest {
+ @Parameterized.Parameters
+ public static Collection
*/
public class PercentCodec {
- private static final BitSet RFC3986_GEN_DELIMS_CHARS = new BitSet(256);
- private static final BitSet RFC3986_SUB_DELIMS_CHARS = new BitSet(256);
- private static final BitSet RFC3986_UNRESERVED_CHARS = new BitSet(256);
- private static final BitSet RFC3986_PATHSAFE_NC_CHARS = new BitSet(256);
- private static final BitSet RFC3986_PATHSAFE_CHARS = new BitSet(256);
- private static final BitSet RFC3986_URIC_CHARS = new BitSet(256);
-
- static {
- RFC3986_GEN_DELIMS_CHARS.set(':');
- RFC3986_GEN_DELIMS_CHARS.set('/');
- RFC3986_GEN_DELIMS_CHARS.set('?');
- RFC3986_GEN_DELIMS_CHARS.set('#');
- RFC3986_GEN_DELIMS_CHARS.set('[');
- RFC3986_GEN_DELIMS_CHARS.set(']');
- RFC3986_GEN_DELIMS_CHARS.set('@');
-
- RFC3986_SUB_DELIMS_CHARS.set('!');
- RFC3986_SUB_DELIMS_CHARS.set('$');
- RFC3986_SUB_DELIMS_CHARS.set('&');
- RFC3986_SUB_DELIMS_CHARS.set('\'');
- RFC3986_SUB_DELIMS_CHARS.set('(');
- RFC3986_SUB_DELIMS_CHARS.set(')');
- RFC3986_SUB_DELIMS_CHARS.set('*');
- RFC3986_SUB_DELIMS_CHARS.set('+');
- RFC3986_SUB_DELIMS_CHARS.set(',');
- RFC3986_SUB_DELIMS_CHARS.set(';');
- RFC3986_SUB_DELIMS_CHARS.set('=');
-
- for (int i = 'a'; i <= 'z'; i++) {
- RFC3986_UNRESERVED_CHARS.set(i);
- }
- for (int i = 'A'; i <= 'Z'; i++) {
- RFC3986_UNRESERVED_CHARS.set(i);
- }
- // numeric characters
- for (int i = '0'; i <= '9'; i++) {
- RFC3986_UNRESERVED_CHARS.set(i);
- }
- RFC3986_UNRESERVED_CHARS.set('-');
- RFC3986_UNRESERVED_CHARS.set('.');
- RFC3986_UNRESERVED_CHARS.set('_');
- RFC3986_UNRESERVED_CHARS.set('~');
-
- RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_UNRESERVED_CHARS);
- RFC3986_PATHSAFE_NC_CHARS.or(RFC3986_SUB_DELIMS_CHARS);
- RFC3986_PATHSAFE_NC_CHARS.set('@');
-
- RFC3986_PATHSAFE_CHARS.or(RFC3986_PATHSAFE_NC_CHARS);
- RFC3986_PATHSAFE_CHARS.set(':');
-
- RFC3986_URIC_CHARS.or(RFC3986_SUB_DELIMS_CHARS);
- RFC3986_URIC_CHARS.or(RFC3986_UNRESERVED_CHARS);
- }
+ private static class Chars {
+ private final BitSet set = new BitSet(256);
- private static final BitSet RFC5987_UNRESERVED_CHARS = new BitSet(256);
-
- static {
- // Alphanumeric characters
- for (int i = 'a'; i <= 'z'; i++) {
- RFC5987_UNRESERVED_CHARS.set(i);
- }
- for (int i = 'A'; i <= 'Z'; i++) {
- RFC5987_UNRESERVED_CHARS.set(i);
- }
- for (int i = '0'; i <= '9'; i++) {
- RFC5987_UNRESERVED_CHARS.set(i);
- }
-
- // Additional characters as per RFC 5987 attr-char
- RFC5987_UNRESERVED_CHARS.set('!');
- RFC5987_UNRESERVED_CHARS.set('#');
- RFC5987_UNRESERVED_CHARS.set('$');
- RFC5987_UNRESERVED_CHARS.set('&');
- RFC5987_UNRESERVED_CHARS.set('+');
- RFC5987_UNRESERVED_CHARS.set('-');
- RFC5987_UNRESERVED_CHARS.set('.');
- RFC5987_UNRESERVED_CHARS.set('^');
- RFC5987_UNRESERVED_CHARS.set('_');
- RFC5987_UNRESERVED_CHARS.set('`');
- RFC5987_UNRESERVED_CHARS.set('|');
- RFC5987_UNRESERVED_CHARS.set('~');
+ public void add(char... chars) {
+ for (char c : chars) {
+ set.set(c);
+ }
+ }
+
+ public void addRange(char start, char end) {
+ set.set(start, end + 1);
+ }
+
+ public void add(Chars set) {
+ this.set.or(set.set);
+ }
+
+ public boolean contains(int c) {
+ return set.get(c);
+ }
}
+ private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars() {
+ {
+ add(':', '/', '?', '#', '[', ']', '@');
+ }
+ };
+ private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars() {
+ {
+ add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=');
+ }
+ };
+ private static final Chars RFC3986_UNRESERVED_CHARS = new Chars() {
+ {
+ addRange('a', 'z');
+ addRange('A', 'Z');
+ addRange('0', '9');
+ add('-', '.', '_', '~');
+ }
+ };
+ private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars() {
+ {
+ add(RFC3986_UNRESERVED_CHARS);
+ add(RFC3986_SUB_DELIMS_CHARS);
+ add('@');
+ }
+ };
+ private static final Chars RFC3986_PATH_CHARS = new Chars() {
+ {
+ add(RFC3986_PATH_NO_COLON_CHARS);
+ add(':');
+ }
+ };
+ private static final Chars RFC3986_URIC_CHARS = new Chars() {
+ {
+ add(RFC3986_SUB_DELIMS_CHARS);
+ add(RFC3986_UNRESERVED_CHARS);
+ }
+ };
+
+ private static final Chars RFC5987_UNRESERVED_CHARS = new Chars() {
+ {
+ addRange('a', 'z');
+ addRange('A', 'Z');
+ addRange('0', '9');
+ // Additional characters as per RFC 5987 attr-char
+ add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~');
+ }
+ };
+
private static final int RADIX = 16;
private static void encode(
final StringBuilder buf,
final CharSequence content,
final Charset charset,
- final BitSet safeChars,
+ final Chars safeChars,
final boolean blankAsPlus
) {
if (content == null) {
@@ -120,7 +107,7 @@ private static void encode(
final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb);
while (bb.hasRemaining()) {
final int b = bb.get() & 0xff;
- if (safeChars.get(b)) {
+ if (safeChars.contains(b)) {
buf.append((char) b);
} else if (blankAsPlus && b == ' ') {
buf.append("+");
@@ -165,12 +152,12 @@ private static String decode(final CharSequence content, final Charset charset,
}
public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
- public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATHSAFE_CHARS);
+ public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATH_CHARS);
public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);
- private final BitSet unreserved;
+ private final Chars unreserved;
- private PercentCodec(final BitSet unreserved) {
+ private PercentCodec(final Chars unreserved) {
this.unreserved = unreserved;
}
From 2820ff8fb8192a168689e9d1fda6494b64b1c940 Mon Sep 17 00:00:00 2001
From: Thomas Farr
Date: Tue, 30 Jul 2024 14:28:46 +1200
Subject: [PATCH 3/6] Add change log
Signed-off-by: Thomas Farr
---
CHANGELOG.md | 1 +
.../java/org/opensearch/client/util/PathEncoder.java | 10 ++++++++--
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6093bfe19..94fe345f44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@ This section is for maintaining a changelog for all breaking changes for the cli
### Dependencies
### Changed
+- Changed URL path encoding to own implementation adapted from Apache HTTP Client 5's ([#1109](https://github.com/opensearch-project/opensearch-java/pull/1109))
### Deprecated
diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
index 2574aaa26a..04fd89e3f0 100644
--- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
+++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
@@ -9,12 +9,18 @@
package org.opensearch.client.util;
public class PathEncoder {
- public static final PercentCodec DEFAULT_CODEC = PercentCodec.RFC3986_UNRESERVED;
/**
* Percent encoding codec that matches Apache HTTP Client 4's path segment encoding.
*/
@Deprecated
- public static final PercentCodec APACHE_HTTP_CLIENT_4_COMPAT = PercentCodec.RFC3986_PATHSAFE;
+ public static final PercentCodec APACHE_HTTP_CLIENT_4_EQUIV_CODEC = PercentCodec.RFC3986_PATHSAFE;
+ /**
+ * Percent encoding codec that matches Apache HTTP Client 5's path segment encoding.
+ */
+ public static final PercentCodec APACHE_HTTP_CLIENT_5_EQUIV_CODEC = PercentCodec.RFC3986_UNRESERVED;
+
+ public static final PercentCodec DEFAULT_CODEC = APACHE_HTTP_CLIENT_5_EQUIV_CODEC;
+
private static PercentCodec codec;
public static PercentCodec getCodec() {
From 75f9059abd9b8b1e2b54271bbdd6cba208245304 Mon Sep 17 00:00:00 2001
From: Thomas Farr
Date: Wed, 31 Jul 2024 18:40:47 +1200
Subject: [PATCH 4/6] Switch to system property
Signed-off-by: Thomas Farr
---
.../opensearch/client/util/PathEncoder.java | 51 ++++++++++---------
.../opensearch/client/util/PercentCodec.java | 4 +-
.../client/util/PercentCodecTest.java | 16 +++---
3 files changed, 38 insertions(+), 33 deletions(-)
diff --git a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
index 04fd89e3f0..d9dde38df3 100644
--- a/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
+++ b/java-client/src/main/java/org/opensearch/client/util/PathEncoder.java
@@ -8,37 +8,42 @@
package org.opensearch.client.util;
+import java.util.Optional;
+
public class PathEncoder {
- /**
- * Percent encoding codec that matches Apache HTTP Client 4's path segment encoding.
- */
- @Deprecated
- public static final PercentCodec APACHE_HTTP_CLIENT_4_EQUIV_CODEC = PercentCodec.RFC3986_PATHSAFE;
- /**
- * Percent encoding codec that matches Apache HTTP Client 5's path segment encoding.
- */
- public static final PercentCodec APACHE_HTTP_CLIENT_5_EQUIV_CODEC = PercentCodec.RFC3986_UNRESERVED;
-
- public static final PercentCodec DEFAULT_CODEC = APACHE_HTTP_CLIENT_5_EQUIV_CODEC;
-
- private static PercentCodec codec;
-
- public static PercentCodec getCodec() {
- if (codec == null) {
- codec = DEFAULT_CODEC;
+ private enum Encoding {
+ RFC3986_PATH(PercentCodec.RFC3986_PATH),
+ HTTP_CLIENT_V4_EQUIV(PercentCodec.RFC3986_PATH),
+
+ RFC3986_UNRESERVED(PercentCodec.RFC3986_UNRESERVED),
+ HTTP_CLIENT_V5_EQUIV(PercentCodec.RFC3986_UNRESERVED);
+
+ private final PercentCodec percentCodec;
+
+ Encoding(PercentCodec percentCodec) {
+ this.percentCodec = percentCodec;
}
- return codec;
- }
- public static void setCodec(PercentCodec codec) {
- PathEncoder.codec = codec;
+ static Optional<Encoding> get(String name) { // maps a configured name to an Encoding constant, case-insensitively; empty if unrecognised
+ try {
+ return Optional.of(Encoding.valueOf(name.toUpperCase(java.util.Locale.ROOT))); // Locale.ROOT: default-locale casing (e.g. Turkish dotted I) would break HTTP_CLIENT_V*_EQUIV lookups
+ } catch (Exception ignored) { // best-effort by design: an unknown or null name yields empty instead of throwing
+ return Optional.empty();
+ }
+ }
}
+ private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding";
+ private static final Encoding ENCODING_DEFAULT = Encoding.HTTP_CLIENT_V5_EQUIV;
+
+ private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY))
+ .flatMap(Encoding::get)
+ .orElse(ENCODING_DEFAULT);
public static String encode(String pathSegment) {
- return getCodec().encode(pathSegment);
+ return ENCODING.percentCodec.encode(pathSegment);
}
public static void encode(StringBuilder dest, CharSequence pathSegment) {
- getCodec().encode(dest, pathSegment);
+ ENCODING.percentCodec.encode(dest, pathSegment);
}
}
diff --git a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
index a672c71c38..9fb50637f9 100644
--- a/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
+++ b/java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
@@ -20,7 +20,7 @@
* Adapted from Apache HttpComponents HttpCore v5's PercentCodec.java
*
*/
-public class PercentCodec {
+class PercentCodec {
private static class Chars {
private final BitSet set = new BitSet(256);
@@ -152,7 +152,7 @@ private static String decode(final CharSequence content, final Charset charset,
}
public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
- public static final PercentCodec RFC3986_PATHSAFE = new PercentCodec(RFC3986_PATH_CHARS);
+ public static final PercentCodec RFC3986_PATH = new PercentCodec(RFC3986_PATH_CHARS);
public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);
private final Chars unreserved;
diff --git a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java
index 178dd3db03..7a13f69c58 100644
--- a/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java
+++ b/java-client/src/test/java/org/opensearch/client/util/PercentCodecTest.java
@@ -22,7 +22,7 @@ public class PercentCodecTest {
public static Collection