Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use own copy of PercentCodec for URI path encoding #1109

Merged
merged 6 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ This section is for maintaining a changelog for all breaking changes for the cli
### Dependencies

### Changed
- Changed URL path encoding to own implementation adapted from Apache HTTP Client 5's ([#1109](https://github.com/opensearch-project/opensearch-java/pull/1109))

### Deprecated

Expand Down
5 changes: 4 additions & 1 deletion UPGRADING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# UPGRADING

## [UPGRADING 2.x to 3.0]
## Upgrading 2.x to 3.0
### URL Path Encoding
- The default URL path encoding has been changed to be more conservative. Previously the `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `@` and `:` characters were left un-encoded, they will now be percent-encoded. If you require the previous behavior you can specify the `org.opensearch.path.encoding=HTTP_CLIENT_V4_EQUIV` system property.

### SearchAfter of SearchRequest type
- Changed SearchAfter of SearchRequest type to FieldValue instead of String ([#769](https://github.com/opensearch-project/opensearch-java/pull/769))
- Consider using `FieldValue.of` to make string type values compatible.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) {
}

public static void pathEncode(String src, StringBuilder dest) {
dest.append(PathEncoder.encode(src));
PathEncoder.encode(dest, src);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,43 @@

package org.opensearch.client.util;

/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Optional;

public class PathEncoder {
private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils";
private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils";
private final static MethodHandle FORMAT_SEGMENTS_MH;
private enum Encoding {
RFC3986_PATH(PercentCodec.RFC3986_PATH),
HTTP_CLIENT_V4_EQUIV(PercentCodec.RFC3986_PATH),

RFC3986_UNRESERVED(PercentCodec.RFC3986_UNRESERVED),
HTTP_CLIENT_V5_EQUIV(PercentCodec.RFC3986_UNRESERVED);

static {
Class<?> clazz = null;
try {
// Try Apache HttpClient5 first since this is a default one
clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS);
} catch (final ClassNotFoundException ex) {
private final PercentCodec percentCodec;

Encoding(PercentCodec percentCodec) {
this.percentCodec = percentCodec;
}

static Optional<Encoding> get(String name) {
try {
// Fallback to Apache HttpClient4
clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS);
} catch (final ClassNotFoundException ex1) {
clazz = null;
return Optional.of(Encoding.valueOf(name.toUpperCase()));
} catch (Exception ignored) {
return Optional.empty();
}
}
}

if (clazz == null) {
throw new IllegalStateException(
"Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath"
);
}
private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding";
private static final Encoding ENCODING_DEFAULT = Encoding.HTTP_CLIENT_V5_EQUIV;

try {
FORMAT_SEGMENTS_MH = MethodHandles.lookup()
.findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class));
} catch (final NoSuchMethodException | IllegalAccessException ex) {
throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class");
}
private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY))
.flatMap(Encoding::get)
.orElse(ENCODING_DEFAULT);

public static String encode(String pathSegment) {
return ENCODING.percentCodec.encode(pathSegment);
}

public static String encode(String uri) {
try {
return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1);
} catch (final Throwable ex) {
throw new RuntimeException("Unable to encode URI: " + uri, ex);
}
public static void encode(StringBuilder dest, CharSequence pathSegment) {
ENCODING.percentCodec.encode(dest, pathSegment);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.client.util;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;

/**
* Percent-encoding.
* <p>
* Adapted from Apache HttpComponents HttpCore v5's <a href="https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java">PercentCodec.java</a>
* </p>
*/
class PercentCodec {
private static class Chars {
private final BitSet set = new BitSet(256);

public void add(char... chars) {
for (char c : chars) {
set.set(c);
}
}

public void addRange(char start, char end) {
set.set(start, end + 1);
}

public void add(Chars set) {
this.set.or(set.set);
}

public boolean contains(int c) {
return set.get(c);
}
}

private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars() {
{
add(':', '/', '?', '#', '[', ']', '@');
}
};
private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars() {
{
add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=');
}
};
private static final Chars RFC3986_UNRESERVED_CHARS = new Chars() {
{
addRange('a', 'z');
addRange('A', 'Z');
addRange('0', '9');
add('-', '.', '_', '~');
}
};
private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars() {
{
add(RFC3986_UNRESERVED_CHARS);
add(RFC3986_SUB_DELIMS_CHARS);
add('@');
}
};
private static final Chars RFC3986_PATH_CHARS = new Chars() {
{
add(RFC3986_PATH_NO_COLON_CHARS);
add(':');
}
};
private static final Chars RFC3986_URIC_CHARS = new Chars() {
{
add(RFC3986_SUB_DELIMS_CHARS);
add(RFC3986_UNRESERVED_CHARS);
}
};

private static final Chars RFC5987_UNRESERVED_CHARS = new Chars() {
{
addRange('a', 'z');
addRange('A', 'Z');
addRange('0', '9');
// Additional characters as per RFC 5987 attr-char
add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~');
}
};

private static final int RADIX = 16;

private static void encode(
final StringBuilder buf,
final CharSequence content,
final Charset charset,
final Chars safeChars,
final boolean blankAsPlus
) {
if (content == null) {
return;
}
final CharBuffer cb = CharBuffer.wrap(content);
final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb);
while (bb.hasRemaining()) {
final int b = bb.get() & 0xff;
if (safeChars.contains(b)) {
buf.append((char) b);
} else if (blankAsPlus && b == ' ') {
buf.append("+");
} else {
buf.append("%");
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
buf.append(hex1);
buf.append(hex2);
}
}
}

private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) {
if (content == null) {
return null;
}
final ByteBuffer bb = ByteBuffer.allocate(content.length());
final CharBuffer cb = CharBuffer.wrap(content);
while (cb.hasRemaining()) {
final char c = cb.get();
if (c == '%' && cb.remaining() >= 2) {
final char uc = cb.get();
final char lc = cb.get();
final int u = Character.digit(uc, RADIX);
final int l = Character.digit(lc, RADIX);
if (u != -1 && l != -1) {
bb.put((byte) ((u << 4) + l));
} else {
bb.put((byte) '%');
bb.put((byte) uc);
bb.put((byte) lc);
}
} else if (plusAsBlank && c == '+') {
bb.put((byte) ' ');
} else {
bb.put((byte) c);
}
}
bb.flip();
return (charset != null ? charset : StandardCharsets.UTF_8).decode(bb).toString();
}

public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
public static final PercentCodec RFC3986_PATH = new PercentCodec(RFC3986_PATH_CHARS);
public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);

private final Chars unreserved;

private PercentCodec(final Chars unreserved) {
this.unreserved = unreserved;
}

public void encode(final StringBuilder buf, final CharSequence content) {
encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
}

public String encode(final CharSequence content) {
if (content == null) {
return null;
}
final StringBuilder buf = new StringBuilder();
encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
return buf.toString();
}

public String decode(final CharSequence content) {
return decode(content, StandardCharsets.UTF_8, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,10 @@ public void testArrayPathParameter() {
assertEquals("/a/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

req = RefreshRequest.of(b -> b.index("a", "b"));
if (isHttpClient5Present()) {
assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

} else {
assertEquals("/a,b/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
assertEquals("/a%2Cb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

req = RefreshRequest.of(b -> b.index("a", "b", "c"));
if (isHttpClient5Present()) {
assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
} else {
assertEquals("/a,b,c/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
assertEquals("/a%2Cb%2Cc/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}

@Test
Expand All @@ -80,11 +71,7 @@ public void testPathEncoding() {
assertEquals("/a%2Fb/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

req = RefreshRequest.of(b -> b.index("a/b", "c/d"));
if (isHttpClient5Present()) {
assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
} else {
assertEquals("/a%2Fb,c%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));
}
assertEquals("/a%2Fb%2Cc%2Fd/_refresh", RefreshRequest._ENDPOINT.requestUrl(req));

}

Expand All @@ -103,13 +90,4 @@ public void testArrayQueryParameter() {
req = RefreshRequest.of(b -> b.expandWildcards(ExpandWildcard.All, ExpandWildcard.Closed));
assertEquals("all,closed", RefreshRequest._ENDPOINT.queryParameters(req).get("expand_wildcards"));
}

private static boolean isHttpClient5Present() {
try {
Class.forName("org.apache.hc.core5.net.URLEncodedUtils");
return true;
} catch (ClassNotFoundException e) {
return false;
}
}
}

This file was deleted.

Loading
Loading