Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sink): refator es sink to support opensearch #16330

Merged
merged 12 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public static SinkFactory getSinkFactory(String sinkName) {
case "jdbc":
return new JDBCSinkFactory();
case "elasticsearch":
case "opensearch":
return new EsSinkFactory();
case "cassandra":
return new CassandraFactory();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.common.collect.Lists;
import com.risingwave.connector.EsSink;
import com.risingwave.connector.EsSinkConfig;
import com.risingwave.connector.RestHighLevelClientAdapter;
import com.risingwave.connector.api.TableSchema;
import com.risingwave.connector.api.sink.ArraySinkRow;
import com.risingwave.proto.Data;
Expand All @@ -31,7 +32,6 @@
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
Expand Down Expand Up @@ -74,12 +74,13 @@ public void testEsSink(ElasticsearchContainer container, String username, String
fail(e.getMessage());
}

RestHighLevelClient client = sink.getClient();
RestHighLevelClientAdapter client = sink.getClient();
SearchRequest searchRequest = new SearchRequest("test");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.matchAllQuery());
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchResponse searchResponse =
(SearchResponse) client.search(searchRequest, RequestOptions.DEFAULT);

SearchHits hits = searchResponse.getHits();
assertEquals(2, hits.getHits().length);
Expand Down
8 changes: 8 additions & 0 deletions java/connector-node/risingwave-sink-es-7/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
</dependency>
<dependency>
<groupId>org.opensearch</groupId>
<artifactId>opensearch</artifactId>
</dependency>
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-rest-high-level-client</artifactId>
</dependency>
xxhZs marked this conversation as resolved.
Show resolved Hide resolved
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright 2024 RisingWave Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.risingwave.connector;
xxhZs marked this conversation as resolved.
Show resolved Hide resolved

import com.risingwave.connector.EsSink.RequestTracker;
import com.risingwave.connector.api.sink.SinkRow;
import java.util.concurrent.TimeUnit;

public interface BulkProcessorAdapter {
public void addRow(SinkRow row, String indexName, RequestTracker requestTracker);
xxhZs marked this conversation as resolved.
Show resolved Hide resolved

public void deleteRow(SinkRow row, String indexName, RequestTracker requestTracker);

public void flush();

public void awaitClose(long timeout, TimeUnit unit) throws InterruptedException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,18 @@
import org.elasticsearch.action.bulk.BulkResponse;

/**
* {@link BulkRequestConsumerFactory} is used to bridge incompatible Elasticsearch Java API calls
* across different Elasticsearch versions.
* {@link ElasticBulkRequestConsumerFactory} is used to bridge incompatible Elasticsearch Java API
* calls across different Elasticsearch versions.
*/
interface BulkRequestConsumerFactory
interface ElasticBulkRequestConsumerFactory
xxhZs marked this conversation as resolved.
Show resolved Hide resolved
extends BiConsumer<BulkRequest, ActionListener<BulkResponse>> {}

/**
* {@link ElasticBulkRequestConsumerFactory} is used to bridge incompatible Elasticsearch Java API
* calls across different Elasticsearch versions.
*/
interface OpensearchBulkRequestConsumerFactory
extends BiConsumer<
org.opensearch.action.bulk.BulkRequest,
org.opensearch.core.action.ActionListener<
org.opensearch.action.bulk.BulkResponse>> {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Copyright 2024 RisingWave Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.risingwave.connector;
xxhZs marked this conversation as resolved.
Show resolved Hide resolved

import com.risingwave.connector.EsSink.RequestTracker;
import com.risingwave.connector.api.sink.SinkRow;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ElasticBulkProcessorAdapter implements BulkProcessorAdapter {
private static final Logger LOG = LoggerFactory.getLogger(EsSink.class);
BulkProcessor esBulkProcessor;

private class BulkListener implements BulkProcessor.Listener {
xxhZs marked this conversation as resolved.
Show resolved Hide resolved
private final RequestTracker requestTracker;

public BulkListener(RequestTracker requestTracker) {
this.requestTracker = requestTracker;
}

/** This method is called just before bulk is executed. */
@Override
public void beforeBulk(long executionId, BulkRequest request) {
LOG.debug("Sending bulk of {} actions to Elasticsearch.", request.numberOfActions());
}

/** This method is called after bulk execution. */
@Override
public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
if (response.hasFailures()) {
String errMessage =
String.format(
"Bulk of %d actions failed. Failure: %s",
request.numberOfActions(), response.buildFailureMessage());
this.requestTracker.addErrResult(errMessage);
} else {
this.requestTracker.addOkResult(request.numberOfActions());
LOG.debug("Sent bulk of {} actions to Elasticsearch.", request.numberOfActions());
}
}

/** This method is called when the bulk failed and raised a Throwable */
@Override
public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
String errMessage =
String.format(
"Bulk of %d actions failed. Failure: %s",
request.numberOfActions(), failure.getMessage());
this.requestTracker.addErrResult(errMessage);
}
}

public ElasticBulkProcessorAdapter(
RequestTracker requestTracker, ElasticRestHighLevelClientAdapter client) {
BulkProcessor.Builder builder =
BulkProcessor.builder(
(ElasticBulkRequestConsumerFactory)
(bulkRequest, bulkResponseActionListener) ->
client.bulkAsync(
bulkRequest,
RequestOptions.DEFAULT,
bulkResponseActionListener),
new BulkListener(requestTracker));
// Possible feature: move these to config
// execute the bulk every 10 000 requests
builder.setBulkActions(1000);
// flush the bulk every 5mb
builder.setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB));
// flush the bulk every 5 seconds whatever the number of requests
builder.setFlushInterval(TimeValue.timeValueSeconds(5));
// Set the number of concurrent requests
builder.setConcurrentRequests(1);
// Set a custom backoff policy which will initially wait for 100ms, increase exponentially
// and retries up to three times.
builder.setBackoffPolicy(
BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3));
this.esBulkProcessor = builder.build();
}

@Override
public void flush() {
esBulkProcessor.flush();
}

@Override
public void awaitClose(long timeout, TimeUnit unit) throws InterruptedException {
esBulkProcessor.awaitClose(timeout, unit);
}

@Override
public void addRow(SinkRow row, String indexName, RequestTracker requestTracker) {
final String index = (String) row.get(0);
final String key = (String) row.get(1);
String doc = (String) row.get(2);

UpdateRequest updateRequest;
if (indexName != null) {
updateRequest = new UpdateRequest(indexName, "_doc", key).doc(doc, XContentType.JSON);
} else {
updateRequest = new UpdateRequest(indexName, "_doc", key).doc(doc, XContentType.JSON);
}
updateRequest.docAsUpsert(true);
requestTracker.addWriteTask();
this.esBulkProcessor.add(updateRequest);
}

@Override
public void deleteRow(SinkRow row, String indexName, RequestTracker requestTracker) {
final String index = (String) row.get(0);
final String key = (String) row.get(1);

DeleteRequest deleteRequest;
if (indexName != null) {
deleteRequest = new DeleteRequest(indexName, "_doc", key);
} else {
deleteRequest = new DeleteRequest(index, "_doc", key);
}
requestTracker.addWriteTask();
this.esBulkProcessor.add(deleteRequest);
}
}
Loading
Loading