feat(sink): support es sink struct and refactor es sink #14231
Diff: EsSink.java
@@ -16,8 +16,6 @@
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.risingwave.connector.api.TableSchema;
import com.risingwave.connector.api.sink.SinkRow;
import com.risingwave.connector.api.sink.SinkWriterBase;

@@ -27,7 +25,6 @@
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;

@@ -47,6 +44,7 @@
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -67,11 +65,6 @@ public class EsSink extends SinkWriterBase {
private static final Logger LOG = LoggerFactory.getLogger(EsSink.class);
private static final String ERROR_REPORT_TEMPLATE = "Error message %s";

private static final TimeZone UTCTimeZone = TimeZone.getTimeZone("UTC");
private final SimpleDateFormat tDfm;
private final SimpleDateFormat tsDfm;
private final SimpleDateFormat tstzDfm;

private final EsSinkConfig config;
private BulkProcessor bulkProcessor;
private final RestHighLevelClient client;

@@ -198,13 +191,9 @@ public EsSink(EsSinkConfig config, TableSchema tableSchema) {
this.bulkProcessor = createBulkProcessor(this.requestTracker);

primaryKeyIndexes = new ArrayList<Integer>();
Review comment: If we still need a schema from the upstream, why don't we need the pk index, even though the schema is (varchar, jsonb) in most cases?
Author reply: In this PR, all processing is handled on the Rust side, including generating the doc id and processing the content body. On the Java side we just build and send the request from the processed doc id and content body, so the pk index is not used on the Java side.
for (String primaryKey : tableSchema.getPrimaryKeys()) {
primaryKeyIndexes.add(tableSchema.getColumnIndex(primaryKey));
for (String primaryKey : getTableSchema().getPrimaryKeys()) {
primaryKeyIndexes.add(getTableSchema().getColumnIndex(primaryKey));
}

tDfm = createSimpleDateFormat("HH:mm:ss.SSS", UTCTimeZone);
tsDfm = createSimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", UTCTimeZone);
tstzDfm = createSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", UTCTimeZone);
}

private static RestClientBuilder configureRestClientBuilder(

@@ -297,116 +286,20 @@ public void afterBulk(long executionId, BulkRequest request, Throwable failure)
}
}
/**
* The api accepts doc in map form.
*
* @param row
* @return Map from Field name to Value
* @throws JsonProcessingException
* @throws JsonMappingException
*/
private Map<String, Object> buildDoc(SinkRow row)
throws JsonMappingException, JsonProcessingException {
Map<String, Object> doc = new HashMap();
var tableSchema = getTableSchema();
var columnDescs = tableSchema.getColumnDescs();
for (int i = 0; i < row.size(); i++) {
var type = columnDescs.get(i).getDataType().getTypeName();
Object col = row.get(i);
switch (type) {
// es client doesn't natively support java.sql.Timestamp/Time/Date
// so we need to convert Date/Time/Timestamp type into a string as suggested in
// https://github.com/elastic/elasticsearch/issues/31377#issuecomment-398102292
case DATE:
col = col.toString();
break;
// construct java.sql.Time/Timestamp with milliseconds time value.
// it will use system timezone by default, so we have to set timezone manually
case TIME:
col = tDfm.format(col);
break;
case TIMESTAMP:
col = tsDfm.format(col);
break;
case TIMESTAMPTZ:
col = tstzDfm.format(col);
break;
case JSONB:
ObjectMapper mapper = new ObjectMapper();
JsonNode jsonNode = mapper.readTree((String) col);
col = convertJsonNode(jsonNode);
break;
default:
break;
}

doc.put(getTableSchema().getColumnDesc(i).getName(), col);
}
return doc;
}

private static Object convertJsonNode(JsonNode jsonNode) {
if (jsonNode.isObject()) {
Map<String, Object> resultMap = new HashMap<>();
jsonNode.fields()
.forEachRemaining(
entry -> {
resultMap.put(entry.getKey(), convertJsonNode(entry.getValue()));
});
return resultMap;
} else if (jsonNode.isArray()) {
List<Object> resultList = new ArrayList<>();
jsonNode.elements()
.forEachRemaining(
element -> {
resultList.add(convertJsonNode(element));
});
return resultList;
} else if (jsonNode.isNumber()) {
return jsonNode.numberValue();
} else if (jsonNode.isTextual()) {
return jsonNode.textValue();
} else if (jsonNode.isBoolean()) {
return jsonNode.booleanValue();
} else if (jsonNode.isNull()) {
return null;
} else {
throw new IllegalArgumentException("Unsupported JSON type");
}
}

/**
* use primary keys as id concatenated by a specific delimiter.
*
* @param row
* @return
*/
private String buildId(SinkRow row) {
String id;
if (primaryKeyIndexes.isEmpty()) {
id = row.get(0).toString();
} else {
List<String> keys =
primaryKeyIndexes.stream()
.map(index -> row.get(primaryKeyIndexes.get(index)).toString())
.collect(Collectors.toList());
id = String.join(config.getDelimiter(), keys);
}
return id;
}

private void processUpsert(SinkRow row) throws JsonMappingException, JsonProcessingException {
Map<String, Object> doc = buildDoc(row);
final String key = buildId(row);
final String key = (String) row.get(0);
String doc = (String) row.get(1);
Review comment (on lines +282 to +283): Concern about the forward compatibility.

UpdateRequest updateRequest =
new UpdateRequest(config.getIndex(), "doc", key).doc(doc).upsert(doc);
new UpdateRequest(config.getIndex(), "doc", key).doc(doc, XContentType.JSON);
updateRequest.docAsUpsert(true);
this.requestTracker.addWriteTask();
bulkProcessor.add(updateRequest);
}

private void processDelete(SinkRow row) {
final String key = buildId(row);
private void processDelete(SinkRow row) throws JsonMappingException, JsonProcessingException {
final String key = (String) row.get(0);

DeleteRequest deleteRequest = new DeleteRequest(config.getIndex(), "doc", key);
this.requestTracker.addWriteTask();
bulkProcessor.add(deleteRequest);
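
Read together, the refactored write path no longer builds a document map or an id on the Java side; as the author notes in the review thread above, it simply forwards the doc id (column 0) and the JSON body (column 1) produced on the Rust side. Below is a minimal sketch of that request construction, assuming the same Elasticsearch high-level client classes imported in this file; the class and field names are illustrative stand-ins for the actual EsSink wiring, not the final code.

```java
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.xcontent.XContentType;

// Illustrative sketch: "index" stands in for config.getIndex(), and the row is
// modeled as a plain (docId, jsonBody) pair instead of a SinkRow.
public class EsRequestSketch {
    private final String index;

    public EsRequestSketch(String index) {
        this.index = index;
    }

    // Upsert: the JSON body is passed through as-is; no per-column conversion is
    // needed because the Rust side already serialized the row (including jsonb).
    public UpdateRequest upsert(String docId, String jsonBody) {
        UpdateRequest updateRequest =
                new UpdateRequest(index, "doc", docId).doc(jsonBody, XContentType.JSON);
        updateRequest.docAsUpsert(true);
        return updateRequest;
    }

    // Delete: only the pre-computed doc id is needed.
    public DeleteRequest delete(String docId) {
        return new DeleteRequest(index, "doc", docId);
    }
}
```

The resulting requests would then be handed to the existing BulkProcessor, exactly as in the diff above.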

Diff: Rust side, CustomJsonType enum
@@ -134,8 +134,8 @@ pub enum CustomJsonType {
// The internal order of the struct should follow the insertion order.
// The decimal needs verification and calibration.
Doris(HashMap<String, (u8, u8)>),
// Bigquery's json need date is string.
Bigquery,
// Es's json need jsonb is struct
Review comment: Why is the Bigquery variant removed?
Author reply: Because I found that, after some refactoring of our code to include the data types, Bigquery is no longer a special option and can use the common creation logic shared with other sinks.
Es,
None,
}
Review comment: It's unnecessary to set the special schema, since the schema is fixed and we should not access it in the es sink.
Author reply: The schema here is for use by the StreamChunkDeserializer, which needs our mock schema, not the original one, to deserialize the StreamChunk.
Review comment: I see. Previously we assumed the stream chunk schema is the same as the sink logical schema; now we have broken that assumption. If so, we should have a separate field named something like payload_schema in the StartSink proto. The StreamChunkDeserializer will then use the schema in that field instead of the one in the SinkParam. On the Rust side, when we send the initial start-sink request, for the es sink we fill in the special schema, and for other sinks we fill in the original sink schema, so that on the Java side we don't need this special logic.
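
To make the proposal concrete, a rough sketch of the fallback described in the last comment is shown below. Everything here is hypothetical: the payload_schema field, the column names, and the helper types are illustrative assumptions, not the actual StartSink proto or connector-node API.

```java
import java.util.List;

// Hypothetical sketch only: none of these names are confirmed by the PR.
// Idea: the deserializer uses a dedicated payload schema when the stream chunk
// layout differs from the sink's logical schema (as for the es sink, whose
// payload is a fixed pair of a varchar doc id and a jsonb document body).
public final class PayloadSchemaSketch {

    /** Minimal stand-in for a schema; the real code would use TableSchema. */
    public record MockSchema(List<String> columnNames, List<String> columnTypes) {}

    /** Pick the schema the StreamChunkDeserializer should be built with. */
    public static MockSchema schemaForDeserializer(MockSchema payloadSchema, MockSchema sinkSchema) {
        // es sink: the Rust side would fill payloadSchema with the fixed layout.
        // other sinks: the field would be absent, so the sink's own schema is used
        // and the Java side needs no special case.
        return payloadSchema != null ? payloadSchema : sinkSchema;
    }

    public static void main(String[] args) {
        MockSchema esPayload =
                new MockSchema(List.of("doc_id", "doc"), List.of("varchar", "jsonb"));
        MockSchema sinkLogical =
                new MockSchema(List.of("v1", "v2", "v3"), List.of("int", "varchar", "jsonb"));

        System.out.println(schemaForDeserializer(esPayload, sinkLogical)); // es sink
        System.out.println(schemaForDeserializer(null, sinkLogical));      // any other sink
    }
}
```

Whether this lands as a payload_schema field in StartSink or some other mechanism is left open in the thread; the sketch only illustrates the proposed fallback behavior.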