diff --git a/e2e_test/udf/udf.slt b/e2e_test/udf/udf.slt index 4f5ba147a84a6..0b397cd05d284 100644 --- a/e2e_test/udf/udf.slt +++ b/e2e_test/udf/udf.slt @@ -185,6 +185,28 @@ select (return_all_arrays( ---- {NULL,t} {NULL,1} {NULL,1} {NULL,1} {NULL,1} {NULL,1} {NULL,12345678901234567890.12345678} {NULL,2023-06-01} {NULL,01:02:03.456789} {NULL,"2023-06-01 01:02:03.456789"} {NULL,"1 mon 2 days 00:00:03"} {NULL,string} {NULL,"\\x6279746573"} {NULL,"{\"key\": 1}"} {NULL,"(1,2)"} +# test large string output +query I +select length((return_all( + null::boolean, + null::smallint, + null::int, + null::bigint, + null::float4, + null::float8, + null::decimal, + null::date, + null::time, + null::timestamp, + null::interval, + repeat('a', 100000)::varchar, + repeat('a', 100000)::bytea, + null::jsonb, + null::struct +)).varchar); +---- +100000 + query I select series(5); ---- diff --git a/java/udf-example/pom.xml b/java/udf-example/pom.xml index e47ff0263e61d..49de72ab3fac7 100644 --- a/java/udf-example/pom.xml +++ b/java/udf-example/pom.xml @@ -31,7 +31,7 @@ com.risingwave risingwave-udf - 0.1.1-SNAPSHOT + 0.1.2-SNAPSHOT com.google.code.gson diff --git a/java/udf/CHANGELOG.md b/java/udf/CHANGELOG.md index 48f2b014271a7..7766206a0c9fd 100644 --- a/java/udf/CHANGELOG.md +++ b/java/udf/CHANGELOG.md @@ -7,7 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.1.1] - 2023-12-01 +## [0.1.2] - 2023-12-04 + +### Fixed + +- Fix index-out-of-bounds error when a string or string list is large. 
+ +## [0.1.1] - 2023-12-03 ### Added diff --git a/java/udf/pom.xml b/java/udf/pom.xml index 7e9814b4af41e..ea19d85234dbc 100644 --- a/java/udf/pom.xml +++ b/java/udf/pom.xml @@ -6,7 +6,7 @@ com.risingwave risingwave-udf jar - 0.1.1-SNAPSHOT + 0.1.2-SNAPSHOT risingwave-java-root diff --git a/java/udf/src/main/java/com/risingwave/functions/TypeUtils.java b/java/udf/src/main/java/com/risingwave/functions/TypeUtils.java index 5a70a7ed5973b..f09e52f9548fa 100644 --- a/java/udf/src/main/java/com/risingwave/functions/TypeUtils.java +++ b/java/udf/src/main/java/com/risingwave/functions/TypeUtils.java @@ -286,7 +286,13 @@ static void fillVector(FieldVector fieldVector, Object[] values) { } } else if (fieldVector instanceof VarCharVector) { var vector = (VarCharVector) fieldVector; - vector.allocateNew(values.length); + int totalBytes = 0; + for (int i = 0; i < values.length; i++) { + if (values[i] != null) { + totalBytes += ((String) values[i]).length(); + } + } + vector.allocateNew(totalBytes, values.length); for (int i = 0; i < values.length; i++) { if (values[i] != null) { vector.set(i, ((String) values[i]).getBytes()); @@ -294,7 +300,13 @@ static void fillVector(FieldVector fieldVector, Object[] values) { } } else if (fieldVector instanceof LargeVarCharVector) { var vector = (LargeVarCharVector) fieldVector; - vector.allocateNew(values.length); + int totalBytes = 0; + for (int i = 0; i < values.length; i++) { + if (values[i] != null) { + totalBytes += ((String) values[i]).length(); + } + } + vector.allocateNew(totalBytes, values.length); for (int i = 0; i < values.length; i++) { if (values[i] != null) { vector.set(i, ((String) values[i]).getBytes()); @@ -302,7 +314,13 @@ static void fillVector(FieldVector fieldVector, Object[] values) { } } else if (fieldVector instanceof VarBinaryVector) { var vector = (VarBinaryVector) fieldVector; - vector.allocateNew(values.length); + int totalBytes = 0; + for (int i = 0; i < values.length; i++) { + if (values[i] != null) { + 
totalBytes += ((byte[]) values[i]).length; + } + } + vector.allocateNew(totalBytes, values.length); for (int i = 0; i < values.length; i++) { if (values[i] != null) { vector.set(i, (byte[]) values[i]); @@ -311,83 +329,30 @@ static void fillVector(FieldVector fieldVector, Object[] values) { } else if (fieldVector instanceof ListVector) { var vector = (ListVector) fieldVector; vector.allocateNew(); - if (vector.getDataVector() instanceof BitVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val ? 1 : 0)); - } else if (vector.getDataVector() instanceof SmallIntVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val)); - } else if (vector.getDataVector() instanceof IntVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val)); - } else if (vector.getDataVector() instanceof BigIntVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val)); - } else if (vector.getDataVector() instanceof Float4Vector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val)); - } else if (vector.getDataVector() instanceof Float8Vector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val)); - } else if (vector.getDataVector() instanceof LargeVarBinaryVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val.toString().getBytes())); - } else if (vector.getDataVector() instanceof DateDayVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, (int) val.toEpochDay())); - } else if (vector.getDataVector() instanceof TimeMicroVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val.toNanoOfDay() / 1000)); - } else if (vector.getDataVector() instanceof TimeStampMicroVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, timestampToMicros(val))); - } else if (vector.getDataVector() instanceof 
IntervalMonthDayNanoVector) { - TypeUtils.fillListVector( - vector, - values, - (vec, i, val) -> { - var months = (int) val.getPeriod().toTotalMonths(); - var days = val.getPeriod().getDays(); - var nanos = val.getDuration().toNanos(); - vec.set(i, months, days, nanos); - }); - } else if (vector.getDataVector() instanceof VarCharVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val.getBytes())); - } else if (vector.getDataVector() instanceof LargeVarCharVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val.getBytes())); - } else if (vector.getDataVector() instanceof VarBinaryVector) { - TypeUtils.fillListVector( - vector, values, (vec, i, val) -> vec.set(i, val)); - } else if (vector.getDataVector() instanceof StructVector) { - // flatten the `values` - var flattenLength = 0; - for (int i = 0; i < values.length; i++) { - if (values[i] == null) { - continue; - } - var len = Array.getLength(values[i]); - vector.startNewValue(i); - vector.endValue(i, len); - flattenLength += len; + // flatten the `values` + var flattenLength = 0; + for (int i = 0; i < values.length; i++) { + if (values[i] == null) { + continue; } - var flattenValues = new Object[flattenLength]; - var ii = 0; - for (var list : values) { - if (list == null) { - continue; - } - var length = Array.getLength(list); - for (int i = 0; i < length; i++) { - flattenValues[ii++] = Array.get(list, i); - } + var len = Array.getLength(values[i]); + vector.startNewValue(i); + vector.endValue(i, len); + flattenLength += len; + } + var flattenValues = new Object[flattenLength]; + var ii = 0; + for (var list : values) { + if (list == null) { + continue; + } + var length = Array.getLength(list); + for (int i = 0; i < length; i++) { + flattenValues[ii++] = Array.get(list, i); } - fillVector(vector.getDataVector(), flattenValues); - } else { - throw new IllegalArgumentException( - "Unsupported type: " + vector.getDataVector().getClass()); } + // fill the 
inner vector + fillVector(vector.getDataVector(), flattenValues); } else if (fieldVector instanceof StructVector) { var vector = (StructVector) fieldVector; vector.allocateNew(); @@ -430,33 +395,6 @@ static void fillVector(FieldVector fieldVector, Object[] values) { fieldVector.setValueCount(values.length); } - @FunctionalInterface - interface TriFunction { - void apply(T t, U u, V v); - } - - @SuppressWarnings("unchecked") - static void fillListVector( - ListVector vector, Object[] values, TriFunction set) { - var innerVector = (V) vector.getDataVector(); - int ii = 0; - for (int i = 0; i < values.length; i++) { - var array = (T[]) values[i]; - if (array == null) { - continue; - } - vector.startNewValue(i); - for (T v : array) { - if (v == null) { - innerVector.setNull(ii++); - } else { - set.apply(innerVector, ii++, v); - } - } - vector.endValue(i, array.length); - } - } - static long timestampToMicros(LocalDateTime timestamp) { var date = timestamp.toLocalDate().toEpochDay(); var time = timestamp.toLocalTime().toNanoOfDay();