From 62d7b5b54f0a72a463994a97c186880307e255ab Mon Sep 17 00:00:00 2001 From: Ganesh Mahadevan Date: Mon, 29 Jul 2024 22:20:49 -0500 Subject: [PATCH 1/5] add java and scala size and ordering functions --- .../snowflake/snowpark_java/Functions.java | 37 +++++++++++++++++++ .../com/snowflake/snowpark/functions.scala | 24 ++++++++++++ 2 files changed, 61 insertions(+) diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java index 56d8d08b..feca42c8 100644 --- a/src/main/java/com/snowflake/snowpark_java/Functions.java +++ b/src/main/java/com/snowflake/snowpark_java/Functions.java @@ -2,6 +2,7 @@ import static com.snowflake.snowpark.internal.OpenTelemetry.javaUDF; +import com.snowflake.snowpark.functions; import com.snowflake.snowpark.internal.JavaUtils; import com.snowflake.snowpark_java.types.DataType; import com.snowflake.snowpark_java.udf.*; @@ -3880,6 +3881,42 @@ public static Column listagg(Column col) { return new Column(com.snowflake.snowpark.functions.listagg(col.toScalaColumn())); } + /** + * Function to convert column name into column and order in descending manner. + * + * @since 1.14.0 + * @param name The input column name + * @return Column object ordered in descending manner. + */ + public static Column desc(String name) { + return new Column(functions.desc(name)); + } + + /** + * Function to convert column name into column and order in ascending manner. + * + * @since 1.14.0 + * @param name The input column name + * @return Column object ordered in ascending manner. + */ + public static Column asc(String name) { + return new Column(functions.asc(name)); + } + + /** + * Returns the size of the input ARRAY. + * + *

If the specified column contains a VARIANT value that contains an ARRAY, the size of the + * ARRAY is returned; otherwise, NULL is returned if the value is not an ARRAY. + * + * @since 1.14.0 + * @param col The input column name + * @return size of the input ARRAY. + */ + public static Column size(Column col) { + return array_size(col); + } + /** * Calls a user-defined function (UDF) by name. * diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index a7fd9ff0..956a7f0b 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -3140,6 +3140,30 @@ object functions { */ def listagg(col: Column): Column = listagg(col, "", isDistinct = false) + /** + * Function to convert column name into column and order in a descending manner. + * @since 1.14.0 + * @param c Column name. + * @return Column object ordered in a descending manner. + */ + def desc(c: String): Column = col(c).desc + + /** + * Function to convert column name into column and order in an ascending manner. + * @since 1.14.0 + * @param colname Column name. + * @return Column object ordered in an ascending manner. + */ + def asc(colname: String): Column = col(colname).asc + + /** + * Wrapper for Snowflake built-in size function. Gets the size of array column. + * @since 1.14.0 + * @param c Column to get the size. + * @return Size of array column. + */ + def size(c: Column): Column = array_size(c) + /** * Invokes a built-in snowflake function with the specified name and arguments. 
* Arguments can be of two types From 64d81aa59e502055400508a8f2d86de19ed0b2c6 Mon Sep 17 00:00:00 2001 From: Ganesh Mahadevan Date: Mon, 5 Aug 2024 11:58:03 -0500 Subject: [PATCH 2/5] add scala unit test for ordering and size function --- .../com/snowflake/snowpark/functions.scala | 8 +++--- .../snowpark_test/FunctionSuite.scala | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index 956a7f0b..03f0b957 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -3143,18 +3143,18 @@ object functions { /** * Function to convert column name into column and order in a descending manner. * @since 1.14.0 - * @param c Column name. + * @param colName Column name. * @return Column object ordered in a descending manner. */ - def desc(c: String): Column = col(c).desc + def desc(colName: String): Column = col(colName).desc /** * Function to convert column name into column and order in an ascending manner. * @since 1.14.0 - * @param colname Column name. + * @param colName Column name. * @return Column object ordered in an ascending manner. */ - def asc(colname: String): Column = col(colname).asc + def asc(colName: String): Column = col(colName).asc /** * Wrapper for Snowflake built-in size function. Gets the size of array column. 
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala index e473de12..0faa6b59 100644 --- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala +++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala @@ -2178,6 +2178,34 @@ trait FunctionSuite extends TestData { sort = false) } + test("desc column order") { + val input = Seq(1, 2, 3).toDF("data") + val expected = Seq(3, 2, 1).toDF("data") + + val inputStr = Seq("a", "b", "c").toDF("dataStr") + val expectedStr = Seq("c", "b", "a").toDF("dataStr") + + checkAnswer(input.sort(desc("data")), expected, sort = false) + checkAnswer(inputStr.sort(desc("dataStr")), expectedStr, sort = false) + } + + test("asc column order") { + val input = Seq(3, 2, 1).toDF("data") + val expected = Seq(1, 2, 3).toDF("data") + + val inputStr = Seq("c", "b", "a").toDF("dataStr") + val expectedStr = Seq("a", "b", "c").toDF("dataStr") + + checkAnswer(input.sort(asc("data")), expected, sort = false) + checkAnswer(inputStr.sort(asc("dataStr")), expectedStr, sort = false) + } + + test("column array size") { + val input = Seq(Array(1, 2, 3)).toDF("size") + val expected = Seq((3)).toDF("size") + checkAnswer(input.select(size(col("size"))), expected, sort = false) + } + } class EagerFunctionSuite extends FunctionSuite with EagerSession From e0fd5605454868d74eb90b03972d656e0ed133b9 Mon Sep 17 00:00:00 2001 From: Ganesh Mahadevan Date: Mon, 5 Aug 2024 13:56:59 -0500 Subject: [PATCH 3/5] update comments and add example --- .../com/snowflake/snowpark/functions.scala | 49 +++++++++++++++++-- .../snowpark_test/FunctionSuite.scala | 1 + 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index 03f0b957..1cd3eff0 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ 
b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -3141,7 +3141,21 @@ object functions { def listagg(col: Column): Column = listagg(col, "", isDistinct = false) /** - * Function to convert column name into column and order in a descending manner. + * Returns a Column expression with values sorted in descending order. + * Example: + * {{{ + * val df = session.createDataFrame(Seq(1, 2, 3)).toDF("id") + * df.sort(desc("id")).show() + * + * -------- + * |"ID" | + * -------- + * |3 | + * |2 | + * |1 | + * -------- + * }}} + * * @since 1.14.0 * @param colName Column name. * @return Column object ordered in a descending manner. @@ -3149,7 +3163,20 @@ object functions { def desc(colName: String): Column = col(colName).desc /** - * Function to convert column name into column and order in an ascending manner. + * Returns a Column expression with values sorted in ascending order. + * Example: + * {{{ + * val df = session.createDataFrame(Seq(3, 2, 1)).toDF("id") + * df.sort(asc("id")).show() + * + * -------- + * |"ID" | + * -------- + * |1 | + * |2 | + * |3 | + * -------- + * }}} * @since 1.14.0 * @param colName Column name. * @return Column object ordered in an ascending manner. @@ -3157,7 +3184,23 @@ object functions { def asc(colName: String): Column = col(colName).asc /** - * Wrapper for Snowflake built-in size function. Gets the size of array column. + * Returns the size of the input ARRAY. + * + * If the specified column contains a VARIANT value that contains an ARRAY, the size of the ARRAY + * is returned; otherwise, NULL is returned if the value is not an ARRAY. + * + * Example: + * {{{ + * val df = session.createDataFrame(Seq(Array(1, 2, 3))).toDF("id") + * df.select(size(col("id"))).show() + * + * ------------------------ + * |"ARRAY_SIZE(""ID"")" | + * ------------------------ + * |3 | + * ------------------------ + * }}} + * * @since 1.14.0 * @param c Column to get the size. * @return Size of array column. 
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala index 0faa6b59..770e7c7d 100644 --- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala +++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala @@ -2201,6 +2201,7 @@ trait FunctionSuite extends TestData { } test("column array size") { + val input = Seq(Array(1, 2, 3)).toDF("size") val expected = Seq((3)).toDF("size") checkAnswer(input.select(size(col("size"))), expected, sort = false) From 027141afd8790d14808f0a423475c6443e8a9345 Mon Sep 17 00:00:00 2001 From: Ganesh Mahadevan Date: Mon, 5 Aug 2024 16:34:30 -0500 Subject: [PATCH 4/5] add java test cases --- .../snowflake/snowpark_java/Functions.java | 44 ++++++++++++++++++- .../com/snowflake/snowpark/functions.scala | 2 +- .../snowpark_test/JavaFunctionSuite.java | 26 +++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java index feca42c8..dbadd87b 100644 --- a/src/main/java/com/snowflake/snowpark_java/Functions.java +++ b/src/main/java/com/snowflake/snowpark_java/Functions.java @@ -3882,7 +3882,21 @@ public static Column listagg(Column col) { } /** - * Function to convert column name into column and order in descending manner. + * Returns a Column expression with values sorted in descending order. + * + *

Example: order column values in descending order + * + *

{@code
+   * DataFrame df = getSession().sql("select * from values(1),(2),(3) as t(a)");
+   * df.sort(Functions.desc("a")).show();
+   * -------
+   * |"A"  |
+   * -------
+   * |3    |
+   * |2    |
+   * |1    |
+   * -------
+   * }
* * @since 1.14.0 * @param name The input column name @@ -3893,7 +3907,21 @@ public static Column desc(String name) { } /** - * Function to convert column name into column and order in ascending manner. + * Returns a Column expression with values sorted in ascending order. + * + *

Example: order column values in ascending order + * + *

{@code
+   * DataFrame df = getSession().sql("select * from values(3),(1),(2) as t(a)");
+   * df.sort(Functions.asc("a")).show();
+   * -------
+   * |"A"  |
+   * -------
+   * |1    |
+   * |2    |
+   * |3    |
+   * -------
+   * }
* * @since 1.14.0 * @param name The input column name @@ -3909,6 +3937,18 @@ public static Column asc(String name) { *

If the specified column contains a VARIANT value that contains an ARRAY, the size of the * ARRAY is returned; otherwise, NULL is returned if the value is not an ARRAY. * + *

Example: calculate size of the array in a column + * + *

{@code
+   * DataFrame df = getSession().sql("select array_construct(a,b,c) as arr from values(1,2,3) as T(a,b,c)");
+   * df.select(Functions.size(Functions.col("arr"))).show();
+   * -------------------------
+   * |"ARRAY_SIZE(""ARR"")"  |
+   * -------------------------
+   * |3                      |
+   * -------------------------
+   * }
+ * * @since 1.14.0 * @param col The input column name * @return size of the input ARRAY. diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index 1cd3eff0..b13eb7e9 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -3903,7 +3903,7 @@ object functions { * {{{ * val repeat = functions.builtin("repeat") * df.select(repeat(col("col_1"), 3)) - * }}} + * }}}F * * @group client_func * @since 0.1.0 diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java index 6ee298d3..2b3b4fc9 100644 --- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java +++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java @@ -2764,4 +2764,30 @@ public void any_value() { assert result.length == 1; assert result[0].getInt(0) == 1 || result[0].getInt(0) == 2 || result[0].getInt(0) == 3; } + + @Test + public void test_asc() { + DataFrame df = getSession().sql("select * from values(3),(1),(2) as t(a)"); + Row[] expected = {Row.create(1), Row.create(2), Row.create(3)}; + + checkAnswer(df.sort(Functions.asc("a")), expected, false); + } + + @Test + public void test_desc() { + DataFrame df = getSession().sql("select * from values(2),(1),(3) as t(a)"); + Row[] expected = {Row.create(3), Row.create(2), Row.create(1)}; + + checkAnswer(df.sort(Functions.desc("a")), expected, false); + } + + @Test + public void test_size() { + DataFrame df = getSession() + .sql( + "select array_construct(a,b,c) as arr from values(1,2,3) as T(a,b,c)"); + Row[] expected = {Row.create(3)}; + + checkAnswer(df.select(Functions.size(Functions.col("arr"))), expected, false); + } } From 58a15f9e4c9e11d11cceb21e8eb83283f2f24fa1 Mon Sep 17 00:00:00 2001 From: Ganesh Mahadevan Date: Mon, 5 Aug 2024 16:37:03 -0500 Subject: [PATCH 5/5] fix comments --- 
src/main/scala/com/snowflake/snowpark/functions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index b13eb7e9..1cd3eff0 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -3903,7 +3903,7 @@ object functions { * {{{ * val repeat = functions.builtin("repeat") * df.select(repeat(col("col_1"), 3)) - * }}}F + * }}} * * @group client_func * @since 0.1.0