From 1f434cc7f70ea5d544be74cbbdad99f6880ceab9 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 16:44:10 +0800 Subject: [PATCH 01/32] feat: add data_type parameter to expr_fn macro for arrow_cast function --- src/functions.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index e29c57f9..6e6bbd1d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -384,12 +384,11 @@ macro_rules! expr_fn { ($FUNC: ident, $($arg:ident)*, $DOC: expr) => { #[doc = $DOC] #[pyfunction] - fn $FUNC($($arg: PyExpr),*) -> PyExpr { - functions::expr_fn::$FUNC($($arg.into()),*).into() + fn $FUNC($($arg: PyExpr),*, data_type: &str) -> PyExpr { + functions::expr_fn::$FUNC($($arg.into()),*, data_type.to_string()).into() } }; } - /// Generates a [pyo3] wrapper for [datafusion::functions::expr_fn] /// /// These functions take a single `Vec` argument using `pyo3(signature = (*args))`. @@ -564,6 +563,7 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); +expr_fn!(arrow_cast, expr data_type); expr_fn!(random); // Array Functions From a576cb7b54afd6ad73f8511626a8339f9dd9ef19 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 16:45:54 +0800 Subject: [PATCH 02/32] feat: add arrow_cast function to cast expressions to specified data types --- python/datafusion/functions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 6ad4c50c..8da34fe4 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -82,6 +82,7 @@ "array_to_string", "array_union", "arrow_typeof", + "arrow_cast", "ascii", "asin", "asinh", @@ -1099,6 +1100,11 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) +def arrow_cast(expr: Expr, data_type: str) -> Expr: + """Casts an expression to a specified data type.""" + return Expr(f.arrow_cast(expr.expr, data_type)) + + def random() -> Expr: """Returns a random value in the range ``0.0 <= x < 1.0``.""" return Expr(f.random()) From 1914a0b6fa1818641247dab667be39a4c19eb8c0 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 16:48:45 +0800 Subject: [PATCH 03/32] docs: add casting section to user guide with examples for arrow_cast function --- .../user-guide/common-operations/functions.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index a0b95c90..519ccdb1 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -78,6 +78,7 @@ Convert to timestamps using :py:func:`~datafusion.functions.to_timestamp` df.select(f.to_timestamp(col('"Total"')).alias("timestamp")) + String ------ @@ -101,6 +102,17 @@ This also includes the functions for regular expressions like :py:func:`~datafus f.regexp_replace(col('"Name"'), literal("saur"), literal("fleur")).alias("flowers") ) +Casting +------- + +Casting expressions to different data types using :py:func:`~datafusion.functions.arrow_cast` + +.. ipython:: python + + df.select( + f.arrow_cast(col('"Total"'), "Float64").alias("total_as_float"), + f.arrow_cast(col('"Total"'), "Int32").alias("total_as_int") + ) Other ----- From e623ae305f8d3d0dc29385e22cca5916085cf3ed Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 16:50:05 +0800 Subject: [PATCH 04/32] test: add unit test for arrow_cast function to validate casting to Float64 and Int32 --- python/tests/test_functions.py | 12 ++++++++++++ src/functions.rs | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 0d40032b..8859bc84 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -905,6 +905,18 @@ def test_temporal_functions(df): ) +def test_arrow_cast(df): + df = df.select( + f.arrow_cast(column("a"), "Float64").alias("a_as_float"), + f.arrow_cast(column("a"), "Int32").alias("a_as_int"), + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array([1.0, 2.0, 3.0], type=pa.float64()) + assert result.column(1) == pa.array([1, 2, 3], type=pa.int32()) + + def test_case(df): df = df.select( f.case(column("b")).when(literal(4), literal(10)).otherwise(literal(8)), diff --git a/src/functions.rs b/src/functions.rs index 6e6bbd1d..b4664ea5 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -384,8 +384,8 @@ macro_rules! expr_fn { ($FUNC: ident, $($arg:ident)*, $DOC: expr) => { #[doc = $DOC] #[pyfunction] - fn $FUNC($($arg: PyExpr),*, data_type: &str) -> PyExpr { - functions::expr_fn::$FUNC($($arg.into()),*, data_type.to_string()).into() + fn $FUNC($($arg: PyExpr),*) -> PyExpr { + functions::expr_fn::$FUNC($($arg.into()),*).into() } }; } @@ -563,7 +563,7 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); -expr_fn!(arrow_cast, expr data_type); +expr_fn!(arrow_cast, datatype); expr_fn!(random); // Array Functions From 61115b3bd62f4a6654a866ec58e260a0f14eec33 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 17:41:04 +0800 Subject: [PATCH 05/32] fix: update arrow_cast function to accept Expr type for data_type parameter --- python/datafusion/functions.py | 4 ++-- src/functions.rs | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 8da34fe4..0885a7fa 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1100,9 +1100,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: str) -> Expr: +def arrow_cast(expr: Expr, data_type: Expr) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type)) + return Expr(f.arrow_cast(expr.expr, data_type.expr)) def random() -> Expr: diff --git a/src/functions.rs b/src/functions.rs index b4664ea5..b1ee657d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -563,7 +563,7 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); -expr_fn!(arrow_cast, datatype); +expr_fn!(arrow_cast, column datatype); expr_fn!(random); // Array Functions @@ -856,6 +856,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(range))?; m.add_wrapped(wrap_pyfunction!(array_agg))?; m.add_wrapped(wrap_pyfunction!(arrow_typeof))?; + m.add_wrapped(wrap_pyfunction!(arrow_cast))?; m.add_wrapped(wrap_pyfunction!(ascii))?; m.add_wrapped(wrap_pyfunction!(asin))?; m.add_wrapped(wrap_pyfunction!(asinh))?; From 11071e68469241c80f3610019eb293e45432529a Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 17:41:15 +0800 Subject: [PATCH 06/32] fix: update test_arrow_cast to use literal casting for data types --- python/tests/test_functions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 8859bc84..e55d6701 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -907,8 +907,10 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), "Float64").alias("a_as_float"), - f.arrow_cast(column("a"), "Int32").alias("a_as_int"), + f.arrow_cast(column("a"), literal("Float64").cast(pa.string())).alias( + "a_as_float" + ), + f.arrow_cast(column("a"), literal("Int32").cast(pa.string())).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 20cc78150e86381fc926564e8b2736e413ba8aad Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 17:43:33 +0800 Subject: [PATCH 07/32] fix: update arrow_cast function to accept string type for data_type parameter --- python/datafusion/functions.py | 4 ++-- python/tests/test_functions.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0885a7fa..8da34fe4 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1100,9 +1100,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: Expr) -> Expr: +def arrow_cast(expr: Expr, data_type: str) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type.expr)) + return Expr(f.arrow_cast(expr.expr, data_type)) def random() -> Expr: diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e55d6701..8859bc84 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -907,10 +907,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), literal("Float64").cast(pa.string())).alias( - "a_as_float" - ), - f.arrow_cast(column("a"), literal("Int32").cast(pa.string())).alias("a_as_int"), + f.arrow_cast(column("a"), "Float64").alias("a_as_float"), + f.arrow_cast(column("a"), "Int32").alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 1e4e350752263e74680d42bc5463daadffda007e Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 17:54:34 +0800 Subject: [PATCH 08/32] fix: update arrow_cast function to accept Expr type for data_type parameter --- python/datafusion/functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 8da34fe4..0885a7fa 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1100,9 +1100,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: str) -> Expr: +def arrow_cast(expr: Expr, data_type: Expr) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type)) + return Expr(f.arrow_cast(expr.expr, data_type.expr)) def random() -> Expr: From 8c7e2f8ac6c6cd4179505bcf993f543e3dd8af0e Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 26 Nov 2024 18:06:29 +0800 Subject: [PATCH 09/32] fix: update test_arrow_cast to use literal for data type parameters --- python/tests/test_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 8859bc84..e00e413a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -907,8 +907,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), "Float64").alias("a_as_float"), - f.arrow_cast(column("a"), "Int32").alias("a_as_int"), + f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), + f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From b80ae94812e460f76f0bcaae59a030569d473932 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 14:37:11 +0800 Subject: [PATCH 10/32] fix: update arrow_cast function to use arg_1 for datatype parameter --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index b1ee657d..2f8a96d9 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -563,7 +563,7 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); -expr_fn!(arrow_cast, column datatype); +expr_fn!(arrow_cast, arg_1 datatype); expr_fn!(random); // Array Functions From eba0d320820e8f3f9688781f27b2a5579c0e9949 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 14:47:59 +0800 Subject: [PATCH 11/32] fix: update arrow_cast function to accept string type for data_type parameter --- python/datafusion/functions.py | 5 ++--- python/tests/test_functions.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0885a7fa..297b593a 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -17,7 +17,6 @@ """User functions for operating on :py:class:`~datafusion.expr.Expr`.""" from __future__ import annotations - from datafusion._internal import functions as f from datafusion.expr import ( CaseBuilder, @@ -1100,9 +1099,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: Expr) -> Expr: +def arrow_cast(expr: Expr, data_type: str) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type.expr)) + return Expr(f.arrow_cast(expr.expr, literal(data_type).expr)) def random() -> Expr: diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e00e413a..8859bc84 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -907,8 +907,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), - f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), + f.arrow_cast(column("a"), "Float64").alias("a_as_float"), + f.arrow_cast(column("a"), "Int32").alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 3a5e210b2cd03032e2c08955d94e23d8389b520e Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 14:48:03 +0800 Subject: [PATCH 12/32] Revert "fix: update arrow_cast function to accept string type for data_type parameter" This reverts commit eba0d320820e8f3f9688781f27b2a5579c0e9949. --- python/datafusion/functions.py | 5 +++-- python/tests/test_functions.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 297b593a..0885a7fa 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -17,6 +17,7 @@ """User functions for operating on :py:class:`~datafusion.expr.Expr`.""" from __future__ import annotations + from datafusion._internal import functions as f from datafusion.expr import ( CaseBuilder, @@ -1099,9 +1100,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: str) -> Expr: +def arrow_cast(expr: Expr, data_type: Expr) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, literal(data_type).expr)) + return Expr(f.arrow_cast(expr.expr, data_type.expr)) def random() -> Expr: diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 8859bc84..e00e413a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -907,8 +907,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), "Float64").alias("a_as_float"), - f.arrow_cast(column("a"), "Int32").alias("a_as_int"), + f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), + f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 856ff8c4cad0075c282089b5368a7c3fd17f03d8 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 15:49:49 +0800 Subject: [PATCH 13/32] fix: update test_arrow_cast to cast literals to string type for arrow_cast function --- python/tests/test_functions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e00e413a..8389c27b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,7 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal +from datafusion import literal, expr np.seterr(invalid="ignore") @@ -907,8 +907,10 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), - f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), + f.arrow_cast(column("a"), literal("Float64").cast(pa.string())).alias( + "a_as_float" + ), + f.arrow_cast(column("a"), literal("Int32").cast(pa.string())).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From dcaf0d63872cc5549e6afa4fba8de7498ad3ad1f Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 15:49:52 +0800 Subject: [PATCH 14/32] Revert "fix: update test_arrow_cast to cast literals to string type for arrow_cast function" This reverts commit 856ff8c4cad0075c282089b5368a7c3fd17f03d8. --- python/tests/test_functions.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 8389c27b..e00e413a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,7 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal, expr +from datafusion import literal np.seterr(invalid="ignore") @@ -907,10 +907,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), literal("Float64").cast(pa.string())).alias( - "a_as_float" - ), - f.arrow_cast(column("a"), literal("Int32").cast(pa.string())).alias("a_as_int"), + f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), + f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 9e1ced7fb56c8aec47bc9f540ea5686c7246f022 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 15:58:56 +0800 Subject: [PATCH 15/32] fix: update arrow_cast function to accept string type for data_type parameter --- python/datafusion/functions.py | 4 ++-- src/functions.rs | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0885a7fa..8da34fe4 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1100,9 +1100,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: Expr) -> Expr: +def arrow_cast(expr: Expr, data_type: str) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type.expr)) + return Expr(f.arrow_cast(expr.expr, data_type)) def random() -> Expr: diff --git a/src/functions.rs b/src/functions.rs index 2f8a96d9..2dda8de6 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -144,6 +144,12 @@ fn concat_ws(sep: String, args: Vec) -> PyResult { Ok(functions::string::expr_fn::concat_ws(lit(sep), args).into()) } +/// Casts an expression to a specified data type. +#[pyfunction] +fn arrow_cast(expr: PyExpr, data_type: &str) -> PyResult { + Ok(functions::expr_fn::arrow_cast(expr.expr, data_type.to_string()).into()) +} + #[pyfunction] #[pyo3(signature = (values, regex, flags=None))] fn regexp_like(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { @@ -563,7 +569,6 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); -expr_fn!(arrow_cast, arg_1 datatype); expr_fn!(random); // Array Functions From 8e96e8e74cdea7089a5c160a69fc488c3de07d7e Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 15:59:00 +0800 Subject: [PATCH 16/32] Revert "fix: update arrow_cast function to accept string type for data_type parameter" This reverts commit 9e1ced7fb56c8aec47bc9f540ea5686c7246f022. --- python/datafusion/functions.py | 4 ++-- src/functions.rs | 7 +------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 8da34fe4..0885a7fa 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1100,9 +1100,9 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) -def arrow_cast(expr: Expr, data_type: str) -> Expr: +def arrow_cast(expr: Expr, data_type: Expr) -> Expr: """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type)) + return Expr(f.arrow_cast(expr.expr, data_type.expr)) def random() -> Expr: diff --git a/src/functions.rs b/src/functions.rs index 2dda8de6..2f8a96d9 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -144,12 +144,6 @@ fn concat_ws(sep: String, args: Vec) -> PyResult { Ok(functions::string::expr_fn::concat_ws(lit(sep), args).into()) } -/// Casts an expression to a specified data type. -#[pyfunction] -fn arrow_cast(expr: PyExpr, data_type: &str) -> PyResult { - Ok(functions::expr_fn::arrow_cast(expr.expr, data_type.to_string()).into()) -} - #[pyfunction] #[pyo3(signature = (values, regex, flags=None))] fn regexp_like(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { @@ -569,6 +563,7 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); +expr_fn!(arrow_cast, arg_1 datatype); expr_fn!(random); // Array Functions From 11ed6749e02ab7b34d47fa105961f088f9fc9245 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 16:20:44 +0800 Subject: [PATCH 17/32] fix: add utf8_literal function to create UTF8 literal expressions in tests --- python/tests/test_functions.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e00e413a..11485b05 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,7 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal +from datafusion import literal, Expr np.seterr(invalid="ignore") @@ -905,10 +905,15 @@ def test_temporal_functions(df): ) +def utf8_literal(value: str) -> Expr: + """Creates a new expression representing a UTF8 literal value.""" + return literal(pa.scalar(value, type=pa.string())) + + def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), - f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), + f.arrow_cast(column("a"), utf8_literal("Float64")).alias("a_as_float"), + f.arrow_cast(column("a"), utf8_literal("Int32")).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 193d21cda60083cae7041cbddc7bdd0cc143e44d Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 27 Nov 2024 16:20:48 +0800 Subject: [PATCH 18/32] Revert "fix: add utf8_literal function to create UTF8 literal expressions in tests" This reverts commit 11ed6749e02ab7b34d47fa105961f088f9fc9245. --- python/tests/test_functions.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 11485b05..e00e413a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,7 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal, Expr +from datafusion import literal np.seterr(invalid="ignore") @@ -905,15 +905,10 @@ def test_temporal_functions(df): ) -def utf8_literal(value: str) -> Expr: - """Creates a new expression representing a UTF8 literal value.""" - return literal(pa.scalar(value, type=pa.string())) - - def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), utf8_literal("Float64")).alias("a_as_float"), - f.arrow_cast(column("a"), utf8_literal("Int32")).alias("a_as_int"), + f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), + f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From ba53bd18636a8901ac2d08b477e5402eaf813f82 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 28 Nov 2024 06:35:09 +0800 Subject: [PATCH 19/32] feat: add utf8_literal function to create UTF8 literal expressions --- python/datafusion/__init__.py | 5 +++++ python/datafusion/expr.py | 6 ++++++ python/tests/test_functions.py | 8 +++++--- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index e0bc57f4..c35997fe 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -107,6 +107,11 @@ def literal(value): return Expr.literal(value) +def utf8_literal(value): + """Create a UTF8 literal expression.""" + return Expr.utf8_literal(value) + + def lit(value): """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index b1072438..81a4d74b 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -380,6 +380,12 @@ def literal(value: Any) -> Expr: value = pa.scalar(value) return Expr(expr_internal.Expr.literal(value)) + @staticmethod + def utf8_literal(value: str) -> Expr: + """Creates a new expression representing a UTF8 literal value.""" + value = pa.scalar(value, type=pa.string()) + return Expr(expr_internal.Expr.literal(value)) + @staticmethod def column(value: str) -> Expr: """Creates a new expression representing a column.""" diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e00e413a..5e4b8e80 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,9 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal +from datafusion import literal, utf8_literal +from datafusion import Expr +from datafusion.expr import expr_internal np.seterr(invalid="ignore") @@ -907,8 +909,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), literal("Float64")).alias("a_as_float"), - f.arrow_cast(column("a"), literal("Int32")).alias("a_as_int"), + f.arrow_cast(column("a"), utf8_literal("Float64")).alias("a_as_float"), + f.arrow_cast(column("a"), utf8_literal("Int32")).alias("a_as_int"), ) result = df.collect() assert len(result) == 1 From 3b83a96edab214821fe175e0ac53667c0deeb9c1 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 28 Nov 2024 06:41:57 +0800 Subject: [PATCH 20/32] fix: update test_arrow_cast to use column 'b' --- python/tests/test_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 5e4b8e80..c0a56e02 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -909,14 +909,15 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - f.arrow_cast(column("a"), utf8_literal("Float64")).alias("a_as_float"), - f.arrow_cast(column("a"), utf8_literal("Int32")).alias("a_as_int"), + f.arrow_cast(column("b"), utf8_literal("Float64")).alias("b_as_float"), + f.arrow_cast(column("b"), utf8_literal("Int32")).alias("b_as_int"), ) result = df.collect() assert len(result) == 1 result = result[0] - assert result.column(0) == pa.array([1.0, 2.0, 3.0], type=pa.float64()) - assert result.column(1) == pa.array([1, 2, 3], type=pa.int32()) + + assert result.column(0) == pa.array([4.0, 5.0, 6.0], type=pa.float64()) + assert result.column(1) == pa.array([4, 5, 6], type=pa.int32()) def test_case(df): From cdf32cdec2402e3711559eb52784a6eef3145d80 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 2 Dec 2024 19:42:22 +0800 Subject: [PATCH 21/32] fix: enhance utf8_literal function to handle non-string values --- python/datafusion/expr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 81a4d74b..f9a4c813 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -383,8 +383,10 @@ def literal(value: Any) -> Expr: @staticmethod def utf8_literal(value: str) -> Expr: """Creates a new expression representing a UTF8 literal value.""" - value = pa.scalar(value, type=pa.string()) - return Expr(expr_internal.Expr.literal(value)) + if isinstance(value, str): + value = pa.scalar(value, type=pa.string()) + return Expr(expr_internal.Expr.literal(value)) + return literal(value) @staticmethod def column(value: str) -> Expr: From 187e0775f1b169284decac6d478b54747d15539c Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 2 Dec 2024 19:51:41 +0800 Subject: [PATCH 22/32] Add description for utf8_literal vs literal --- python/datafusion/expr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index f9a4c813..f1d370d3 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -382,7 +382,10 @@ def literal(value: Any) -> Expr: @staticmethod def utf8_literal(value: str) -> Expr: - """Creates a new expression representing a UTF8 literal value.""" + """Creates a new expression representing a UTF8 literal value. + + It is different from `literal` because it is pa.string() instead of pa.string_view() + """ if isinstance(value, str): value = pa.scalar(value, type=pa.string()) return Expr(expr_internal.Expr.literal(value)) From 0106cb74982ee18265e3610ab28dff58f977eb99 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 3 Dec 2024 10:34:38 +0800 Subject: [PATCH 23/32] docs: clarify utf8_literal function documentation to explain use case --- python/datafusion/expr.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index f1d370d3..029abc64 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -385,6 +385,8 @@ def utf8_literal(value: str) -> Expr: """Creates a new expression representing a UTF8 literal value. It is different from `literal` because it is pa.string() instead of pa.string_view() + This is needed for cases where datafusion is expecting a utf8 instead of utf8view literal like in + https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 """ if isinstance(value, str): value = pa.scalar(value, type=pa.string()) From 74cbd3b92aec9ab79d6a6df48c5af7b91e6d0470 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 3 Dec 2024 10:41:06 +0800 Subject: [PATCH 24/32] docs: add clarification comments for utf8_literal usage in arrow_cast tests --- python/tests/test_functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 39518a49..68e28811 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -911,6 +911,9 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( + # we use `utf8_literal` to returns utf8 instead of `literal` which returns utf8view + # because datafusion.arrow_cast # expects a utf8 instead of utf8view + # https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 f.arrow_cast(column("b"), utf8_literal("Float64")).alias("b_as_float"), f.arrow_cast(column("b"), utf8_literal("Int32")).alias("b_as_int"), ) From 1c5b91e3db40ca2cc67d8eab92a294218a9f9753 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 3 Dec 2024 10:53:55 +0800 Subject: [PATCH 25/32] docs: implement ruff recommendation --- python/datafusion/expr.py | 7 +++++-- python/tests/test_functions.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 029abc64..a76a0705 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -384,8 +384,11 @@ def literal(value: Any) -> Expr: def utf8_literal(value: str) -> Expr: """Creates a new expression representing a UTF8 literal value. - It is different from `literal` because it is pa.string() instead of pa.string_view() - This is needed for cases where datafusion is expecting a utf8 instead of utf8view literal like in + It is different from `literal` because it is pa.string() instead of + pa.string_view() + + This is needed for cases where DataFusion is expecting a UTF8 instead of + UTF8View literal, like in: https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 """ if isinstance(value, str): diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 68e28811..f87041b3 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -911,8 +911,8 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - # we use `utf8_literal` to returns utf8 instead of `literal` which returns utf8view - # because datafusion.arrow_cast # expects a utf8 instead of utf8view + # we use `utf8_literal` to returns utf8 instead of `literal` which returns + # utf8view # because datafusion.arrow_cast expects a utf8 instead of utf8view # https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 f.arrow_cast(column("b"), utf8_literal("Float64")).alias("b_as_float"), f.arrow_cast(column("b"), utf8_literal("Int32")).alias("b_as_int"), From 4aa6c7ec06e1b95df388ecc397bcad73e1162f85 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 3 Dec 2024 11:01:09 +0800 Subject: [PATCH 26/32] fix ruff errors --- python/datafusion/expr.py | 2 +- python/tests/test_functions.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index a76a0705..e74f034b 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -394,7 +394,7 @@ def utf8_literal(value: str) -> Expr: if isinstance(value, str): value = pa.scalar(value, type=pa.string()) return Expr(expr_internal.Expr.literal(value)) - return literal(value) + return Expr.literal(value) @staticmethod def column(value: str) -> Expr: diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index f87041b3..282593d9 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -24,8 +24,6 @@ from datafusion import SessionContext, column from datafusion import functions as f from datafusion import literal, utf8_literal -from datafusion import Expr -from datafusion.expr import expr_internal np.seterr(invalid="ignore") From f9814dd4ec18c514a54f89e4ab481ed7dcefed58 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 3 Dec 2024 11:16:08 +0800 Subject: [PATCH 27/32] docs: update examples to use utf8_literal in arrow_cast function --- docs/source/user-guide/common-operations/functions.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index 259435c8..de446588 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -38,7 +38,7 @@ DataFusion offers mathematical functions such as :py:func:`~datafusion.functions .. ipython:: python - from datafusion import col, literal + from datafusion import col, literal, utf8_literal from datafusion import functions as f df.select( @@ -112,8 +112,8 @@ Casting expressions to different data types using :py:func:`~datafusion.function .. ipython:: python df.select( - f.arrow_cast(col('"Total"'), "Float64").alias("total_as_float"), - f.arrow_cast(col('"Total"'), "Int32").alias("total_as_int") + f.arrow_cast(col('"Total"'), utf8_literal("Float64")).alias("total_as_float"), + f.arrow_cast(col('"Total"'), utf8_literal("Int32")).alias("total_as_int") ) Other From 8eb0ed1e0bcb7b3f100cbf071e1d2d237c4e1b75 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 10 Dec 2024 18:00:18 +0800 Subject: [PATCH 28/32] docs: correct typo in comment for utf8_literal usage in test_arrow_cast --- python/tests/test_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 282593d9..842db682 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -909,7 +909,7 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - # we use `utf8_literal` to returns utf8 instead of `literal` which returns + # we use `utf8_literal` to return utf8 instead of `literal` which returns # utf8view # because datafusion.arrow_cast expects a utf8 instead of utf8view # https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 f.arrow_cast(column("b"), utf8_literal("Float64")).alias("b_as_float"), From 9216389b462d612d5b366465b9dbd6f49592980e Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 10 Dec 2024 18:02:50 +0800 Subject: [PATCH 29/32] docs: remove redundant comment in test_arrow_cast for clarity --- python/tests/test_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 842db682..bc18b26c 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -910,7 +910,7 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( # we use `utf8_literal` to return utf8 instead of `literal` which returns - # utf8view # because datafusion.arrow_cast expects a utf8 instead of utf8view + # utf8view because datafusion.arrow_cast expects a utf8 instead of utf8view # https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 f.arrow_cast(column("b"), utf8_literal("Float64")).alias("b_as_float"), f.arrow_cast(column("b"), utf8_literal("Int32")).alias("b_as_int"), From 5e03c3af8198bb5b80e5a0380228b0c2f91bb582 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 12 Dec 2024 10:26:51 +0800 Subject: [PATCH 30/32] refactor: rename utf8_literal to string_literal and add alias str_lit --- python/datafusion/__init__.py | 13 ++++++++++--- python/datafusion/expr.py | 2 +- python/tests/test_functions.py | 8 ++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index c35997fe..42a54c76 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -107,9 +107,16 @@ def literal(value): return Expr.literal(value) -def utf8_literal(value): - """Create a UTF8 literal expression.""" - return Expr.utf8_literal(value) +def string_literal(value): + """Create a UTF8 literal expression. + It differs from `literal` creates a UTF8view literal. + """ + return Expr.string_literal(value) + + +def str_lit(value): + """Alias for `string_literal`""" + return string_literal(value) def lit(value): diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index e74f034b..16add16f 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -381,7 +381,7 @@ def literal(value: Any) -> Expr: return Expr(expr_internal.Expr.literal(value)) @staticmethod - def utf8_literal(value: str) -> Expr: + def string_literal(value: str) -> Expr: """Creates a new expression representing a UTF8 literal value. It is different from `literal` because it is pa.string() instead of diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index bc18b26c..5dce188e 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,7 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal, utf8_literal +from datafusion import literal, string_literal np.seterr(invalid="ignore") @@ -909,11 +909,11 @@ def test_temporal_functions(df): def test_arrow_cast(df): df = df.select( - # we use `utf8_literal` to return utf8 instead of `literal` which returns + # we use `string_literal` to return utf8 instead of `literal` which returns # utf8view because datafusion.arrow_cast expects a utf8 instead of utf8view # https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 - f.arrow_cast(column("b"), utf8_literal("Float64")).alias("b_as_float"), - f.arrow_cast(column("b"), utf8_literal("Int32")).alias("b_as_int"), + f.arrow_cast(column("b"), string_literal("Float64")).alias("b_as_float"), + f.arrow_cast(column("b"), string_literal("Int32")).alias("b_as_int"), ) result = df.collect() assert len(result) == 1 From 7e280128d30a856c2c2d1e6f6d2a7c4d8d8a8fa5 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 12 Dec 2024 18:18:09 +0800 Subject: [PATCH 31/32] docs: improve docstring for string_literal function for clarity --- docs/source/user-guide/common-operations/functions.rst | 6 +++--- python/datafusion/__init__.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index de446588..add08801 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -38,7 +38,7 @@ DataFusion offers mathematical functions such as :py:func:`~datafusion.functions .. ipython:: python - from datafusion import col, literal, utf8_literal + from datafusion import col, literal, string_literal from datafusion import functions as f df.select( @@ -112,8 +112,8 @@ Casting expressions to different data types using :py:func:`~datafusion.function .. ipython:: python df.select( - f.arrow_cast(col('"Total"'), utf8_literal("Float64")).alias("total_as_float"), - f.arrow_cast(col('"Total"'), utf8_literal("Int32")).alias("total_as_int") + f.arrow_cast(col('"Total"'), string_literal("Float64")).alias("total_as_float"), + f.arrow_cast(col('"Total"'), str_lit("Int32")).alias("total_as_int") ) Other diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 42a54c76..7367b0d3 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -109,13 +109,14 @@ def literal(value): def string_literal(value): """Create a UTF8 literal expression. - It differs from `literal` creates a UTF8view literal. + + It differs from `literal` which creates a UTF8view literal. """ return Expr.string_literal(value) def str_lit(value): - """Alias for `string_literal`""" + """Alias for `string_literal`.""" return string_literal(value) From 5eced8bd34205727c301bc7124b9f1e10b425265 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 16 Dec 2024 13:22:37 +0800 Subject: [PATCH 32/32] docs: update import statement to include str_lit alias for string_literal --- docs/source/user-guide/common-operations/functions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index add08801..12097be8 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -38,7 +38,7 @@ DataFusion offers mathematical functions such as :py:func:`~datafusion.functions .. ipython:: python - from datafusion import col, literal, string_literal + from datafusion import col, literal, string_literal, str_lit from datafusion import functions as f df.select(