From 3d66fb1f59e62214a691e0bb41e32db6ec3e5a10 Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Thu, 15 Aug 2024 20:43:07 +0200 Subject: [PATCH 1/9] first test refactoring --- tests/expr_and_series/dt/to_string_test.py | 82 +++++++++++++++------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index b78f2d1f7..e0f8beed5 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -6,6 +6,7 @@ import pytest import narwhals.stable.v1 as nw +from tests.utils import compare_dicts from tests.utils import is_windows data = { @@ -44,6 +45,14 @@ def test_dt_to_string(constructor_eager: Any, fmt: str) -> None: assert result == expected_col +def _clean_string(result: str) -> str: + # rstrip '0' to remove trailing zeros, as different libraries handle this differently + # if there's then a trailing `.`, remove that too. + if "." in result: + result = result.rstrip("0").rstrip(".") + return result + + @pytest.mark.parametrize( ("data", "expected"), [ @@ -54,16 +63,9 @@ def test_dt_to_string(constructor_eager: Any, fmt: str) -> None: ], ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") -def test_dt_to_string_iso_local_datetime( +def test_dt_to_string_iso_local_datetime_series( constructor_eager: Any, data: datetime, expected: str ) -> None: - def _clean_string(result: str) -> str: - # rstrip '0' to remove trailing zeros, as different libraries handle this differently - # if there's then a trailing `.`, remove that too. - if "." in result: - result = result.rstrip("0").rstrip(".") - return result - df = constructor_eager({"a": [data]}) result = ( nw.from_native(df, eager_only=True)["a"] @@ -72,13 +74,6 @@ def _clean_string(result: str) -> str: ) assert _clean_string(result) == _clean_string(expected) - result = ( - nw.from_native(df, eager_only=True) - .select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S.%f"))["a"] - .to_list()[0] - ) - assert _clean_string(result) == _clean_string(expected) - result = ( nw.from_native(df, eager_only=True)["a"] .dt.to_string("%Y-%m-%dT%H:%M:%S%.f") @@ -86,12 +81,39 @@ def _clean_string(result: str) -> str: ) assert _clean_string(result) == _clean_string(expected) - result = ( - nw.from_native(df, eager_only=True) - .select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S%.f"))["a"] - .to_list()[0] + +@pytest.mark.parametrize( + ("data", "expected"), + [ + (datetime(2020, 1, 9, 12, 34, 56), "2020-01-09T12:34:56.000000"), + (datetime(2020, 1, 9, 12, 34, 56, 123), "2020-01-09T12:34:56.000123"), + (datetime(2020, 1, 9, 12, 34, 56, 123456), "2020-01-09T12:34:56.123456"), + ], +) +@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") +def test_dt_to_string_iso_local_datetime_expr( + constructor: Any, data: datetime, expected: str +) -> None: + df = constructor({"a": [data]}) + + result = nw.from_native(df).with_columns( + nw.col("a") + .dt.to_string("%Y-%m-%dT%H:%M:%S.%f") + .str.replace_all(r"0+$", "") + .str.replace_all(r"\.$", "") + .alias("b") ) - assert _clean_string(result) == _clean_string(expected) + compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]}) + + result = nw.from_native(df).with_columns( + nw.col("a") + .dt.to_string("%Y-%m-%dT%H:%M:%S%.f") + .str.replace_all(r"0+$", "") + .str.replace_all(r"\.$", "") + .alias("b") + ) + expected = _clean_string(expected) + compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]}) @pytest.mark.parametrize( @@ -99,7 +121,7 @@ def _clean_string(result: str) -> str: [(datetime(2020, 1, 9), "2020-01-09")], ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") -def test_dt_to_string_iso_local_date( +def test_dt_to_string_iso_local_date_series( constructor_eager: Any, data: datetime, expected: str ) -> None: df = constructor_eager({"a": [data]}) @@ -108,9 +130,17 @@ def test_dt_to_string_iso_local_date( ) assert result == expected - result = ( - nw.from_native(df, eager_only=True) - .select(b=nw.col("a").dt.to_string("%Y-%m-%d"))["b"] - .to_list()[0] + +@pytest.mark.parametrize( + ("data", "expected"), + [(datetime(2020, 1, 9), "2020-01-09")], +) +@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") +def test_dt_to_string_iso_local_date_expr( + constructor: Any, data: datetime, expected: str +) -> None: + df = constructor({"a": [data]}) + result = nw.from_native(df).with_columns( + nw.col("a").dt.to_string("%Y-%m-%d").alias("b") ) - assert result == expected + compare_dicts(result, {"a": [data], "b": [expected]}) From 114d16abe88ff54adde1c17763debc4af9875b71 Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Fri, 16 Aug 2024 19:29:06 +0200 Subject: [PATCH 2/9] tests: complete expr/series test separation --- tests/expr_and_series/dt/to_string_test.py | 61 +++++++++++++++++----- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index e0f8beed5..41a518c21 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -18,31 +18,60 @@ @pytest.mark.parametrize( - "fmt", ["%Y-%m-%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%G-W%V-%u", "%G-W%V"] + "fmt", + [ + "%Y-%m-%d", + "%Y-%m-%d %H:%M:%S", + "%Y/%m/%d %H:%M:%S", + "%G-W%V-%u", + "%G-W%V", + ], ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") -def test_dt_to_string(constructor_eager: Any, fmt: str) -> None: +def test_dt_to_string_series(constructor_eager: Any, fmt: str) -> None: input_frame = nw.from_native(constructor_eager(data), eager_only=True) input_series = input_frame["a"] expected_col = [datetime.strftime(d, fmt) for d in data["a"]] - result = input_series.dt.to_string(fmt).to_list() + result = input_frame.select(input_series.dt.to_string(fmt)) + if any( x in str(constructor_eager) for x in ["pandas_pyarrow", "pyarrow_table", "modin"] ): # PyArrow differs from other libraries, in that %S also shows # the fraction of a second. - result = [x[: x.find(".")] if "." in x else x for x in result] - assert result == expected_col - result = input_frame.select(nw.col("a").dt.to_string(fmt))["a"].to_list() - if any( - x in str(constructor_eager) for x in ["pandas_pyarrow", "pyarrow_table", "modin"] - ): + result = input_frame.select( + input_series.dt.to_string(fmt).str.replace(r"\.\d+$", "") + ) + + compare_dicts(result, {"a": expected_col}) + + +@pytest.mark.parametrize( + "fmt", + [ + "%Y-%m-%d", + "%Y-%m-%d %H:%M:%S", + "%Y/%m/%d %H:%M:%S", + "%G-W%V-%u", + "%G-W%V", + ], +) +@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") +def test_dt_to_string_expr(constructor: Any, fmt: str) -> None: + input_frame = nw.from_native(constructor(data)) + + expected_col = [datetime.strftime(d, fmt) for d in data["a"]] + + result = input_frame.select(nw.col("a").dt.to_string(fmt).alias("b")) + if any(x in str(constructor) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]): # PyArrow differs from other libraries, in that %S also shows # the fraction of a second. - result = [x[: x.find(".")] if "." in x else x for x in result] - assert result == expected_col + result = input_frame.select( + nw.col("a").dt.to_string(fmt).str.replace(r"\.\d+$", "").alias("b") + ) + compare_dicts(result, {"b": expected_col}) def _clean_string(result: str) -> str: @@ -59,7 +88,10 @@ def _clean_string(result: str) -> str: (datetime(2020, 1, 9), "2020-01-09T00:00:00.000000"), (datetime(2020, 1, 9, 12, 34, 56), "2020-01-09T12:34:56.000000"), (datetime(2020, 1, 9, 12, 34, 56, 123), "2020-01-09T12:34:56.000123"), - (datetime(2020, 1, 9, 12, 34, 56, 123456), "2020-01-09T12:34:56.123456"), + ( + datetime(2020, 1, 9, 12, 34, 56, 123456), + "2020-01-09T12:34:56.123456", + ), ], ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") @@ -87,7 +119,10 @@ def test_dt_to_string_iso_local_datetime_series( [ (datetime(2020, 1, 9, 12, 34, 56), "2020-01-09T12:34:56.000000"), (datetime(2020, 1, 9, 12, 34, 56, 123), "2020-01-09T12:34:56.000123"), - (datetime(2020, 1, 9, 12, 34, 56, 123456), "2020-01-09T12:34:56.123456"), + ( + datetime(2020, 1, 9, 12, 34, 56, 123456), + "2020-01-09T12:34:56.123456", + ), ], ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") From 8c15dd5f963007d27e0aa3c9db3ea82ad42a2766 Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Sat, 17 Aug 2024 17:57:29 +0200 Subject: [PATCH 3/9] feat: add first implementation --- narwhals/_dask/expr.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 40fef92a4..c3a8ae3f0 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -475,7 +475,9 @@ def fill_null(self, value: Any) -> DaskExpr: ) def clip( - self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None + self: Self, + lower_bound: Any | None = None, + upper_bound: Any | None = None, ) -> Self: return self._from_call( lambda _input, _lower, _upper: _input.clip(lower=_lower, upper=_upper), @@ -798,6 +800,14 @@ def ordinal_day(self) -> DaskExpr: returns_scalar=False, ) + def to_string(self, format: str) -> DaskExpr: # noqa: A002 + return self._expr._from_call( + lambda _input, _format: _input.dt.strftime(_format), + "strftime", + format, + returns_scalar=False, + ) + class DaskExprNameNamespace: def __init__(self: Self, expr: DaskExpr) -> None: From c45781de3b583d48204d09707940cb578b5e737f Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Sun, 18 Aug 2024 11:28:53 +0200 Subject: [PATCH 4/9] some more refactoring --- tests/expr_and_series/dt/to_string_test.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index 41a518c21..7a63b0120 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -82,6 +82,11 @@ def _clean_string(result: str) -> str: return result +def _clean_string_expr(e: Any) -> Any: + # Same as `_clean_string` but for Expr + return e.str.replace_all(r"0+$", "").str.replace_all(r"\.$", "") + + @pytest.mark.parametrize( ("data", "expected"), [ @@ -132,22 +137,13 @@ def test_dt_to_string_iso_local_datetime_expr( df = constructor({"a": [data]}) result = nw.from_native(df).with_columns( - nw.col("a") - .dt.to_string("%Y-%m-%dT%H:%M:%S.%f") - .str.replace_all(r"0+$", "") - .str.replace_all(r"\.$", "") - .alias("b") + _clean_string_expr(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S.%f")).alias("b") ) compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]}) result = nw.from_native(df).with_columns( - nw.col("a") - .dt.to_string("%Y-%m-%dT%H:%M:%S%.f") - .str.replace_all(r"0+$", "") - .str.replace_all(r"\.$", "") - .alias("b") + _clean_string_expr(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S%.f")).alias("b") ) - expected = _clean_string(expected) compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]}) From 39a9de789dcb1ed4b89a4bc0448eae7bcd8c868c Mon Sep 17 00:00:00 2001 From: Luciano <66913960+lucianosrp@users.noreply.github.com> Date: Sun, 18 Aug 2024 14:52:17 +0200 Subject: [PATCH 5/9] Apply suggestions from code review Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> --- tests/expr_and_series/dt/to_string_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index 7a63b0120..ddd6965b8 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -34,16 +34,14 @@ def test_dt_to_string_series(constructor_eager: Any, fmt: str) -> None: expected_col = [datetime.strftime(d, fmt) for d in data["a"]] - result = input_frame.select(input_series.dt.to_string(fmt)) + result = {"a": input_series.dt.to_string(fmt)} if any( x in str(constructor_eager) for x in ["pandas_pyarrow", "pyarrow_table", "modin"] ): # PyArrow differs from other libraries, in that %S also shows # the fraction of a second. - result = input_frame.select( - input_series.dt.to_string(fmt).str.replace(r"\.\d+$", "") - ) + result = {"a": input_series.dt.to_string(fmt).str.replace(r"\.\d+$", "")} compare_dicts(result, {"a": expected_col}) From 89c51e56bdbb409f704025a8ed1dde0158a3267f Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Sun, 18 Aug 2024 15:09:13 +0200 Subject: [PATCH 6/9] fix: add '.f' handling --- narwhals/_dask/expr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index c3a8ae3f0..e1b03a51e 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -804,7 +804,7 @@ def to_string(self, format: str) -> DaskExpr: # noqa: A002 return self._expr._from_call( lambda _input, _format: _input.dt.strftime(_format), "strftime", - format, + format.replace("%.f", ".%f"), returns_scalar=False, ) From 3e16709fa99f11e2e4e08313ac9d4289d32370f8 Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Mon, 19 Aug 2024 20:25:44 +0200 Subject: [PATCH 7/9] tests: add modin catch --- tests/expr_and_series/dt/to_string_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index ddd6965b8..8524d6014 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -166,8 +166,11 @@ def test_dt_to_string_iso_local_date_series( ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_dt_to_string_iso_local_date_expr( - constructor: Any, data: datetime, expected: str + request: Any, constructor: Any, data: datetime, expected: str ) -> None: + if "modin" in str(constructor): + request.applymarker(pytest.mark.xfail) + df = constructor({"a": [data]}) result = nw.from_native(df).with_columns( nw.col("a").dt.to_string("%Y-%m-%d").alias("b") From 6c0838481dc23bfe5ab798b22aa34d9c7623c56b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 18:30:47 +0000 Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_dask/expr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 2d56d64b1..faedb6095 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -800,12 +800,11 @@ def ordinal_day(self) -> DaskExpr: returns_scalar=False, ) - def to_string(self, format: str) -> DaskExpr: # noqa: A002 return self._expr._from_call( lambda _input, _format: _input.dt.strftime(_format), "strftime", - format.replace("%.f", ".%f"), + format.replace("%.f", ".%f"), returns_scalar=False, ) From 1d90d8fcac2effbdcb0fea73762234fc00fd8d4c Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Mon, 19 Aug 2024 20:38:30 +0200 Subject: [PATCH 9/9] tests: add modin catch --- tests/expr_and_series/dt/to_string_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index 8524d6014..8cd7ae4c5 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -130,8 +130,10 @@ def test_dt_to_string_iso_local_datetime_series( ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_dt_to_string_iso_local_datetime_expr( - constructor: Any, data: datetime, expected: str + request: Any, constructor: Any, data: datetime, expected: str ) -> None: + if "modin" in str(constructor): + request.applymarker(pytest.mark.xfail) df = constructor({"a": [data]}) result = nw.from_native(df).with_columns(