diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index 55ff38f472d..fe19379bf93 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -525,12 +525,11 @@ def timedelta2int(Column input_col, dtype, format): """ dtype = dtype_to_pylibcudf_type(dtype) - cdef string c_timestamp_format = format.encode('UTF-8') return Column.from_pylibcudf( plc.strings.convert.convert_durations.to_durations( input_col.to_pylibcudf(mode="read"), dtype, - c_timestamp_format + format ) ) @@ -549,12 +548,10 @@ def int2timedelta(Column input_col, str format): A Column with Timedelta represented in string format """ - - cdef string c_duration_format = format.encode('UTF-8') return Column.from_pylibcudf( plc.strings.convert.convert_durations.from_durations( input_col.to_pylibcudf(mode="read"), - c_duration_format + format ) ) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd index ebe10574353..43ffad1d89f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd @@ -10,7 +10,7 @@ from pylibcudf.libcudf.types cimport data_type cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_durations( - const column_view & strings_col, + const column_view & input, data_type duration_type, const string & format) except + diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd index ac11b8959ed..eecdade4ef9 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd @@ -8,10 +8,10 @@ from pylibcudf.types cimport DataType cpdef Column to_durations( Column 
input, DataType duration_type, - const string& format + str format ) cpdef Column from_durations( - Column input, - const string& format + Column durations, + str format=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx index f3e0b7c9c8e..76c5809c3d5 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx @@ -15,27 +15,80 @@ from pylibcudf.types import DataType cpdef Column to_durations( Column input, DataType duration_type, - const string& format + str format ): + """ + Returns a new duration column converting a strings column into + durations using the provided format pattern. + + For details, see :cpp:func:`cudf::strings::to_durations` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + duration_type : DataType + The duration type used for creating the output column. + + format : str + String specifying the duration format in strings. + + Returns + ------- + Column + New duration column. + """ cdef unique_ptr[column] c_result + cdef string c_format = format.encode() + with nogil: - c_result = cpp_convert_durations.to_durations( - input.view(), - duration_type.c_obj, - format + c_result = move( + cpp_convert_durations.to_durations( + input.view(), + duration_type.c_obj, + c_format + ) ) return Column.from_libcudf(move(c_result)) cpdef Column from_durations( - Column input, - const string& format + Column durations, + str format=None ): + """ + Returns a new strings column converting a duration column into + strings using the provided format pattern. + + For details, see :cpp:func:`cudf::strings::from_durations` + + Parameters + ---------- + durations : Column + Duration values to convert. + + format : str, optional + The string specifying output format. + Default format is "%D days %H:%M:%S". 
+ + Returns + ------- + Column + New strings column with formatted durations. + """ cdef unique_ptr[column] c_result + + if format is None: + format = "%D days %H:%M:%S" + cdef string c_format = format.encode() + with nogil: - c_result = cpp_convert_durations.from_durations( - input.view(), - format + c_result = move( + cpp_convert_durations.from_durations( + durations.view(), + c_format + ) ) return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py index 22bb4971cb1..69f7a0fdd33 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py @@ -1,7 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from datetime import datetime - import pyarrow as pa import pylibcudf as plc import pytest @@ -21,39 +19,16 @@ def timestamp_type(request): return request.param -@pytest.fixture( - scope="module", - params=[ - pa.duration("ns"), - pa.duration("us"), - pa.duration("ms"), - pa.duration("s"), - ], -) -def duration_type(request): - return request.param - - @pytest.fixture(scope="module") def pa_timestamp_col(): return pa.array(["2011-01-01", "2011-01-02", "2011-01-03"]) -@pytest.fixture(scope="module") -def pa_duration_col(): - return pa.array(["05:20:25"]) - - @pytest.fixture(scope="module") def plc_timestamp_col(pa_timestamp_col): return plc.interop.from_arrow(pa_timestamp_col) -@pytest.fixture(scope="module") -def plc_duration_col(pa_duration_col): - return plc.interop.from_arrow(pa_duration_col) - - @pytest.mark.parametrize("format", ["%Y-%m-%d"]) def test_to_datetime( pa_timestamp_col, plc_timestamp_col, timestamp_type, format @@ -65,21 +40,3 @@ def test_to_datetime( format, ) assert_column_eq(expect, got) - - -@pytest.mark.parametrize("format", ["%H:%M:%S"]) -def test_to_duration(pa_duration_col, plc_duration_col, duration_type, format): - def to_timedelta(duration_str): - date = 
datetime.strptime(duration_str, format) - return date - datetime(1900, 1, 1) # "%H:%M:%S" zero date - - expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast( - duration_type - ) - - got = plc.strings.convert.convert_durations.to_durations( - plc_duration_col, - plc.interop.from_arrow(duration_type), - format.encode(), - ) - assert_column_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py new file mode 100644 index 00000000000..6d704309bfd --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from datetime import datetime, timedelta + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture( + params=[ + pa.duration("ns"), + pa.duration("us"), + pa.duration("ms"), + pa.duration("s"), + ], +) +def duration_type(request): + return request.param + + +@pytest.fixture(scope="module") +def pa_duration_col(): + return pa.array(["05:20:25"]) + + +@pytest.fixture(scope="module") +def plc_duration_col(pa_duration_col): + return plc.interop.from_arrow(pa_duration_col) + + +def test_to_duration(pa_duration_col, plc_duration_col, duration_type): + format = "%H:%M:%S" + + def to_timedelta(duration_str): + date = datetime.strptime(duration_str, format) + return date - datetime(1900, 1, 1) # "%H:%M:%S" zero date + + expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast( + duration_type + ) + + got = plc.strings.convert.convert_durations.to_durations( + plc_duration_col, + plc.interop.from_arrow(duration_type), + format, + ) + assert_column_eq(expect, got) + + +@pytest.mark.parametrize("format", [None, "%D days %H:%M:%S"]) +def test_from_durations(format): + pa_array = pa.array( + [timedelta(days=1, hours=1, minutes=1, seconds=1), None] + ) + result = 
plc.strings.convert.convert_durations.from_durations( + plc.interop.from_arrow(pa_array), format + ) + expected = pa.array(["1 days 01:01:01", None]) + assert_column_eq(result, expected)