diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx index a8df8c9a92c..96dcd021c3b 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx @@ -1,22 +1,11 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. -import cudf - -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.convert.convert_fixed_point cimport ( - from_fixed_point as cpp_from_fixed_point, - is_fixed_point as cpp_is_fixed_point, - to_fixed_point as cpp_to_fixed_point, -) -from pylibcudf.libcudf.types cimport data_type, type_id - from cudf._lib.column cimport Column +from cudf._lib.types cimport dtype_to_pylibcudf_type + +import pylibcudf as plc @acquire_spill_lock() @@ -32,14 +21,10 @@ def from_decimal(Column input_col): ------- A column of strings representing the input decimal values. """ - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_from_fixed_point( - input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_fixed_point.from_fixed_point( + input_col.to_pylibcudf(mode="read"), + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -57,25 +42,11 @@ def to_decimal(Column input_col, object out_type): ------- A column of decimals parsed from the string values. """ - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - cdef int scale = out_type.scale - cdef data_type c_out_type - if isinstance(out_type, cudf.Decimal32Dtype): - c_out_type = data_type(type_id.DECIMAL32, -scale) - elif isinstance(out_type, cudf.Decimal64Dtype): - c_out_type = data_type(type_id.DECIMAL64, -scale) - elif isinstance(out_type, cudf.Decimal128Dtype): - c_out_type = data_type(type_id.DECIMAL128, -scale) - else: - raise TypeError("should be a decimal dtype") - with nogil: - c_result = move( - cpp_to_fixed_point( - input_column_view, - c_out_type)) - - result = Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point( + input_col.to_pylibcudf(mode="read"), + dtype_to_pylibcudf_type(out_type), + ) + result = Column.from_pylibcudf(plc_column) result.dtype.precision = out_type.precision return result @@ -98,14 +69,8 @@ def is_fixed_point(Column input_col, object dtype): ------- A Column of booleans indicating valid decimal conversion. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = input_col.view() - cdef int scale = dtype.scale - cdef data_type c_dtype = data_type(type_id.DECIMAL64, -scale) - with nogil: - c_result = move(cpp_is_fixed_point( - source_view, - c_dtype - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_fixed_point.is_fixed_point( + input_col.to_pylibcudf(mode="read"), + dtype_to_pylibcudf_type(dtype), + ) + return Column.from_pylibcudf(plc_column) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd index 6f820f3c9a4..72ab329f2dd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd @@ -9,13 +9,13 @@ from pylibcudf.libcudf.types cimport data_type cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_fixed_point( - column_view input_col, + column_view input, data_type output_type) except + cdef unique_ptr[column] from_fixed_point( - column_view input_col) except + + column_view input) except + cdef unique_ptr[column] is_fixed_point( - column_view source_strings, - data_type output_type + column_view input, + data_type decimal_type ) except + diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt index 3febc78dfd2..fe8da975566 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt @@ -12,7 +12,9 @@ # the License. # ============================================================================= -set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx) +set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx + convert_fixed_point.pyx +) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd index 5525bca46d6..36abf463371 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd @@ -1,2 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . cimport convert_booleans, convert_datetime, convert_durations +from . cimport ( + convert_booleans, + convert_datetime, + convert_durations, + convert_fixed_point, +) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py index 2340ebe9a26..c0be4093836 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py @@ -1,2 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import convert_booleans, convert_datetime, convert_durations +from . import ( + convert_booleans, + convert_datetime, + convert_durations, + convert_fixed_point, +) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd new file mode 100644 index 00000000000..049b9b3fffe --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.types cimport DataType + + +cpdef Column to_fixed_point(Column input, DataType output_type) + +cpdef Column from_fixed_point(Column input) + +cpdef Column is_fixed_point(Column input, DataType decimal_type=*) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx new file mode 100644 index 00000000000..40dadf6f967 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx @@ -0,0 +1,107 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings.convert cimport ( + convert_fixed_point as cpp_fixed_point, +) +from pylibcudf.types cimport DataType, type_id + + +cpdef Column to_fixed_point(Column input, DataType output_type): + """ + Returns a new fixed-point column parsing decimal values from the + provided strings column. + + For details, see :cpp:details:`cudf::strings::to_fixed_point` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + output_type : DataType + Type of fixed-point column to return including the scale value. + + Returns + ------- + Column + New column of output_type. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_fixed_point.to_fixed_point( + input.view(), + output_type.c_obj, + ) + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column from_fixed_point(Column input): + """ + Returns a new strings column converting the fixed-point values + into a strings column. + + For details, see :cpp:details:`cudf::strings::from_fixed_point` + + Parameters + ---------- + input : Column + Fixed-point column to convert. + + Returns + ------- + Column + New strings column. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_fixed_point.from_fixed_point( + input.view(), + ) + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column is_fixed_point(Column input, DataType decimal_type=None): + """ + Returns a boolean column identifying strings in which all + characters are valid for conversion to fixed-point. + + For details, see :cpp:details:`cudf::strings::is_fixed_point` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + decimal_type : DataType + Fixed-point type (with scale) used only for checking overflow. + Defaults to Decimal64 + + Returns + ------- + Column + New column of boolean results for each string. + """ + cdef unique_ptr[column] c_result + + if decimal_type is None: + decimal_type = DataType(type_id.DECIMAL64) + + with nogil: + c_result = move( + cpp_fixed_point.is_fixed_point( + input.view(), + decimal_type.c_obj, + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py new file mode 100644 index 00000000000..b1c4d729604 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +import decimal + +import pyarrow as pa +import pylibcudf as plc +from utils import assert_column_eq + + +def test_to_fixed_point(): + typ = pa.decimal128(38, 2) + arr = pa.array(["123", "1.23", None]) + result = plc.strings.convert.convert_fixed_point.to_fixed_point( + plc.interop.from_arrow(arr), plc.interop.from_arrow(typ) + ) + expected = arr.cast(typ) + assert_column_eq(result, expected) + + +def test_from_fixed_point(): + arr = pa.array([decimal.Decimal("1.1"), None]) + result = plc.strings.convert.convert_fixed_point.from_fixed_point( + plc.interop.from_arrow(arr), + ) + expected = pa.array(["1.1", None]) + assert_column_eq(result, expected) + + +def test_is_fixed_point(): + arr = pa.array(["123", "1.23", "1.2.3", "", None]) + result = plc.strings.convert.convert_fixed_point.is_fixed_point( + plc.interop.from_arrow(arr), + ) + expected = pa.array([True, True, False, False, None]) + assert_column_eq(result, expected)