Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add string.convert.convert_ipv4 APIs to pylibcudf #16994

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 12 additions & 30 deletions python/cudf/cudf/_lib/string_casting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@ from pylibcudf.libcudf.strings.convert.convert_integers cimport (
is_hex as cpp_is_hex,
to_integers as cpp_to_integers,
)
from pylibcudf.libcudf.strings.convert.convert_ipv4 cimport (
integers_to_ipv4 as cpp_integers_to_ipv4,
ipv4_to_integers as cpp_ipv4_to_integers,
is_ipv4 as cpp_is_ipv4,
)
from pylibcudf.libcudf.types cimport data_type, type_id

from cudf._lib.types cimport underlying_type_t_type_id
Expand Down Expand Up @@ -572,14 +567,10 @@ def int2ip(Column input_col):
A Column with integer represented in string ipv4 format

"""

cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_integers_to_ipv4(input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_ipv4.integers_to_ipv4(
input_col.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(plc_column)


def ip2int(Column input_col):
Expand All @@ -595,14 +586,10 @@ def ip2int(Column input_col):
A Column with ipv4 represented as integer

"""

cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_ipv4_to_integers(input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_ipv4.ipv4_to_integers(
input_col.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(plc_column)


def is_ipv4(Column source_strings):
Expand All @@ -611,15 +598,10 @@ def is_ipv4(Column source_strings):
that have strings in IPv4 format. This format is nnn.nnn.nnn.nnn
where nnn is integer digits in [0,255].
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_is_ipv4(
source_view
))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_ipv4.is_ipv4(
source_strings.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(plc_column)


def htoi(Column input_col, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view
cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \
"cudf::strings" nogil:
cdef unique_ptr[column] ipv4_to_integers(
column_view input_col) except +
column_view input) except +
vyasr marked this conversation as resolved.
Show resolved Hide resolved

cdef unique_ptr[column] integers_to_ipv4(
column_view input_col) except +
column_view integers) except +

cdef unique_ptr[column] is_ipv4(
column_view source_strings
column_view input
) except +
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# =============================================================================

set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
convert_fixed_point.pyx
convert_fixed_point.pyx convert_ipv4.pyx
)

set(linked_libraries cudf::cudf)
Expand Down
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ from . cimport (
convert_datetime,
convert_durations,
convert_fixed_point,
convert_ipv4,
)
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/strings/convert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
convert_datetime,
convert_durations,
convert_fixed_point,
convert_ipv4,
)
10 changes: 10 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.column cimport Column


# Convert a strings column of IPv4 addresses into an integer column.
cpdef Column ipv4_to_integers(Column input)

# Convert an integer column into a strings column of IPv4 addresses.
cpdef Column integers_to_ipv4(Column integers)

# Produce a boolean column flagging which strings are in IPv4 format.
cpdef Column is_ipv4(Column input)
92 changes: 92 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.column cimport Column
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4


cpdef Column ipv4_to_integers(Column input):
    """
    Convert a strings column of IPv4 addresses into integers.

    For details, see :cpp:func:`cudf::strings::ipv4_to_integers`

    Parameters
    ----------
    input : Column
        Strings column holding the IPv4 addresses to convert.

    Returns
    -------
    Column
        New uint32 column converted from strings.
    """
    cdef unique_ptr[column] c_integers

    # The libcudf call runs with the GIL released.
    with nogil:
        c_integers = move(cpp_convert_ipv4.ipv4_to_integers(input.view()))

    return Column.from_libcudf(move(c_integers))


cpdef Column integers_to_ipv4(Column integers):
"""
Converts integers into IPv4 addresses as strings.

For details, see :cpp:func:`cudf::strings::integers_to_ipv4`

Parameters
----------
integers : Column
Integer (uint32) column to convert.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This says uint32 whereas ipv4_to_integers above says it produces int32. Should these be the same, or is one really unsigned while the other is signed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this was a documentation mistake on my end. ipv4_to_integers should say it returns uint32 integers (matching the cpp docstring and the unit test in this PR)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK cool yes please match those when you resolve conflicts, then you are good to go.


Returns
-------
Column
New strings column.
"""
cdef unique_ptr[column] c_result

# Release the GIL while the libcudf conversion runs.
with nogil:
c_result = move(
cpp_convert_ipv4.integers_to_ipv4(
integers.view()
)
)

return Column.from_libcudf(move(c_result))


cpdef Column is_ipv4(Column input):
    """
    Identify the strings whose characters are all valid for
    conversion to integers from IPv4 format.

    For details, see :cpp:func:`cudf::strings::is_ipv4`

    Parameters
    ----------
    input : Column
        Strings column to check.

    Returns
    -------
    Column
        New column of boolean results for each string.
    """
    cdef unique_ptr[column] c_flags

    # The libcudf call runs with the GIL released.
    with nogil:
        c_flags = move(cpp_convert_ipv4.is_ipv4(input.view()))

    return Column.from_libcudf(move(c_flags))
31 changes: 31 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import pyarrow as pa
import pylibcudf as plc
from utils import assert_column_eq


def test_ipv4_to_integers():
    """Check IPv4 string -> uint32 conversion, including a null row."""
    # NOTE(review): the last octet "890" is outside [0, 255]; the expected
    # value equals 123 * 2**24 + 45 * 2**16 + 67 * 2**8 + 890. Confirm this
    # out-of-range input is intentional.
    addresses = pa.array(["123.45.67.890", None])
    got = plc.strings.convert.convert_ipv4.ipv4_to_integers(
        plc.interop.from_arrow(addresses)
    )
    want = pa.array([2066564730, None], type=pa.uint32())
    assert_column_eq(got, want)


def test_integers_to_ipv4():
    """Check uint32 -> IPv4 string conversion, including a null row."""
    integers = pa.array([1, 0, None], type=pa.uint32())
    got = plc.strings.convert.convert_ipv4.integers_to_ipv4(
        plc.interop.from_arrow(integers)
    )
    want = pa.array(["0.0.0.1", "0.0.0.0", None])
    assert_column_eq(got, want)


def test_is_ipv4():
    """Check IPv4 validation on a valid, two malformed, and a null row."""
    candidates = pa.array(["0.0.0.1", "1.2.34", "A", None])
    got = plc.strings.convert.convert_ipv4.is_ipv4(
        plc.interop.from_arrow(candidates)
    )
    want = pa.array([True, False, False, None])
    assert_column_eq(got, want)
Loading