Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement extract_datetime_component in libcudf/pylibcudf #16776

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
*
* @param column cudf::column_view of the input datetime values
* @param component The datetime component to extract
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t datetime component
Expand All @@ -239,6 +240,7 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
std::unique_ptr<cudf::column> extract_datetime_component(
cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved

/** @} */ // end of group
Expand Down
81 changes: 48 additions & 33 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -440,83 +440,70 @@ std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::YEAR>,
cudf::type_id::INT16>(column, stream, mr);
return extract_datetime_component(column, datetime_component::YEAR, stream, mr);
}

std::unique_ptr<column> extract_month(column_view const& column,
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::MONTH>,
cudf::type_id::INT16>(column, stream, mr);
return extract_datetime_component(column, datetime_component::MONTH, stream, mr);
}

/**
 * @brief Extracts the DAY datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_day(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the DAY extraction operator and INT16 output.
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::DAY>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::DAY, stream, mr);
}

/**
 * @brief Extracts the WEEKDAY datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_weekday(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the WEEKDAY extraction operator and INT16 output.
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::WEEKDAY>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
}

/**
 * @brief Extracts the HOUR datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_hour(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the HOUR extraction operator and INT16 output.
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::HOUR>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::HOUR, stream, mr);
}

/**
 * @brief Extracts the MINUTE datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_minute(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the MINUTE extraction operator and INT16 output.
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::MINUTE>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
}

/**
 * @brief Extracts the SECOND datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_second(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the SECOND extraction operator and INT16 output.
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::SECOND>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::SECOND, stream, mr);
}

/**
 * @brief Extracts the MILLISECOND datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the MILLISECOND extraction operator and INT16 output.
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::MILLISECOND>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
}

/**
 * @brief Extracts the MICROSECOND datetime component of `column`.
 *
 * @param column Input column view handed to the extraction operation
 * @param stream CUDA stream forwarded to the extraction operation
 * @param mr Device memory resource forwarded to the extraction operation
 * @return Result of detail::apply_datetime_op instantiated with cudf::type_id::INT16
 */
std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Instantiates the generic datetime op with the MICROSECOND extraction operator and INT16 output.
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::MICROSECOND>,
cudf::type_id::INT16>(column, stream, mr);
// NOTE(review): unreachable as written, because the statement above already returns.
// This listing looks like a rendered diff (removed line followed by its replacement);
// confirm against the real file which of the two returns is current.
return extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
}

std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::NANOSECOND>,
cudf::type_id::INT16>(column, stream, mr);
return extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved
}

std::unique_ptr<column> last_day_of_month(column_view const& column,
Expand Down Expand Up @@ -643,20 +630,48 @@ std::unique_ptr<column> extract_second(column_view const& column,

std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
switch (component) {
case datetime_component::YEAR: return extract_year(column, mr);
case datetime_component::MONTH: return extract_month(column, mr);
case datetime_component::DAY: return extract_day(column, mr);
case datetime_component::WEEKDAY: return extract_weekday(column, mr);
case datetime_component::HOUR: return extract_hour(column, mr);
case datetime_component::MINUTE: return extract_minute(column, mr);
case datetime_component::SECOND: return extract_second(column, mr);
case datetime_component::MILLISECOND: return extract_millisecond_fraction(column, mr);
case datetime_component::MICROSECOND: return extract_microsecond_fraction(column, mr);
case datetime_component::NANOSECOND: return extract_nanosecond_fraction(column, mr);
case datetime_component::YEAR:
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::YEAR>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::MONTH:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::MONTH>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::DAY:
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::DAY>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::WEEKDAY:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::WEEKDAY>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::HOUR:
return detail::apply_datetime_op<detail::extract_component_operator<datetime_component::HOUR>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::MINUTE:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::MINUTE>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::SECOND:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::SECOND>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::MILLISECOND:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::MILLISECOND>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::MICROSECOND:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::MICROSECOND>,
cudf::type_id::INT16>(column, stream, mr);
case datetime_component::NANOSECOND:
return detail::apply_datetime_op<
detail::extract_component_operator<datetime_component::NANOSECOND>,
cudf::type_id::INT16>(column, stream, mr);
default: CUDF_FAIL("Unsupported datetime component.");
}
}
Expand Down
2 changes: 0 additions & 2 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ from pylibcudf.datetime import DatetimeComponent
from cudf._lib.column cimport Column
from cudf._lib.scalar cimport DeviceScalar

import pylibcudf as plc


@acquire_spill_lock()
def add_months(Column col, Column months):
Expand Down
5 changes: 3 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
# the License.
# =============================================================================

set(cython_sources aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx reduce.pyx
replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
set(cython_sources
aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx labeling.pyx reduce.pyx
replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
)

set(linked_libraries cudf::cudf)
Expand Down
4 changes: 2 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/datetime.pxd
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libc.stdint cimport int32_t
from libc.stdint cimport uint8_t
from libcpp.memory cimport unique_ptr
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport scalar


cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
cpdef enum class datetime_component(int32_t):
cpdef enum class datetime_component(uint8_t):
INVALID
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved
YEAR
MONTH
Expand Down
37 changes: 18 additions & 19 deletions python/pylibcudf/pylibcudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,29 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import datetime

import pyarrow as pa
import pyarrow.compute as pc
import pylibcudf as plc
import pytest
from utils import assert_column_eq


@pytest.fixture
def date_column(has_nulls):
@pytest.fixture(scope="module", params=["s", "ms", "us", "ns"])
def datetime_column(has_nulls, request):
values = [
pa.scalar(1694004645123456789, pa.timestamp("ns")),
pa.scalar(1544024645123456789, pa.timestamp("ns")),
pa.scalar(1682342345346235434, pa.timestamp("ns")),
pa.scalar(1445624625623452452, pa.timestamp("ns")),
datetime.datetime(1999, 1, 1),
datetime.datetime(2024, 10, 12),
datetime.datetime(1970, 1, 1),
datetime.datetime(2260, 1, 1),
datetime.datetime(2024, 2, 29, 3, 14, 15),
datetime.datetime(2024, 2, 29, 3, 14, 15, 999),
]
if has_nulls:
values[2] = None
return plc.interop.from_arrow(pa.array(values))


def test_extract_year(column):
got = plc.datetime.extract_year(column)
expect = pc.year(plc.interop.to_arrow(column)).cast(pa.int16())

assert_column_eq(expect, got)
return plc.interop.from_arrow(
pa.array(values, type=pa.timestamp(request.param))
)


@pytest.fixture(
Expand All @@ -46,16 +45,16 @@ def component(request):
return request.param


def test_extract_datetime_component(column, component):
def test_extract_datetime_component(datetime_column, component):
attr, component = component
kwargs = {}
if attr == "day_of_week":
kwargs = {"count_from_zero": False}
got = plc.datetime.extract_datetime_component(column, component)
got = plc.datetime.extract_datetime_component(datetime_column, component)
# libcudf produces an int16, arrow produces an int64

expect = getattr(pc, attr)(plc.interop.to_arrow(column), **kwargs).cast(
pa.int16()
)
expect = getattr(pc, attr)(
plc.interop.to_arrow(datetime_column), **kwargs
).cast(pa.int16())

assert_column_eq(expect, got)
Loading