From fc828486bd70723e162075f243bf2b61f1cc45b6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 6 Sep 2024 12:09:37 -0700 Subject: [PATCH 01/14] impl/tests --- cpp/include/cudf/datetime.hpp | 34 +++++ cpp/src/datetime/datetime_ops.cu | 74 +++++----- cpp/tests/datetime/datetime_ops_test.cpp | 130 ++++++++++++++++++ python/pylibcudf/pylibcudf/datetime.pxd | 7 + python/pylibcudf/pylibcudf/datetime.pyx | 34 ++++- .../pylibcudf/libcudf/CMakeLists.txt | 4 +- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 18 +++ .../pylibcudf/pylibcudf/libcudf/datetime.pyx | 0 .../pylibcudf/tests/test_datetime.py | 46 ++++++- 9 files changed, 300 insertions(+), 47 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/libcudf/datetime.pyx diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index f7bed8bdc7e..db406bb7ea8 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -37,6 +37,23 @@ namespace datetime { * @file */ +/** + * @brief Types of datetime components that may be extracted. + */ +enum class datetime_component { + INVALID = 0, + YEAR, + MONTH, + DAY, + WEEKDAY, + HOUR, + MINUTE, + SECOND, + MILLISECOND, + MICROSECOND, + NANOSECOND +}; + /** * @brief Extracts year from any datetime type and returns an int16_t * cudf::column. @@ -148,6 +165,7 @@ std::unique_ptr extract_second( * @returns cudf::column of the extracted int16_t milliseconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ + std::unique_ptr extract_millisecond_fraction( cudf::column_view const& column, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); @@ -186,6 +204,22 @@ std::unique_ptr extract_nanosecond_fraction( cudf::column_view const& column, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +/** + * @brief Extracts the specified datetime component from any datetime type and + * returns an int16_t cudf::column. + * + * @param column cudf::column_view of the input datetime values + * @param component The datetime component to extract + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t datetime component + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_datetime_component( + cudf::column_view const& column, + datetime_component component, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group /** * @addtogroup datetime_compute diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 7629cad79a9..49b4b9841fc 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -44,19 +44,6 @@ namespace cudf { namespace datetime { namespace detail { -enum class datetime_component { - INVALID = 0, - YEAR, - MONTH, - DAY, - WEEKDAY, - HOUR, - MINUTE, - SECOND, - MILLISECOND, - MICROSECOND, - NANOSECOND -}; enum class rounding_function { CEIL, ///< Rounds up to the next integer multiple of the provided frequency @@ -453,63 +440,56 @@ std::unique_ptr extract_year(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_day(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_weekday(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_hour(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_minute(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_second(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, @@ -517,7 +497,7 @@ std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< - detail::extract_component_operator, + detail::extract_component_operator, cudf::type_id::INT16>(column, stream, mr); } @@ -526,7 +506,7 @@ std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< - detail::extract_component_operator, + detail::extract_component_operator, cudf::type_id::INT16>(column, stream, mr); } @@ -535,7 +515,7 @@ std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::device_async_resource_ref mr) { return detail::apply_datetime_op< - detail::extract_component_operator, + detail::extract_component_operator, cudf::type_id::INT16>(column, stream, mr); } @@ -648,6 +628,26 @@ std::unique_ptr extract_second(column_view const& column, rmm::device_as return detail::extract_second(column, cudf::get_default_stream(), mr); } +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + switch (component) { + case datetime_component::YEAR: return extract_year(column, mr); + case datetime_component::MONTH: return extract_month(column, mr); + case datetime_component::DAY: return extract_day(column, mr); + case datetime_component::WEEKDAY: return extract_weekday(column, mr); + case datetime_component::HOUR: return extract_hour(column, mr); + case datetime_component::MINUTE: return extract_minute(column, mr); + case datetime_component::SECOND: return extract_second(column, mr); + case datetime_component::MILLISECOND: return extract_millisecond_fraction(column, mr); + case datetime_component::MICROSECOND: return extract_microsecond_fraction(column, mr); + case datetime_component::NANOSECOND: return extract_nanosecond_fraction(column, mr); + default: CUDF_FAIL("Unsupported datetime component."); + } +} + std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::device_async_resource_ref mr) { diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 13577c4d0ea..603edb27c7c 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{0, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns), fixed_width_column_wrapper{766, 424, 623}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1969, 1970, 1970}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{12, 1, 1}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{31, 1, 1}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{23, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{762, 0, 929}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{976, 23, 987}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{675, 432, 234}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{766, 424, 623}); } template diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 2fce48cf1b4..72ce680ba7a 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,8 +1,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.libcudf.datetime cimport datetime_component + from .column cimport Column cpdef Column extract_year( Column col ) + +cpdef Column extract_datetime_component( + Column col, + datetime_component component +) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 0ddc68bcb9d..4345c5428c4 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -2,7 +2,14 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.datetime cimport extract_year as cpp_extract_year +from pylibcudf.libcudf.datetime cimport ( + datetime_component, + extract_datetime_component as cpp_extract_datetime_component, + extract_year as cpp_extract_year, +) + +from pylibcudf.libcudf.datetime import \ + datetime_component as DatetimeComponent # no-cython-lint from .column cimport Column @@ -28,3 +35,28 @@ cpdef Column extract_year( with nogil: result = move(cpp_extract_year(values.view())) return Column.from_libcudf(move(result)) + +cpdef Column extract_datetime_component( + Column values, + datetime_component component +): + """ + Extract a datetime component from a datetime column. + + Parameters + ---------- + values : Column + The column to extract the component from. + component : DatetimeComponent + The datetime component to extract. + + Returns + ------- + Column + Column with the extracted component. + """ + cdef unique_ptr[column] result + + with nogil: + result = move(cpp_extract_datetime_component(values.view(), component)) + return Column.from_libcudf(move(result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt index b04e94f1546..0964b653573 100644 --- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt @@ -12,8 +12,8 @@ # the License. # ============================================================================= -set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx reduce.pyx replace.pyx - round.pyx stream_compaction.pyx types.pyx unary.pyx +set(cython_sources aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx reduce.pyx + replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index a4465343197..af3bd1ffcf0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. +from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -7,6 +8,19 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: + cpdef enum class datetime_component(int32_t): + INVALID + YEAR + MONTH + DAY + WEEKDAY + HOUR + MINUTE + SECOND + MILLISECOND + MICROSECOND + NANOSECOND + cdef unique_ptr[column] extract_year(const column_view& column) except + cdef unique_ptr[column] extract_month(const column_view& column) except + cdef unique_ptr[column] extract_day(const column_view& column) except + @@ -23,6 +37,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_nanosecond_fraction( const column_view& column ) except + + cdef unique_ptr[column] extract_datetime_component( + const column_view& column, + datetime_component component + ) except + ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": DAY "cudf::datetime::rounding_frequency::DAY" diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pyx b/python/pylibcudf/pylibcudf/libcudf/datetime.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index d3aa6101e2d..d2244e98ada 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -1,7 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -import datetime - import pyarrow as pa import pyarrow.compute as pc import pylibcudf as plc @@ -12,14 +10,14 @@ @pytest.fixture def column(has_nulls): values = [ - datetime.date(1999, 1, 1), - datetime.date(2024, 10, 12), - datetime.date(1, 1, 1), - datetime.date(9999, 1, 1), + pa.scalar(1694004645123456789, pa.timestamp("ns")), + pa.scalar(1544024645123456789, pa.timestamp("ns")), + pa.scalar(1682342345346235434, pa.timestamp("ns")), + pa.scalar(1445624625623452452, pa.timestamp("ns")), ] if has_nulls: values[2] = None - return plc.interop.from_arrow(pa.array(values, type=pa.date32())) + return plc.interop.from_arrow(pa.array(values)) def test_extract_year(column): @@ -28,3 +26,37 @@ def test_extract_year(column): expect = pc.year(plc.interop.to_arrow(column)).cast(pa.int16()) assert_column_eq(expect, got) + + +@pytest.fixture( + params=[ + ("year", plc.datetime.DatetimeComponent.YEAR), + ("month", plc.datetime.DatetimeComponent.MONTH), + ("day", plc.datetime.DatetimeComponent.DAY), + ("day_of_week", plc.datetime.DatetimeComponent.WEEKDAY), + ("hour", plc.datetime.DatetimeComponent.HOUR), + ("minute", plc.datetime.DatetimeComponent.MINUTE), + ("second", plc.datetime.DatetimeComponent.SECOND), + ("millisecond", plc.datetime.DatetimeComponent.MILLISECOND), + ("microsecond", plc.datetime.DatetimeComponent.MICROSECOND), + ("nanosecond", plc.datetime.DatetimeComponent.NANOSECOND), + ], + ids=lambda x: x[0], +) +def component(request): + return request.param + + +def test_extract_datetime_component(column, component): + attr, component = component + kwargs = {} + if attr == "day_of_week": + kwargs = {"count_from_zero": False} + got = plc.datetime.extract_datetime_component(column, component) + # libcudf produces an int16, arrow produces an int64 + + expect = getattr(pc, attr)(plc.interop.to_arrow(column), **kwargs).cast( + pa.int16() + ) + + assert_column_eq(expect, got) From d9eef45f5188853684e441d5b2a4188606d58e47 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 6 Sep 2024 12:15:34 -0700 Subject: [PATCH 02/14] cleanup --- python/pylibcudf/pylibcudf/datetime.pyx | 2 ++ python/pylibcudf/pylibcudf/tests/test_datetime.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 4345c5428c4..0c231fbad62 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -43,6 +43,8 @@ cpdef Column extract_datetime_component( """ Extract a datetime component from a datetime column. + For details, see :cpp:func:`cudf::datetime::extract_datetime_component`. + Parameters ---------- values : Column diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index d2244e98ada..c48ae673afb 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -22,7 +22,6 @@ def column(has_nulls): def test_extract_year(column): got = plc.datetime.extract_year(column) - # libcudf produces an int16, arrow produces an int64 expect = pc.year(plc.interop.to_arrow(column)).cast(pa.int16()) assert_column_eq(expect, got) From 637f6329ed9c2ea9c98fb2710cc09d79d735d38a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 6 Sep 2024 14:45:02 -0700 Subject: [PATCH 03/14] plumb to cudf cython --- python/cudf/cudf/_lib/datetime.pyx | 57 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 483250dd36f..d844466120f 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -13,6 +13,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.filling cimport calendrical_month_sequence from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type +from pylibcudf.datetime import DatetimeComponent from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar @@ -41,38 +42,34 @@ def extract_datetime_component(Column col, object field): cdef unique_ptr[column] c_result cdef column_view col_view = col.view() - - with nogil: - if field == "year": - c_result = move(libcudf_datetime.extract_year(col_view)) - elif field == "month": - c_result = move(libcudf_datetime.extract_month(col_view)) - elif field == "day": - c_result = move(libcudf_datetime.extract_day(col_view)) - elif field == "weekday": - c_result = move(libcudf_datetime.extract_weekday(col_view)) - elif field == "hour": - c_result = move(libcudf_datetime.extract_hour(col_view)) - elif field == "minute": - c_result = move(libcudf_datetime.extract_minute(col_view)) - elif field == "second": - c_result = move(libcudf_datetime.extract_second(col_view)) - elif field == "millisecond": - c_result = move( - libcudf_datetime.extract_millisecond_fraction(col_view) - ) - elif field == "microsecond": - c_result = move( - libcudf_datetime.extract_microsecond_fraction(col_view) - ) - elif field == "nanosecond": + cdef libcudf_datetime.datetime_component component + + component_names = { + "year": DatetimeComponent.YEAR, + "month": DatetimeComponent.MONTH, + "day": DatetimeComponent.DAY, + "weekday": DatetimeComponent.WEEKDAY, + "hour": DatetimeComponent.HOUR, + "minute": DatetimeComponent.MINUTE, + "second": DatetimeComponent.SECOND, + "millisecond": DatetimeComponent.MILLISECOND, + "microsecond": DatetimeComponent.MICROSECOND, + "nanosecond": DatetimeComponent.NANOSECOND, + } + if field == "day_of_year": + with nogil: + c_result = move(libcudf_datetime.day_of_year(col_view)) + elif field in component_names: + component = component_names[field] + with nogil: c_result = move( - libcudf_datetime.extract_nanosecond_fraction(col_view) + libcudf_datetime.extract_datetime_component( + col_view, + component + ) ) - elif field == "day_of_year": - c_result = move(libcudf_datetime.day_of_year(col_view)) - else: - raise ValueError(f"Invalid datetime field: '{field}'") + else: + raise ValueError(f"Invalid field: '{field}'") result = Column.from_unique_ptr(move(c_result)) From 80a62bab36be2d14d587149e9dea4949fc97cd86 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Mon, 16 Sep 2024 08:45:44 -0500 Subject: [PATCH 04/14] Apply suggestions from code review Co-authored-by: Bradley Dice Co-authored-by: Lawrence Mitchell --- cpp/include/cudf/datetime.hpp | 6 +++--- cpp/src/datetime/datetime_ops.cu | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index db406bb7ea8..e4f96db164b 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -40,7 +40,7 @@ namespace datetime { /** * @brief Types of datetime components that may be extracted. */ -enum class datetime_component { +enum class datetime_component : uint8_t { INVALID = 0, YEAR, MONTH, @@ -205,7 +205,7 @@ std::unique_ptr extract_nanosecond_fraction( rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** - * @brief Extracts the specified datetime component from any datetime type and + * @brief Extracts the specified datetime component from any datetime type and * returns an int16_t cudf::column. * * @param column cudf::column_view of the input datetime values @@ -218,7 +218,7 @@ std::unique_ptr extract_nanosecond_fraction( std::unique_ptr extract_datetime_component( cudf::column_view const& column, datetime_component component, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group /** diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 49b4b9841fc..c8716dc8eb7 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -630,7 +630,7 @@ std::unique_ptr extract_second(column_view const& column, rmm::device_as std::unique_ptr extract_datetime_component(cudf::column_view const& column, datetime_component component, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); switch (component) { From da8f3b7221594e70e10947561fe6ea03e2164be7 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 24 Sep 2024 19:33:14 -0700 Subject: [PATCH 05/14] refactor --- cpp/src/datetime/datetime_ops.cu | 80 +++++++++++++++++++------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 7296a8339ea..a25f579560e 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -440,83 +440,70 @@ std::unique_ptr extract_year(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::YEAR, stream, mr); } std::unique_ptr extract_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::MONTH, stream, mr); } std::unique_ptr extract_day(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::DAY, stream, mr); } std::unique_ptr extract_weekday(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr); } std::unique_ptr extract_hour(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::HOUR, stream, mr); } std::unique_ptr extract_minute(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::MINUTE, stream, mr); } std::unique_ptr extract_second(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::SECOND, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr); } std::unique_ptr last_day_of_month(column_view const& column, @@ -648,16 +635,43 @@ std::unique_ptr extract_datetime_component(cudf::column_view const { CUDF_FUNC_RANGE(); switch (component) { - case datetime_component::YEAR: return extract_year(column, stream, mr); - case datetime_component::MONTH: return extract_month(column, stream, mr); - case datetime_component::DAY: return extract_day(column, stream, mr); - case datetime_component::WEEKDAY: return extract_weekday(column, stream, mr); - case datetime_component::HOUR: return extract_hour(column, stream, mr); - case datetime_component::MINUTE: return extract_minute(column, stream, mr); - case datetime_component::SECOND: return extract_second(column, stream, mr); - case datetime_component::MILLISECOND: return extract_millisecond_fraction(column, stream, mr); - case datetime_component::MICROSECOND: return extract_microsecond_fraction(column, stream, mr); - case datetime_component::NANOSECOND: return extract_nanosecond_fraction(column, stream, mr); + case datetime_component::YEAR: + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::MONTH: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::DAY: + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::WEEKDAY: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::HOUR: + return detail::apply_datetime_op, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::MINUTE: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::SECOND: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::MILLISECOND: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::MICROSECOND: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); + case datetime_component::NANOSECOND: + return detail::apply_datetime_op< + detail::extract_component_operator, + cudf::type_id::INT16>(column, stream, mr); default: CUDF_FAIL("Unsupported datetime component."); } } From be9073ea8991aaf526bcc16798c873e0342f4e02 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 24 Sep 2024 19:45:17 -0700 Subject: [PATCH 06/14] address reviews --- python/pylibcudf/pylibcudf/libcudf/datetime.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index af3bd1ffcf0..a35d5e21007 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport int32_t +from libc.stdint cimport uint8_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -8,7 +8,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: - cpdef enum class datetime_component(int32_t): + cpdef enum class datetime_component(uint8_t): INVALID YEAR MONTH From 1745cb633801774e56a3790087b010fa5d1ae06a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 25 Sep 2024 12:00:39 -0700 Subject: [PATCH 07/14] use macro --- cpp/src/datetime/datetime_ops.cu | 52 +++++++++----------------------- 1 file changed, 15 insertions(+), 37 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index a25f579560e..6cac16aa3ea 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -41,6 +41,11 @@ #include #include +#define extract(field) \ + case field: \ + return detail::apply_datetime_op, \ + cudf::type_id::INT16>(column, stream, mr) + namespace cudf { namespace datetime { namespace detail { @@ -635,43 +640,16 @@ std::unique_ptr extract_datetime_component(cudf::column_view const { CUDF_FUNC_RANGE(); switch (component) { - case datetime_component::YEAR: - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::MONTH: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::DAY: - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::WEEKDAY: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::HOUR: - return detail::apply_datetime_op, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::MINUTE: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::SECOND: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::MILLISECOND: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::MICROSECOND: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); - case datetime_component::NANOSECOND: - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + extract(datetime_component::YEAR); + extract(datetime_component::MONTH); + extract(datetime_component::DAY); + extract(datetime_component::WEEKDAY); + extract(datetime_component::HOUR); + extract(datetime_component::MINUTE); + extract(datetime_component::SECOND); + extract(datetime_component::MILLISECOND); + extract(datetime_component::MICROSECOND); + extract(datetime_component::NANOSECOND); default: CUDF_FAIL("Unsupported datetime component."); } } From a6a909c0ee57aac5cdfa9caef0d8fe6b7b426bdf Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 27 Sep 2024 10:34:41 -0700 Subject: [PATCH 08/14] address reviews --- cpp/src/datetime/datetime_ops.cu | 45 +++++++++++++++++++------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 6cac16aa3ea..f7c6c3e6453 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -41,11 +41,6 @@ #include #include -#define extract(field) \ - case field: \ - return detail::apply_datetime_op, \ - cudf::type_id::INT16>(column, stream, mr) - namespace cudf { namespace datetime { namespace detail { @@ -548,6 +543,32 @@ std::unique_ptr extract_quarter(column_view const& column, return apply_datetime_op(column, stream, mr); } +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ +#define extract(field) \ + case field: \ + return apply_datetime_op, cudf::type_id::INT16>( \ + column, stream, mr) + + switch (component) { + extract(datetime_component::YEAR); + extract(datetime_component::MONTH); + extract(datetime_component::DAY); + extract(datetime_component::WEEKDAY); + extract(datetime_component::HOUR); + extract(datetime_component::MINUTE); + extract(datetime_component::SECOND); + extract(datetime_component::MILLISECOND); + extract(datetime_component::MICROSECOND); + extract(datetime_component::NANOSECOND); + default: CUDF_FAIL("Unsupported datetime component."); + } +#undef extract +} + } // namespace detail std::unique_ptr ceil_datetimes(column_view const& column, @@ -639,19 +660,7 @@ std::unique_ptr extract_datetime_component(cudf::column_view const rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - switch (component) { - extract(datetime_component::YEAR); - extract(datetime_component::MONTH); - extract(datetime_component::DAY); - extract(datetime_component::WEEKDAY); - extract(datetime_component::HOUR); - extract(datetime_component::MINUTE); - extract(datetime_component::SECOND); - extract(datetime_component::MILLISECOND); - extract(datetime_component::MICROSECOND); - extract(datetime_component::NANOSECOND); - default: CUDF_FAIL("Unsupported datetime component."); - } + return detail::extract_datetime_component(column, component, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, From 5fd94685dcf58e0b052b54843901a1872019fccc Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 1 Oct 2024 19:53:32 -0700 Subject: [PATCH 09/14] use enum in polars code --- python/cudf_polars/cudf_polars/dsl/expr.py | 40 ++++++++++++++-------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index c401e5a2f17..fe4c8eee45a 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -961,16 +961,16 @@ def do_evaluate( class TemporalFunction(Expr): __slots__ = ("name", "options", "children") _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = { - pl_expr.TemporalFunction.Year: "year", - pl_expr.TemporalFunction.Month: "month", - pl_expr.TemporalFunction.Day: "day", - pl_expr.TemporalFunction.WeekDay: "weekday", - pl_expr.TemporalFunction.Hour: "hour", - pl_expr.TemporalFunction.Minute: "minute", - pl_expr.TemporalFunction.Second: "second", - pl_expr.TemporalFunction.Millisecond: "millisecond", - pl_expr.TemporalFunction.Microsecond: "microsecond", - pl_expr.TemporalFunction.Nanosecond: "nanosecond", + pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR, + pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH, + pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY, + pl_expr.TemporalFunction.WeekDay: plc.datetime.DatetimeComponent.WEEKDAY, + pl_expr.TemporalFunction.Hour: plc.datetime.DatetimeComponent.HOUR, + pl_expr.TemporalFunction.Minute: plc.datetime.DatetimeComponent.MINUTE, + pl_expr.TemporalFunction.Second: plc.datetime.DatetimeComponent.SECOND, + pl_expr.TemporalFunction.Millisecond: plc.datetime.DatetimeComponent.MILLISECOND, + pl_expr.TemporalFunction.Microsecond: plc.datetime.DatetimeComponent.MICROSECOND, + pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND, } _non_child = ("dtype", "name", "options") children: tuple[Expr, ...] @@ -1003,8 +1003,12 @@ def do_evaluate( ] (column,) = columns if self.name == pl_expr.TemporalFunction.Microsecond: - millis = plc.datetime.extract_datetime_component(column.obj, "millisecond") - micros = plc.datetime.extract_datetime_component(column.obj, "microsecond") + millis = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MILLISECOND + ) + micros = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MICROSECOND + ) millis_as_micros = plc.binaryop.binary_operation( millis, plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())), @@ -1019,9 +1023,15 @@ def do_evaluate( ) return Column(total_micros) elif self.name == pl_expr.TemporalFunction.Nanosecond: - millis = plc.datetime.extract_datetime_component(column.obj, "millisecond") - micros = plc.datetime.extract_datetime_component(column.obj, "microsecond") - nanos = plc.datetime.extract_datetime_component(column.obj, "nanosecond") + millis = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MILLISECOND + ) + micros = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MICROSECOND + ) + nanos = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.NANOSECOND + ) millis_as_nanos = plc.binaryop.binary_operation( millis, plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())), From 844e3dc3249e711ed20a3770713a465b0c528586 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 2 Oct 2024 13:02:17 -0700 Subject: [PATCH 10/14] minor fixes --- cpp/include/cudf/detail/datetime.hpp | 10 ++++++++++ cpp/src/datetime/datetime_ops.cu | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 9db7e48498f..df3050d6494 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -115,6 +115,16 @@ std::unique_ptr extract_nanosecond_fraction(cudf::column_view cons rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); +/** + * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component, + * rmm::cuda_stream_view, rmm::device_async_resource_ref) + * + */ +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + /** * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view, * rmm::device_async_resource_ref) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index f7c6c3e6453..a497cedb3bc 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -440,70 +440,70 @@ std::unique_ptr extract_year(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::YEAR, stream, mr); + return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr); } std::unique_ptr extract_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::MONTH, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr); } std::unique_ptr extract_day(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::DAY, stream, mr); + return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr); } std::unique_ptr extract_weekday(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr); + return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr); } std::unique_ptr extract_hour(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::HOUR, stream, mr); + return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr); } std::unique_ptr extract_minute(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::MINUTE, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr); } std::unique_ptr extract_second(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::SECOND, stream, mr); + return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr); + return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr); } std::unique_ptr last_day_of_month(column_view const& column, From 722c6f98439689d8c0d35b8049fde4d011505cb8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:03:55 -0500 Subject: [PATCH 11/14] Update cpp/include/cudf/datetime.hpp Co-authored-by: Matthew Murray <41342305+Matt711@users.noreply.github.com> --- cpp/include/cudf/datetime.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 8e71b2d320e..0c9a2aa5859 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -241,7 +241,7 @@ std::unique_ptr extract_datetime_component( cudf::column_view const& column, datetime_component component, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group /** From af1397c3a0478ecd7d03776d97f4725021eed671 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 3 Oct 2024 12:24:40 -0500 Subject: [PATCH 12/14] Update python/pylibcudf/pylibcudf/datetime.pyx --- python/pylibcudf/pylibcudf/datetime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 0c231fbad62..784d29128bf 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -43,7 +43,7 @@ cpdef Column extract_datetime_component( """ Extract a datetime component from a datetime column. - For details, see :cpp:func:`cudf::datetime::extract_datetime_component`. + For details, see :cpp:func:`cudf::extract_datetime_component`. Parameters ---------- From fa191ef8ee3bf6abbbbee7a81e99b90ac6eed1a7 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 4 Oct 2024 10:30:10 +0100 Subject: [PATCH 13/14] Remove unnecessary whitespace change --- cpp/include/cudf/datetime.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 0c9a2aa5859..4bf8d9d34e9 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -181,7 +181,6 @@ std::unique_ptr extract_second( * @returns cudf::column of the extracted int16_t milliseconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ - std::unique_ptr extract_millisecond_fraction( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), From 09626d293648dd3addb7d173b0477c32b68612e5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 4 Oct 2024 10:11:18 -0700 Subject: [PATCH 14/14] remove INVALID --- cpp/include/cudf/datetime.hpp | 1 - python/pylibcudf/pylibcudf/libcudf/datetime.pxd | 1 - 2 files changed, 2 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 4bf8d9d34e9..1eaea5b6374 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -42,7 +42,6 @@ namespace datetime { * @brief Types of datetime components that may be extracted. */ enum class datetime_component : uint8_t { - INVALID = 0, YEAR, MONTH, DAY, diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index a35d5e21007..73cdfb96af5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -9,7 +9,6 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cpdef enum class datetime_component(uint8_t): - INVALID YEAR MONTH DAY