Skip to content

Commit

Permalink
Implement extract_datetime_component in libcudf/pylibcudf (#16776)
Browse files Browse the repository at this point in the history
Closes #16735

Authors:
  - https://github.com/brandon-b-miller
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Lawrence Mitchell (https://github.com/wence-)
  - Bradley Dice (https://github.com/bdice)

URL: #16776
  • Loading branch information
brandon-b-miller authored Oct 7, 2024
1 parent 7e1e475 commit 2d02bdc
Show file tree
Hide file tree
Showing 12 changed files with 358 additions and 138 deletions.
34 changes: 34 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ namespace datetime {
* @file
*/

/**
 * @brief Types of datetime components that may be extracted.
 *
 * A value of this type is passed to extract_datetime_component() to choose which
 * field of each timestamp is produced. The underlying type is uint8_t and the
 * enumerators carry no explicit values, so they are numbered from 0 in
 * declaration order.
 */
enum class datetime_component : uint8_t {
YEAR,         ///< Component produced by extract_year
MONTH,        ///< Component produced by extract_month
DAY,          ///< Component produced by extract_day
WEEKDAY,      ///< Component produced by extract_weekday
HOUR,         ///< Component produced by extract_hour
MINUTE,       ///< Component produced by extract_minute
SECOND,       ///< Component produced by extract_second
MILLISECOND,  ///< Component produced by extract_millisecond_fraction
MICROSECOND,  ///< Component produced by extract_microsecond_fraction
NANOSECOND    ///< Component produced by extract_nanosecond_fraction
};

/**
* @brief Extracts year from any datetime type and returns an int16_t
* cudf::column.
Expand Down Expand Up @@ -207,6 +223,24 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
 * @brief Extracts the specified datetime component from any datetime type and
 * returns an int16_t cudf::column.
 *
 * The component is selected at run time through `component`, as opposed to the
 * single-purpose extractors (extract_year, extract_month, ...) where it is fixed
 * by the function name.
 *
 * @param column cudf::column_view of the input datetime values
 * @param component The datetime component to extract
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate device memory of the returned column
 *
 * @returns cudf::column of the extracted int16_t datetime component
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 *
 * @note A `component` value that matches none of the datetime_component
 * enumerators is rejected by the implementation via CUDF_FAIL.
 */
std::unique_ptr<cudf::column> extract_datetime_component(
cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
/**
* @addtogroup datetime_compute
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/detail/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view cons
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
 * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
 * rmm::cuda_stream_view, rmm::device_async_resource_ref)
 *
 * Detail-layer overload: unlike the public declaration, `stream` and `mr` have no
 * default arguments here and must be supplied by the caller.
 */
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view,
* rmm::device_async_resource_ref)
Expand Down
88 changes: 45 additions & 43 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,6 @@
namespace cudf {
namespace datetime {
namespace detail {
// NOTE(review): this page is a rendered diff. This detail-scoped enum appears to be
// the definition the commit removes (the hunk header above shrinks the region from
// 19 to 6 lines, i.e. by these 13 lines). The enum added to cudf/datetime.hpp has the
// same component names but no INVALID member, so YEAR moves from 1 to 0 and every
// later enumerator shifts down by one.
enum class datetime_component {
INVALID = 0,
YEAR,
MONTH,
DAY,
WEEKDAY,
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND,
NANOSECOND
};

enum class rounding_function {
CEIL, ///< Rounds up to the next integer multiple of the provided frequency
Expand Down Expand Up @@ -453,90 +440,70 @@ std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::YEAR>,
cudf::type_id::INT16>(column, stream, mr);
return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr);
}

// Detail-layer extractor for the MONTH component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_month(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the MONTH operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::MONTH>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr);
}

// Detail-layer extractor for the DAY component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_day(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the DAY operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::DAY>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr);
}

// Detail-layer extractor for the WEEKDAY component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_weekday(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the WEEKDAY operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::WEEKDAY>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
}

// Detail-layer extractor for the HOUR component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_hour(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the HOUR operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::HOUR>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr);
}

// Detail-layer extractor for the MINUTE component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_minute(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the MINUTE operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::MINUTE>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
}

// Detail-layer extractor for the SECOND component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_second(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the SECOND operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::SECOND>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr);
}

// Detail-layer extractor for the MILLISECOND component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the MILLISECOND operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::MILLISECOND>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
}

// Detail-layer extractor for the MICROSECOND component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the MICROSECOND operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::MICROSECOND>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
}

// Detail-layer extractor for the NANOSECOND component of `column`.
// NOTE(review): rendered diff, so two versions of the body are shown back to back.
std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// Appears to be the pre-commit body: instantiates the NANOSECOND operator directly.
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::NANOSECOND>,
cudf::type_id::INT16>(column, stream, mr);
// Appears to be the post-commit body: forwards to the runtime-dispatching extractor.
return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
}

std::unique_ptr<column> last_day_of_month(column_view const& column,
Expand Down Expand Up @@ -576,6 +543,32 @@ std::unique_ptr<column> extract_quarter(column_view const& column,
return apply_datetime_op<extract_quarter_op, type_id::INT16>(column, stream, mr);
}

/**
 * @brief Maps a runtime `component` value onto the matching compile-time
 * extract_component_operator instantiation.
 *
 * Every recognized component is forwarded to apply_datetime_op with an INT16
 * output type; any other value reaches the default label and fails.
 *
 * @param column Input datetime column
 * @param component Which datetime component to extract
 * @param stream CUDA stream forwarded to apply_datetime_op
 * @param mr Device memory resource forwarded to apply_datetime_op
 * @return Column produced by apply_datetime_op for the selected component
 */
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
                                                         datetime_component component,
                                                         rmm::cuda_stream_view stream,
                                                         rmm::device_async_resource_ref mr)
{
  // The operator is templated on the component, so each enumerator needs its own
  // case label to turn the runtime value into a template argument.
  switch (component) {
    case datetime_component::YEAR:
      return apply_datetime_op<extract_component_operator<datetime_component::YEAR>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MONTH:
      return apply_datetime_op<extract_component_operator<datetime_component::MONTH>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::DAY:
      return apply_datetime_op<extract_component_operator<datetime_component::DAY>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::WEEKDAY:
      return apply_datetime_op<extract_component_operator<datetime_component::WEEKDAY>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::HOUR:
      return apply_datetime_op<extract_component_operator<datetime_component::HOUR>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MINUTE:
      return apply_datetime_op<extract_component_operator<datetime_component::MINUTE>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::SECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::SECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MILLISECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::MILLISECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MICROSECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::MICROSECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::NANOSECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::NANOSECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    default: CUDF_FAIL("Unsupported datetime component.");
  }
}

} // namespace detail

std::unique_ptr<column> ceil_datetimes(column_view const& column,
Expand Down Expand Up @@ -661,6 +654,15 @@ std::unique_ptr<column> extract_second(column_view const& column,
return detail::extract_second(column, stream, mr);
}

// Public entry point for runtime-selected component extraction.
// Invokes CUDF_FUNC_RANGE() (project macro, not defined in this view) and then
// forwards all four arguments unchanged to detail::extract_datetime_component.
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::extract_datetime_component(column, component, stream, mr);
}

std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
Expand Down
130 changes: 130 additions & 0 deletions cpp/tests/datetime/datetime_ops_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns),
fixed_width_column_wrapper<int16_t>{766, 424, 623});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR),
fixed_width_column_wrapper<int16_t>{1969, 1970, 1970});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{10, 7, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{10, 7, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{10, 7, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH),
fixed_width_column_wrapper<int16_t>{12, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{26, 4, 25});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{26, 4, 25});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{26, 4, 25});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY),
fixed_width_column_wrapper<int16_t>{31, 1, 1});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{2, 3, 3});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{2, 3, 3});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{2, 3, 3});
// Fourth WEEKDAY check covers timestamps_ns, like every other component group here;
// the original repeated timestamps_ms, leaving the nanosecond column untested.
// The YEAR/MONTH/DAY expectations for timestamps_ns give 1969-12-31, 1970-01-01 and
// 1970-01-01, i.e. Wednesday, Thursday, Thursday. In the Monday=1 numbering implied
// by the timestamps_D expectation (1965-10-26, a Tuesday, yields 2) that is {3, 4, 4}.
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::WEEKDAY),
fixed_width_column_wrapper<int16_t>{3, 4, 4});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{14, 12, 7});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{14, 12, 7});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR),
fixed_width_column_wrapper<int16_t>{23, 0, 0});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{1, 0, 32});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{1, 0, 32});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE),
fixed_width_column_wrapper<int16_t>{59, 0, 0});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{12, 0, 12});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{12, 0, 12});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND),
fixed_width_column_wrapper<int16_t>{59, 0, 0});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{762, 0, 929});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND),
fixed_width_column_wrapper<int16_t>{976, 23, 987});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND),
fixed_width_column_wrapper<int16_t>{675, 432, 234});

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND),
fixed_width_column_wrapper<int16_t>{766, 424, 623});
}

template <typename T>
Expand Down
Loading

0 comments on commit 2d02bdc

Please sign in to comment.