Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into histogram_insert_or_apply
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 authored Oct 7, 2024
2 parents 79f07dd + 2d02bdc commit de720e5
Show file tree
Hide file tree
Showing 63 changed files with 702 additions and 366 deletions.
43 changes: 36 additions & 7 deletions cpp/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,18 +1,47 @@
---
# Notes on disabled checks
# ------------------------
# modernize-use-equals-default:
# auto-fix is broken (doesn't insert =default correctly)
# modernize-concat-nested-namespaces:
# auto-fix is broken (can delete code)
# modernize-use-trailing-return-type:
# Purely stylistic, no benefit to rewriting everything
# modernize-return-braced-init-list:
# Stylistically we prefer to see the return type at the return site.
# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
# for more information.
# modernize-use-bool-literals:
# Our tests use int flags for validity masks extensively and we prefer that
# clang-analyzer-cplusplus.NewDeleteLeaks:
# This check has numerous bugs, see
# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
# We encounter at least
# https://github.com/llvm/llvm-project/issues/60896
# https://github.com/llvm/llvm-project/issues/69602
# clang-analyzer-optin.core.EnumCastOutOfRange:
# We use enums as flags in multiple cases and this check makes ORing flags invalid
# clang-analyzer-optin.cplusplus.UninitializedObject:
# There is an error in nanoarrow that none of the clang-tidy filters (i.e.
# header-filter and exclude-header-filter) are able to properly avoid. This
# merits further investigation.
#
# We need to verify that broken checks are still broken
Checks:
'modernize-*,
-modernize-use-equals-default,
-modernize-concat-nested-namespaces,
-modernize-use-trailing-return-type,
-modernize-use-bool-literals'

# -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
# -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
# -modernize-use-trailing-return-type # just a preference
-modernize-return-braced-init-list,
-modernize-use-bool-literals,
clang-analyzer-*,
-clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
CheckOptions:
- key: modernize-loop-convert.MaxCopySize
Expand Down
8 changes: 5 additions & 3 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@

# This function finds nanoarrow and sets any additional necessary environment variables.
function(find_and_configure_nanoarrow)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")

# Currently we always need to build nanoarrow so that we don't pick up a previously installed version
set(CPM_DOWNLOAD_nanoarrow ON)
rapids_cpm_find(
nanoarrow 0.6.0.dev
GLOBAL_TARGETS nanoarrow
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
GIT_SHALLOW FALSE
OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
)
set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
38 changes: 38 additions & 0 deletions cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h
index caa6be4..70ec8a2 100644
--- a/src/nanoarrow/common/inline_buffer.h
+++ b/src/nanoarrow/common/inline_buffer.h
@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
}

static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
- *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |
+ *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT
((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) |
((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) |
((values[7] + 0x7f) & 0x80));
@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
// set bits within a single byte
const uint8_t only_byte_mask =
i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
- bits[bytes_begin] &= only_byte_mask;
+ bits[bytes_begin] &= only_byte_mask; // NOLINT
bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
return;
}

// set/clear trailing bits of first byte
- bits[bytes_begin] &= first_byte_mask;
+ bits[bytes_begin] &= first_byte_mask; // NOLINT
bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);

if (bytes_end - bytes_begin > 2) {
@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
n_remaining -= n_full_bytes * 8;
if (n_remaining > 0) {
// Zero out the last byte
- *out_cursor = 0x00;
+ *out_cursor = 0x00; // NOLINT
for (int i = 0; i < n_remaining; i++) {
ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
}
18 changes: 18 additions & 0 deletions cpp/cmake/thirdparty/patches/nanoarrow_override.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

{
"packages" : {
"nanoarrow" : {
"version" : "0.6.0.dev",
"git_url" : "https://github.com/apache/arrow-nanoarrow.git",
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
"git_shallow" : false,
"patches" : [
{
"file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
"issue" : "https://github.com/apache/arrow-nanoarrow/issues/537",
"fixed_in" : ""
}
]
}
}
}
34 changes: 34 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ namespace datetime {
* @file
*/

/**
* @brief Types of datetime components that may be extracted.
*
* A value of this enum is passed to `extract_datetime_component` to select
* which component is returned. Each enumerator corresponds to one of the
* dedicated `extract_*` functions, which forward that enumerator to the
* shared extractor. The underlying type is `uint8_t`.
*/
enum class datetime_component : uint8_t {
YEAR,  ///< Component returned by `extract_year`
MONTH,  ///< Component returned by `extract_month`
DAY,  ///< Component returned by `extract_day`
WEEKDAY,  ///< Component returned by `extract_weekday`
HOUR,  ///< Component returned by `extract_hour`
MINUTE,  ///< Component returned by `extract_minute`
SECOND,  ///< Component returned by `extract_second`
MILLISECOND,  ///< Component returned by `extract_millisecond_fraction`
MICROSECOND,  ///< Component returned by `extract_microsecond_fraction`
NANOSECOND  ///< Component returned by `extract_nanosecond_fraction`
};

/**
* @brief Extracts year from any datetime type and returns an int16_t
* cudf::column.
Expand Down Expand Up @@ -207,6 +223,24 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the specified datetime component from any datetime type and
* returns an int16_t cudf::column.
*
* This is the general form of the per-component `extract_*` functions: the
* component to extract is chosen at run time through `component` instead of
* through the function name.
*
* @param column cudf::column_view of the input datetime values
* @param component The datetime component to extract
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t datetime component
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*
* @note A `component` value that does not match any `datetime_component`
* enumerator is rejected by the implementation through `CUDF_FAIL` with the
* message "Unsupported datetime component.".
*/
std::unique_ptr<cudf::column> extract_datetime_component(
cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
/**
* @addtogroup datetime_compute
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/detail/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view cons
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
* rmm::cuda_stream_view, rmm::device_async_resource_ref)
*
* Unlike the public overload, `stream` and `mr` have no default arguments
* here and must be supplied by the caller.
*/
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view,
* rmm::device_async_resource_ref)
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class table {
std::vector<column_view> columns(std::distance(begin, end));
std::transform(
begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); });
return table_view(columns);
return table_view{columns};
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/table_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class table_view : public detail::table_view_base<column_view> {
{
std::vector<column_view> columns(std::distance(begin, end));
std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); });
return table_view(columns);
return table_view{columns};
}

/**
Expand Down
88 changes: 45 additions & 43 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,6 @@
namespace cudf {
namespace datetime {
namespace detail {
/**
* @brief Selects the datetime field handled by `extract_component_operator`.
*
* Used as the non-type template argument of
* `detail::extract_component_operator<...>`.
*/
enum class datetime_component {
// Explicitly 0; the enumerators below are implicitly numbered 1 through 10.
INVALID = 0,
YEAR,
MONTH,
DAY,
WEEKDAY,
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND,
NANOSECOND
};

enum class rounding_function {
CEIL, ///< Rounds up to the next integer multiple of the provided frequency
Expand Down Expand Up @@ -453,90 +440,70 @@ std::unique_ptr<column> extract_year(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
return detail::apply_datetime_op<
detail::extract_component_operator<detail::datetime_component::YEAR>,
cudf::type_id::INT16>(column, stream, mr);
return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr);
}

/**
 * @brief Returns the `MONTH` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::MONTH`
 */
std::unique_ptr<column> extract_month(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr);
}

/**
 * @brief Returns the `DAY` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::DAY`
 */
std::unique_ptr<column> extract_day(column_view const& column,
                                    rmm::cuda_stream_view stream,
                                    rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr);
}

/**
 * @brief Returns the `WEEKDAY` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::WEEKDAY`
 */
std::unique_ptr<column> extract_weekday(column_view const& column,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
}

/**
 * @brief Returns the `HOUR` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::HOUR`
 */
std::unique_ptr<column> extract_hour(column_view const& column,
                                     rmm::cuda_stream_view stream,
                                     rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr);
}

/**
 * @brief Returns the `MINUTE` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::MINUTE`
 */
std::unique_ptr<column> extract_minute(column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
}

/**
 * @brief Returns the `SECOND` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::SECOND`
 */
std::unique_ptr<column> extract_second(column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr);
}

/**
 * @brief Returns the `MILLISECOND` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::MILLISECOND`
 */
std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
}

/**
 * @brief Returns the `MICROSECOND` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::MICROSECOND`
 */
std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
}

/**
 * @brief Returns the `NANOSECOND` component of every element of `column`.
 *
 * Forwards to `detail::extract_datetime_component`, the single dispatch point
 * shared by all per-component extractors, which yields an INT16 column.
 *
 * @param column Input datetime column
 * @param stream CUDA stream forwarded to the shared extractor
 * @param mr Device memory resource forwarded to the shared extractor
 * @return Column produced by the shared extractor for `datetime_component::NANOSECOND`
 */
std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
                                                    rmm::cuda_stream_view stream,
                                                    rmm::device_async_resource_ref mr)
{
  return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
}

std::unique_ptr<column> last_day_of_month(column_view const& column,
Expand Down Expand Up @@ -576,6 +543,32 @@ std::unique_ptr<column> extract_quarter(column_view const& column,
return apply_datetime_op<extract_quarter_op, type_id::INT16>(column, stream, mr);
}

/**
 * @brief Maps a run-time `datetime_component` value onto the matching
 * compile-time `extract_component_operator` instantiation.
 *
 * Each supported enumerator is dispatched to `apply_datetime_op` with
 * `extract_component_operator<that enumerator>` and an INT16 output type.
 * Any other value reaches the `default` label and is rejected through
 * `CUDF_FAIL`.
 *
 * @param column Input datetime column
 * @param component Component to extract
 * @param stream CUDA stream forwarded to `apply_datetime_op`
 * @param mr Device memory resource forwarded to `apply_datetime_op`
 * @return Column returned by `apply_datetime_op` for the selected component
 */
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
                                                         datetime_component component,
                                                         rmm::cuda_stream_view stream,
                                                         rmm::device_async_resource_ref mr)
{
  // The operator is parameterized on the enumerator at compile time, so each
  // case has to name its own instantiation explicitly.
  switch (component) {
    case datetime_component::YEAR:
      return apply_datetime_op<extract_component_operator<datetime_component::YEAR>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MONTH:
      return apply_datetime_op<extract_component_operator<datetime_component::MONTH>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::DAY:
      return apply_datetime_op<extract_component_operator<datetime_component::DAY>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::WEEKDAY:
      return apply_datetime_op<extract_component_operator<datetime_component::WEEKDAY>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::HOUR:
      return apply_datetime_op<extract_component_operator<datetime_component::HOUR>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MINUTE:
      return apply_datetime_op<extract_component_operator<datetime_component::MINUTE>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::SECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::SECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MILLISECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::MILLISECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::MICROSECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::MICROSECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    case datetime_component::NANOSECOND:
      return apply_datetime_op<extract_component_operator<datetime_component::NANOSECOND>,
                               cudf::type_id::INT16>(column, stream, mr);
    default: CUDF_FAIL("Unsupported datetime component.");
  }
}

} // namespace detail

std::unique_ptr<column> ceil_datetimes(column_view const& column,
Expand Down Expand Up @@ -661,6 +654,15 @@ std::unique_ptr<column> extract_second(column_view const& column,
return detail::extract_second(column, stream, mr);
}

// Public entry point: invokes CUDF_FUNC_RANGE() and then forwards every
// argument unchanged to detail::extract_datetime_component, which performs the
// per-component dispatch.
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
// NOTE(review): presumably opens a profiling range for this call; confirm against the macro definition.
CUDF_FUNC_RANGE();
return detail::extract_datetime_component(column, component, stream, mr);
}

std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
Expand Down
Loading

0 comments on commit de720e5

Please sign in to comment.