Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into pylibcudf-nvtext-generate_ngrams
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 authored Oct 7, 2024
2 parents ff9fb42 + 2d02bdc commit 577a7fc
Show file tree
Hide file tree
Showing 64 changed files with 732 additions and 366 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/pr_issue_status_automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,33 @@ jobs:
UPDATE_ITEM: true
UPDATE_LINKED_ISSUES: true
secrets: inherit

process-branch-name:
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: get-project-id
runs-on: ubuntu-latest
outputs:
branch-name: ${{ steps.process-branch-name.outputs.branch-name }}
steps:
- name: Extract branch name
id: process-branch-name
run: |
branch=${{ github.event.pull_request.base.ref }}
release=${branch#branch-}
echo "branch-name=$release" >> "$GITHUB_OUTPUT"
update-release:
# This job sets the PR and its linked issues to the release they are targeting
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: [get-project-id, process-branch-name]
with:
PROJECT_ID: "PVT_kwDOAp2shc4AiNzl"
SINGLE_SELECT_FIELD_ID: "PVTSSF_lADOAp2shc4AiNzlzgg52UQ"
SINGLE_SELECT_FIELD_NAME: "Release"
SINGLE_SELECT_OPTION_VALUE: "${{ needs.process-branch-name.outputs.branch-name }}"
ITEM_PROJECT_ID: "${{ needs.get-project-id.outputs.ITEM_PROJECT_ID }}"
ITEM_NODE_ID: "${{ github.event.pull_request.node_id }}"
UPDATE_ITEM: true
UPDATE_LINKED_ISSUES: true
secrets: inherit
43 changes: 36 additions & 7 deletions cpp/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,18 +1,47 @@
---
# Notes on disabled checks
# ------------------------
# modernize-use-equals-default:
# auto-fix is broken (doesn't insert =default correctly)
# modernize-concat-nested-namespaces:
# auto-fix is broken (can delete code)
# modernize-use-trailing-return-type:
# Purely stylistic, no benefit to rewriting everything
# modernize-return-braced-init-list:
# Stylistically we prefer to see the return type at the return site.
# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
# for more information.
# modernize-use-bool-literals:
# Our tests use int flags for validity masks extensively and we prefer that
# clang-analyzer-cplusplus.NewDeleteLeaks:
# This check has numerous bugs, see
# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
# We encounter at least
# https://github.com/llvm/llvm-project/issues/60896
# https://github.com/llvm/llvm-project/issues/69602
# clang-analyzer-optin.core.EnumCastOutOfRange:
# We use enums as flags in multiple cases and this check makes ORing flags invalid
# clang-analyzer-optin.cplusplus.UninitializedObject:
# There is an error in nanoarrow that none of the clang-tidy filters (i.e.
# header-filter and exclude-header-filter) are able to properly avoid. This
# merits further investigation.
#
# We need to verify that broken checks are still broken
Checks:
'modernize-*,
-modernize-use-equals-default,
-modernize-concat-nested-namespaces,
-modernize-use-trailing-return-type,
-modernize-use-bool-literals'

# -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
# -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
# -modernize-use-trailing-return-type # just a preference
-modernize-return-braced-init-list,
-modernize-use-bool-literals,
clang-analyzer-*,
-clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
CheckOptions:
- key: modernize-loop-convert.MaxCopySize
Expand Down
8 changes: 5 additions & 3 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@

# This function finds nanoarrow and sets any additional necessary environment variables.
function(find_and_configure_nanoarrow)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")

# Currently we need to always build nanoarrow so we don't pick up a previously installed version
set(CPM_DOWNLOAD_nanoarrow ON)
rapids_cpm_find(
nanoarrow 0.6.0.dev
GLOBAL_TARGETS nanoarrow
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
GIT_SHALLOW FALSE
OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
)
set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
38 changes: 38 additions & 0 deletions cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h
index caa6be4..70ec8a2 100644
--- a/src/nanoarrow/common/inline_buffer.h
+++ b/src/nanoarrow/common/inline_buffer.h
@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
}

static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
- *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |
+ *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT
((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) |
((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) |
((values[7] + 0x7f) & 0x80));
@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
// set bits within a single byte
const uint8_t only_byte_mask =
i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
- bits[bytes_begin] &= only_byte_mask;
+ bits[bytes_begin] &= only_byte_mask; // NOLINT
bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
return;
}

// set/clear trailing bits of first byte
- bits[bytes_begin] &= first_byte_mask;
+ bits[bytes_begin] &= first_byte_mask; // NOLINT
bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);

if (bytes_end - bytes_begin > 2) {
@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
n_remaining -= n_full_bytes * 8;
if (n_remaining > 0) {
// Zero out the last byte
- *out_cursor = 0x00;
+ *out_cursor = 0x00; // NOLINT
for (int i = 0; i < n_remaining; i++) {
ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
}
18 changes: 18 additions & 0 deletions cpp/cmake/thirdparty/patches/nanoarrow_override.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

{
"packages" : {
"nanoarrow" : {
"version" : "0.6.0.dev",
"git_url" : "https://github.com/apache/arrow-nanoarrow.git",
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
"git_shallow" : false,
"patches" : [
{
"file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
"issue" : "https://github.com/apache/arrow-nanoarrow/issues/537",
"fixed_in" : ""
}
]
}
}
}
34 changes: 34 additions & 0 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ namespace datetime {
* @file
*/

/**
* @brief Types of datetime components that may be extracted.
*
* A value of this type is passed as the `component` argument of
* `extract_datetime_component` to select which component is extracted.
* The underlying type is `uint8_t`.
*/
enum class datetime_component : uint8_t {
YEAR,
MONTH,
DAY,
WEEKDAY,
HOUR,
MINUTE,
SECOND,
// NOTE(review): the sub-second entries below presumably select the fractional
// part, as `extract_nanosecond_fraction` does -- confirm against the implementation.
MILLISECOND,
MICROSECOND,
NANOSECOND
};

/**
* @brief Extracts year from any datetime type and returns an int16_t
* cudf::column.
Expand Down Expand Up @@ -207,6 +223,24 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Extracts the specified datetime component from any datetime type and
* returns an int16_t cudf::column.
*
* @param column cudf::column_view of the input datetime values
* @param component The datetime component to extract
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the returned column
*
* @returns cudf::column of the extracted int16_t datetime component
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
*/
std::unique_ptr<cudf::column> extract_datetime_component(
cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
/**
* @addtogroup datetime_compute
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/detail/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view cons
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
* rmm::cuda_stream_view, rmm::device_async_resource_ref)
*
*/
std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
datetime_component component,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view,
* rmm::device_async_resource_ref)
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class table {
std::vector<column_view> columns(std::distance(begin, end));
std::transform(
begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); });
return table_view(columns);
return table_view{columns};
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/table_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class table_view : public detail::table_view_base<column_view> {
{
std::vector<column_view> columns(std::distance(begin, end));
std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); });
return table_view(columns);
return table_view{columns};
}

/**
Expand Down
Loading

0 comments on commit 577a7fc

Please sign in to comment.