Skip to content

Commit

Permalink
Merge branch 'branch-24.10' into 13297
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored Aug 27, 2024
2 parents 501e92f + 6747d2d commit ab493db
Show file tree
Hide file tree
Showing 29 changed files with 242 additions and 100 deletions.
88 changes: 78 additions & 10 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ concurrency:
jobs:
pr-builder:
needs:
- changed-files
- checks
- conda-cpp-build
- conda-cpp-checks
Expand All @@ -37,6 +38,63 @@ jobs:
- pandas-tests-diff
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: always()
with:
needs: ${{ toJSON(needs) }}
changed-files:
runs-on: ubuntu-latest
name: "Check changed files"
outputs:
test_cpp: ${{ steps.changed-files.outputs.cpp_any_changed == 'true' }}
test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }}
test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }}
test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }}
steps:
- name: Get PR info
id: get-pr-info
uses: rapidsai/shared-actions/get-pr-info@main
- name: Checkout code repo
uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
fetch-depth: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).commits }}
persist-credentials: false
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
files_yaml: |
cpp:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
- '!python/**'
java:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!notebooks/**'
- '!python/**'
notebooks:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!java/**'
python:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand All @@ -56,9 +114,10 @@ jobs:
build_type: pull-request
enable_check_symbols: true
conda-cpp-tests:
needs: conda-cpp-build
needs: [conda-cpp-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_cpp == 'true'
with:
build_type: pull-request
conda-python-build:
Expand All @@ -68,24 +127,27 @@ jobs:
with:
build_type: pull-request
conda-python-cudf-tests:
needs: conda-python-build
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
build_type: pull-request
script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: conda-python-build
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
build_type: pull-request
script: "ci/test_python_other.sh"
conda-java-tests:
needs: conda-cpp-build
needs: [conda-cpp-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_java == 'true'
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand All @@ -103,9 +165,10 @@ jobs:
container_image: "rapidsai/ci-wheel:latest"
run_script: "ci/configure_cpp_static.sh"
conda-notebook-tests:
needs: conda-python-build
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_notebooks == 'true'
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand Down Expand Up @@ -145,9 +208,10 @@ jobs:
build_type: pull-request
script: "ci/build_wheel_cudf.sh"
wheel-tests-cudf:
needs: wheel-build-cudf
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
Expand All @@ -161,9 +225,10 @@ jobs:
build_type: pull-request
script: "ci/build_wheel_cudf_polars.sh"
wheel-tests-cudf-polars:
needs: wheel-build-cudf-polars
needs: [wheel-build-cudf-polars, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -181,9 +246,10 @@ jobs:
build_type: pull-request
script: "ci/build_wheel_dask_cudf.sh"
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
needs: [wheel-build-dask-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -200,19 +266,21 @@ jobs:
build-all -DBUILD_BENCHMARKS=ON --verbose;
sccache -s;
unit-tests-cudf-pandas:
needs: wheel-build-cudf
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
pandas-tests:
# run the Pandas unit tests using PR branch
needs: wheel-build-cudf
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ repos:
- id: ruff-format
files: python/.*$
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.3.1
rev: v0.4.0
hooks:
- id: verify-copyright
exclude: |
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ conda install -c rapidsai -c conda-forge -c nvidia \
We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
of our latest development branch.

Note: cuDF is supported only on Linux, and with Python versions 3.9 and later.
Note: cuDF is supported only on Linux, and with Python versions 3.10 and later.

See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info.

Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ dependencies:
- pytest-xdist
- pytest<8
- python-confluent-kafka>=1.9.0,<1.10.0a0
- python>=3.9,<3.12
- python>=3.10,<3.12
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ dependencies:
- pytest-xdist
- pytest<8
- python-confluent-kafka>=1.9.0,<1.10.0a0
- python>=3.9,<3.12
- python>=3.10,<3.12
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ add_library(
src/unary/math_ops.cu
src/unary/nan_ops.cu
src/unary/null_ops.cu
src/utilities/cuda.cpp
src/utilities/cuda_memcpy.cu
src/utilities/default_stream.cpp
src/utilities/host_memory.cpp
Expand Down
10 changes: 3 additions & 7 deletions cpp/benchmarks/join/generate_input_tables.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/cuda.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -150,13 +151,8 @@ void generate_input_tables(key_type* const build_tbl,
CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks_init_probe_tbl, init_probe_tbl<key_type, size_type>, block_size, 0));

int dev_id{-1};
CUDF_CUDA_TRY(cudaGetDevice(&dev_id));

int num_sms{-1};
CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));

int const num_states =
auto const num_sms = cudf::detail::num_multiprocessors();
auto const num_states =
num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
rmm::device_uvector<curandState> devStates(num_states, cudf::get_default_stream());

Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/reduction/minmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ static void reduction_minmax(nvbench::state& state, nvbench::type_list<DataType>
set_throughputs(state);
}

NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");

using Types = nvbench::type_list<bool, int8_t, int32_t, float, cudf::timestamp_ms>;

NVBENCH_BENCH_TYPES(reduction_minmax, NVBENCH_TYPE_AXES(Types))
Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/reduction/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ static void reduction(nvbench::state& state, nvbench::type_list<DataType, nvbenc
set_throughputs(state);
}

NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");

using Types = nvbench::type_list<int32_t, int64_t, double, cudf::timestamp_ms>;
using AggKinds = nvbench::enum_type_list<cudf::reduce_aggregation::MIN,
cudf::reduce_aggregation::SUM,
Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/sort/rank_lists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ void nvbench_rank_lists(nvbench::state& state, nvbench::type_list<nvbench::enum_
cudf::order::ASCENDING,
null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
cudf::null_order::AFTER,
false,
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
});
}
Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/sort/rank_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ void nvbench_rank_structs(nvbench::state& state, nvbench::type_list<nvbench::enu
cudf::order::ASCENDING,
nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
cudf::null_order::AFTER,
false,
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
});
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
APPEND
initial_code_block
[=[
find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
find_package(Python 3.10 REQUIRED COMPONENTS Interpreter)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cudf/detail/copy_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/detail/gather.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/cuda.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/table/table.hpp>
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/indexalator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
*/
__device__ inline cudf::size_type operator[](size_type idx) const
{
void const* tp = p_ + (idx * this->width_);
void const* tp = p_ + (static_cast<std::ptrdiff_t>(idx) * this->width_);
return type_dispatcher(this->dtype_, normalize_type{}, tp);
}

Expand All @@ -109,7 +109,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0)
: base_normalator<input_indexalator, cudf::size_type>(dtype), p_{static_cast<char const*>(data)}
{
p_ += offset * this->width_;
p_ += static_cast<std::ptrdiff_t>(offset) * this->width_;
}

protected:
Expand Down Expand Up @@ -165,7 +165,7 @@ struct output_indexalator : base_normalator<output_indexalator, cudf::size_type>
__device__ inline output_indexalator const operator[](size_type idx) const
{
output_indexalator tmp{*this};
tmp.p_ += (idx * this->width_);
tmp.p_ += static_cast<std::ptrdiff_t>(idx) * this->width_;
return tmp;
}

Expand Down
29 changes: 0 additions & 29 deletions cpp/include/cudf/detail/utilities/cuda.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -189,35 +189,6 @@ __device__ T single_lane_block_sum_reduce(T lane_value)
return result;
}

/**
* @brief Get the number of elements that can be processed per thread.
*
* @param[in] kernel The kernel for which the elements per thread needs to be assessed
* @param[in] total_size Number of elements
* @param[in] block_size Expected block size
*
* @return cudf::size_type Elements per thread that can be processed for given specification.
*/
template <typename Kernel>
cudf::size_type elements_per_thread(Kernel kernel,
cudf::size_type total_size,
cudf::size_type block_size,
cudf::size_type max_per_thread = 32)
{
CUDF_FUNC_RANGE();

// calculate theoretical occupancy
int max_blocks = 0;
CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0));

int device = 0;
CUDF_CUDA_TRY(cudaGetDevice(&device));
int num_sms = 0;
CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device));
int per_thread = total_size / (max_blocks * num_sms * block_size);
return std::clamp(per_thread, 1, max_per_thread);
}

/**
* @brief Finds the smallest value not less than `number_to_round` and modulo `modulus` is
* zero. Expects modulus to be a power of 2.
Expand Down
Loading

0 comments on commit ab493db

Please sign in to comment.