Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/s…
Browse files Browse the repository at this point in the history
…tring_casting
  • Loading branch information
mroeschke committed Dec 3, 2024
2 parents 857b7fc + 541e7e8 commit c5eb463
Show file tree
Hide file tree
Showing 191 changed files with 2,449 additions and 2,521 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
files_yaml: |
test_cpp:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
Expand All @@ -71,6 +72,7 @@ jobs:
- '!python/**'
test_cudf_pandas:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
Expand All @@ -79,6 +81,7 @@ jobs:
- '!notebooks/**'
test_java:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
Expand All @@ -88,12 +91,14 @@ jobs:
- '!python/**'
test_notebooks:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!java/**'
test_python:
- '**'
- '!.devcontainer/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
Expand Down
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: |
Expand All @@ -17,11 +17,11 @@ repos:
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
rev: v0.16.6
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.10.0'
rev: 'v1.13.0'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
Expand All @@ -33,7 +33,7 @@ repos:
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.8.5
rev: 1.9.1
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
Expand All @@ -52,7 +52,7 @@ repos:
^cpp/include/cudf_test/cxxopts.hpp
)
- repo: https://github.com/sirosen/texthooks
rev: 0.6.6
rev: 0.6.7
hooks:
- id: fix-smartquotes
exclude: |
Expand Down Expand Up @@ -133,7 +133,7 @@ repos:
pass_filenames: false
verbose: true
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
additional_dependencies: [tomli]
Expand All @@ -144,7 +144,7 @@ repos:
^CHANGELOG.md$
)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
rev: v0.8.0
hooks:
- id: ruff
args: ["--fix"]
Expand Down
2 changes: 1 addition & 1 deletion ci/cpp_linters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ source rapids-configure-sccache
# Run the build via CMake, which will run clang-tidy when CUDF_STATIC_LINTERS is enabled.

iwyu_flag=""
if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then
if [[ "${RAPIDS_BUILD_TYPE:-}" == "nightly" ]]; then
iwyu_flag="-DCUDF_IWYU=ON"
fi
cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON ${iwyu_flag} -DBUILD_TESTS=OFF -GNinja
Expand Down
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ dependencies:
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
- rich
Expand All @@ -97,8 +96,6 @@ dependencies:
- sphinxcontrib-websupport
- streamz
- sysroot_linux-64==2.17
- tokenizers==0.15.2
- transformers==4.39.3
- typing_extensions>=4.0.0
- zlib>=1.2.13
name: all_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ dependencies:
- python-confluent-kafka>=2.5.0,<2.6.0a0
- python-xxhash
- python>=3.10,<3.13
- pytorch>=2.1.0
- pytorch>=2.4.0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
- rich
Expand Down
16 changes: 12 additions & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ option(
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
option(CUDF_CLANG_TIDY "Enable clang-tidy during compilation" OFF)
option(CUDF_IWYU "Enable IWYU during compilation" OFF)
option(CUDF_CLANG_TIDY_AUTOFIX "Enable clang-tidy autofixes" OFF)

option(
CUDF_KVIKIO_REMOTE_IO
Expand Down Expand Up @@ -205,9 +206,16 @@ function(enable_static_checkers target)
if(_LINT_CLANG_TIDY)
# clang will complain about unused link libraries on the compile line unless we specify
# -Qunused-arguments.
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
if(CUDF_CLANG_TIDY_AUTOFIX)
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY
"${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments;--fix"
)
else()
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
endif()
endif()
if(_LINT_IWYU)
# A few extra warnings pop up when building with IWYU. I'm not sure why, but they are not
Expand Down Expand Up @@ -1014,7 +1022,7 @@ if(CUDF_BUILD_TESTUTIL)
)

target_link_libraries(
cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
cudftestutil INTERFACE cuco::cuco Threads::Threads cudf cudftest_default_stream
$<TARGET_NAME_IF_EXISTS:conda_env>
)

Expand Down
3 changes: 1 addition & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,6 @@ ConfigureNVBench(

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(STRINGS_BENCH string/factory.cu)

ConfigureNVBench(
STRINGS_NVBENCH
string/case.cpp
Expand All @@ -377,6 +375,7 @@ ConfigureNVBench(
string/copy_range.cpp
string/count.cpp
string/extract.cpp
string/factory.cpp
string/filter.cpp
string/find.cpp
string/find_multiple.cpp
Expand Down
60 changes: 60 additions & 0 deletions cpp/benchmarks/string/factory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/device_uvector.hpp>

#include <nvbench/nvbench.cuh>

#include <limits>

static void bench_factory(nvbench::state& state)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));

data_profile const profile = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
auto const sv = cudf::strings_column_view(column->view());

auto stream = cudf::get_default_stream();
auto mr = cudf::get_current_device_resource_ref();
auto d_strings = cudf::strings::detail::create_string_vector_from_column(sv, stream, mr);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
auto chars_size = sv.chars_size(stream);
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
state.add_global_memory_writes<nvbench::int8_t>(chars_size);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::make_strings_column(d_strings, cudf::string_view{nullptr, 0});
});
}

NVBENCH_BENCH(bench_factory)
.set_name("factory")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("num_rows", {32768, 262144, 2097152});
92 changes: 0 additions & 92 deletions cpp/benchmarks/string/factory.cu

This file was deleted.

6 changes: 1 addition & 5 deletions cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
function(find_and_configure_cucollections)
include(${rapids-cmake-dir}/cpm/cuco.cmake)

if(BUILD_SHARED_LIBS)
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports)
else()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endif()
rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
endfunction()

find_and_configure_cucollections()
Loading

0 comments on commit c5eb463

Please sign in to comment.