Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into enh-json_pylibcudf_all_features
Browse files Browse the repository at this point in the history
  • Loading branch information
karthikeyann authored Dec 13, 2024
2 parents 37c3037 + 1a67646 commit ac732b8
Show file tree
Hide file tree
Showing 134 changed files with 2,687 additions and 3,861 deletions.
16 changes: 11 additions & 5 deletions ci/cudf_pandas_scripts/third-party-integration/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ main() {
LIBS=${LIBS#[}
LIBS=${LIBS%]}

ANY_FAILURES=0

for lib in ${LIBS//,/ }; do
lib=$(echo "$lib" | tr -d '""')
echo "Running tests for library $lib"
Expand Down Expand Up @@ -56,10 +58,6 @@ main() {
rapids-logger "Check GPU usage"
nvidia-smi

EXITCODE=0
trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest ${lib}"

NUM_PROCESSES=8
Expand All @@ -72,12 +70,20 @@ main() {
fi
done

EXITCODE=0
trap "EXITCODE=1" ERR
set +e

TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib}

set -e
rapids-logger "Test script exiting with value: ${EXITCODE}"
if [[ ${EXITCODE} != 0 ]]; then
ANY_FAILURES=1
fi
done

exit ${EXITCODE}
exit ${ANY_FAILURES}
}

main "$@"
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1105,7 +1105,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
${_tgt} PRIVATE "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
)
target_include_directories(${_tgt} PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/include>")
target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm)
target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm rmm::rmm_logger rmm::rmm_logger_impl)
if(CUDF_BUILD_STACKTRACE_DEBUG)
target_link_libraries(${_tgt} PRIVATE cudf_backtrace)
endif()
Expand Down
20 changes: 14 additions & 6 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,9 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
endfunction()

# ##################################################################################################
# * column benchmarks -----------------------------------------------------------------------------
ConfigureBench(COLUMN_CONCAT_BENCH column/concatenate.cpp)
# * copying benchmarks
# -----------------------------------------------------------------------------
ConfigureNVBench(COPYING_NVBENCH copying/concatenate.cpp)

# ##################################################################################################
# * gather benchmark ------------------------------------------------------------------------------
Expand Down Expand Up @@ -351,11 +352,18 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary

# ##################################################################################################
# * nvtext benchmark -------------------------------------------------------------------
ConfigureBench(TEXT_BENCH text/subword.cpp)

ConfigureNVBench(
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
text/ngrams.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp
TEXT_NVBENCH
text/edit_distance.cpp
text/hash_ngrams.cpp
text/jaccard.cpp
text/minhash.cpp
text/ngrams.cpp
text/normalize.cpp
text/replace.cpp
text/subword.cpp
text/tokenize.cpp
text/vocab.cpp
)

# ##################################################################################################
Expand Down
169 changes: 0 additions & 169 deletions cpp/benchmarks/column/concatenate.cpp

This file was deleted.

84 changes: 84 additions & 0 deletions cpp/benchmarks/copying/concatenate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <benchmarks/common/generate_input.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/concatenate.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

#include <vector>

/**
 * @brief Benchmarks cudf::concatenate over the columns of a generated int64 table.
 *
 * Reads the `num_rows`, `num_cols` and `nulls` axes from the nvbench state,
 * builds a sequence table with `num_cols` int64 columns of `num_rows` rows,
 * and times concatenating all of its columns into one column.
 *
 * @param state nvbench state supplying the axis values and collecting results
 */
static void bench_concatenate(nvbench::state& state)
{
  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const num_cols = static_cast<cudf::size_type>(state.get_int64("num_cols"));
  // Keep the axis value as a double. Casting it to an integer type truncates
  // fractional values such as 0.3 to 0, which makes every point on the
  // `nulls` axis run the same configuration.
  // NOTE(review): the third argument of create_sequence_table is presumably a
  // null probability in [0, 1] -- confirm against its declaration.
  auto const nulls = state.get_float64("nulls");

  auto input = create_sequence_table(
    cycle_dtypes({cudf::type_to_id<int64_t>()}, num_cols), row_count{num_rows}, nulls);
  auto input_columns = input->view();
  auto column_views  = std::vector<cudf::column_view>(input_columns.begin(), input_columns.end());

  auto stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
  // Every input element is counted once as a read and once as a write.
  state.add_global_memory_reads<int64_t>(num_rows * num_cols);
  state.add_global_memory_writes<int64_t>(num_rows * num_cols);

  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); });
}

// Registers bench_concatenate under the name "concatenate" and declares the
// axes the benchmark body reads: two int64 axes ("num_rows", "num_cols") and
// one float64 axis ("nulls").
NVBENCH_BENCH(bench_concatenate)
.set_name("concatenate")
.add_int64_axis("num_rows", {64, 512, 4096, 32768, 262144})
.add_int64_axis("num_cols", {2, 8, 64, 512, 1024})
.add_float64_axis("nulls", {0.0, 0.3});

/**
 * @brief Benchmarks cudf::concatenate on repeated views of one random strings column.
 *
 * Reads the `num_rows`, `num_cols`, `row_width` and `nulls` axes from the
 * nvbench state, creates a single random STRING column of `num_rows` rows,
 * and times concatenating `num_cols` views of that same column.
 *
 * @param state nvbench state supplying the axis values and collecting results
 */
static void bench_concatenate_strings(nvbench::state& state)
{
  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const num_cols  = static_cast<cudf::size_type>(state.get_int64("num_cols"));
  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
  // Keep the null probability as a double. Casting it to an integer type
  // truncates fractional values such as 0.3 to 0, so no nulls would ever be
  // requested from the profile.
  auto const nulls = state.get_float64("nulls");

  data_profile const profile =
    data_profile_builder()
      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width)
      .null_probability(nulls);
  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
  auto const input  = column->view();

  // Every entry views the same underlying column; no data is duplicated here.
  auto column_views = std::vector<cudf::column_view>(num_cols, input);

  auto stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
  auto const sv = cudf::strings_column_view(input);
  // Query the character count once and reuse it for both counters.
  auto const chars_size = sv.chars_size(stream);
  // The same number of character elements is read and written, so both
  // counters use the same one-byte element type. Counting writes as int64_t
  // would report eight times the write traffic.
  state.add_global_memory_reads<int8_t>(chars_size * num_cols);
  state.add_global_memory_writes<int8_t>(chars_size * num_cols);

  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); });
}

// Registers bench_concatenate_strings under the name "concatenate_strings" and
// declares the axes the benchmark body reads: three int64 axes ("num_rows",
// "num_cols", "row_width") and one float64 axis ("nulls").
NVBENCH_BENCH(bench_concatenate_strings)
.set_name("concatenate_strings")
.add_int64_axis("num_rows", {256, 512, 4096, 16384})
.add_int64_axis("num_cols", {2, 8, 64, 256})
.add_int64_axis("row_width", {32, 128})
.add_float64_axis("nulls", {0.0, 0.3});
19 changes: 8 additions & 11 deletions cpp/benchmarks/string/case.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,14 @@

void bench_case(nvbench::state& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const max_width = static_cast<int32_t>(state.get_int64("row_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
auto const encoding = state.get_string("encoding");

if (static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(max_width) >=
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
state.skip("Skip benchmarks greater than size_type limit");
}

data_profile const profile = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width);
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);

auto col_view = column->view();

Expand Down Expand Up @@ -74,6 +70,7 @@ void bench_case(nvbench::state& state)

NVBENCH_BENCH(bench_case)
.set_name("case")
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048})
.add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216})
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("num_rows", {32768, 262144, 2097152})
.add_string_axis("encoding", {"ascii", "utf8"});
Loading

0 comments on commit ac732b8

Please sign in to comment.