Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into ref/column_empty/consistent
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored Nov 21, 2024
2 parents 0b50848 + 78db66b commit feb82dd
Show file tree
Hide file tree
Showing 22 changed files with 571 additions and 208 deletions.
3 changes: 3 additions & 0 deletions ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ if [[ $(arch) == "aarch64" ]]; then
DESELECTED_TESTS+=("tests/unit/operations/test_join.py::test_join_4_columns_with_validity")
else
# Ensure that we don't run dbgen when it uses newer symbols than supported by the glibc version in the CI image.
# Allow errors since any of these commands could produce empty results that would cause the script to fail.
set +e
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
latest_glibc_symbol_found=$(nm py-polars/tests/benchmark/data/pdsh/dbgen/dbgen | grep GLIBC | grep -o "[0-9]\.[0-9]\+" | sort --version-sort | tail -1 | cut -d "." -f 2)
set -e
if [[ ${glibc_minor_version} -lt ${latest_glibc_symbol_found} ]]; then
DESELECTED_TESTS+=("tests/benchmark/test_pdsh.py::test_pdsh")
fi
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ dependencies:
- ptxcompiler
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pynvml>=11.4.1,<12.0.0a0
- pytest-benchmark
- pytest-cases>=3.8.2
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ dependencies:
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pynvjitlink>=0.0.0a0
- pynvml>=11.4.1,<12.0.0a0
- pytest-benchmark
- pytest-cases>=3.8.2
- pytest-cov
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ requirements:
run:
- python
- cudf ={{ version }}
- pynvml >=11.4.1,<12.0.0a0
- rapids-dask-dependency ={{ minor_version }}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ ConfigureNVBench(

# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp)
ConfigureBench(STRINGS_BENCH string/factory.cu)

ConfigureNVBench(
STRINGS_NVBENCH
Expand All @@ -384,6 +384,7 @@ ConfigureNVBench(
string/lengths.cpp
string/like.cpp
string/make_strings_column.cu
string/repeat_strings.cpp
string/replace.cpp
string/replace_re.cpp
string/reverse.cpp
Expand Down
123 changes: 41 additions & 82 deletions cpp/benchmarks/string/repeat_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,99 +14,58 @@
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/repeat_strings.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

static constexpr cudf::size_type default_repeat_times = 16;
static constexpr cudf::size_type min_repeat_times = -16;
static constexpr cudf::size_type max_repeat_times = 16;
#include <nvbench/nvbench.cuh>

static std::unique_ptr<cudf::table> create_data_table(cudf::size_type n_cols,
cudf::size_type n_rows,
cudf::size_type max_str_length)
static void bench_repeat(nvbench::state& state)
{
CUDF_EXPECTS(n_cols == 1 || n_cols == 2, "Invalid number of columns.");
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
auto const min_repeat = static_cast<cudf::size_type>(state.get_int64("min_repeat"));
auto const max_repeat = static_cast<cudf::size_type>(state.get_int64("max_repeat"));
auto const api = state.get_string("api");

std::vector<cudf::type_id> dtype_ids{cudf::type_id::STRING};
auto builder = data_profile_builder().distribution(
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);

if (n_cols == 2) {
dtype_ids.push_back(cudf::type_id::INT32);
builder.distribution(
cudf::type_id::INT32, distribution_id::NORMAL, min_repeat_times, max_repeat_times);
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
builder.distribution(cudf::type_id::INT32, distribution_id::NORMAL, min_repeat, max_repeat);

auto const table = create_random_table(
{cudf::type_id::STRING, cudf::type_id::INT32}, row_count{num_rows}, data_profile{builder});
auto const input = cudf::strings_column_view(table->view().column(0));

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
auto chars_size = input.chars_size(stream);
state.add_global_memory_reads<nvbench::int8_t>(chars_size);

if (api == "scalar") {
state.add_global_memory_writes<nvbench::int8_t>(chars_size * max_repeat);
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, max_repeat); });
} else if (api == "column") {
auto repeats = table->view().column(1);
{
auto result = cudf::strings::repeat_strings(input, repeats);
auto output = cudf::strings_column_view(result->view());
state.add_global_memory_writes<nvbench::int8_t>(output.chars_size(stream));
}
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::repeat_strings(input, repeats); });
}

return create_random_table(dtype_ids, row_count{n_rows}, data_profile{builder});
}

static void BM_repeat_strings_scalar_times(benchmark::State& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const max_str_length = static_cast<cudf::size_type>(state.range(1));
auto const table = create_data_table(1, n_rows, max_str_length);
auto const strings_col = cudf::strings_column_view(table->view().column(0));

for ([[maybe_unused]] auto _ : state) {
[[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
cudf::strings::repeat_strings(strings_col, default_repeat_times);
}

state.SetBytesProcessed(state.iterations() * strings_col.chars_size(cudf::get_default_stream()));
}

static void BM_repeat_strings_column_times(benchmark::State& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const max_str_length = static_cast<cudf::size_type>(state.range(1));
auto const table = create_data_table(2, n_rows, max_str_length);
auto const strings_col = cudf::strings_column_view(table->view().column(0));
auto const repeat_times_col = table->view().column(1);

for ([[maybe_unused]] auto _ : state) {
[[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
cudf::strings::repeat_strings(strings_col, repeat_times_col);
}

state.SetBytesProcessed(state.iterations() * (strings_col.chars_size(cudf::get_default_stream()) +
repeat_times_col.size() * sizeof(int32_t)));
}

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 8;
int const max_rows = 1 << 18;
int const row_mult = 4;
int const min_strlen = 1 << 4;
int const max_strlen = 1 << 8;
int const len_mult = 4;
generate_string_bench_args(b, min_rows, max_rows, row_mult, min_strlen, max_strlen, len_mult);
}

class RepeatStrings : public cudf::benchmark {};

#define REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(RepeatStrings, name) \
(::benchmark::State & st) { BM_repeat_strings_scalar_times(st); } \
BENCHMARK_REGISTER_F(RepeatStrings, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

#define REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(RepeatStrings, name) \
(::benchmark::State & st) { BM_repeat_strings_column_times(st); } \
BENCHMARK_REGISTER_F(RepeatStrings, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times)
REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times)
// Registers bench_repeat with nvbench under the name "repeat".
// Every axis declared here is read back inside bench_repeat through
// state.get_int64() / state.get_string(): the width and repeat bounds feed the
// random-data profile, "num_rows" sets the table size, and "api" selects the
// scalar or the column overload of cudf::strings::repeat_strings.
// NOTE(review): presumably nvbench runs one configuration per combination of
// axis values -- confirm against the nvbench documentation.
NVBENCH_BENCH(bench_repeat)
.set_name("repeat")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("min_repeat", {0})
.add_int64_axis("max_repeat", {16})
.add_int64_axis("num_rows", {32768, 262144, 2097152})
.add_string_axis("api", {"scalar", "column"});
4 changes: 4 additions & 0 deletions cpp/include/cudf/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,14 @@ namespace CUDF_EXPORT cudf {
* @throw cudf::logic_error if any of the DLTensor fields are unsupported
*
* @param managed_tensor a 1D or 2D column-major (Fortran order) tensor
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
*
* @return Table with a copy of the tensor data
*/
std::unique_ptr<table> from_dlpack(
DLManagedTensor const* managed_tensor,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -79,12 +81,14 @@ std::unique_ptr<table> from_dlpack(
* or if any of the columns have a non-zero null count
*
* @param input Table to convert to DLPack
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned DLPack tensor's device memory
*
* @return 1D or 2D DLPack tensor with a copy of the table data, or nullptr
*/
DLManagedTensor* to_dlpack(
table_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1362,7 +1362,7 @@ table_with_metadata read_csv(
*/

/**
*@brief Builder to build options for `writer_csv()`.
*@brief Builder to build options for `write_csv()`.
*/
class csv_writer_options_builder;

Expand Down
9 changes: 6 additions & 3 deletions cpp/src/interop/dlpack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,16 +297,19 @@ DLManagedTensor* to_dlpack(table_view const& input,
} // namespace detail

std::unique_ptr<table> from_dlpack(DLManagedTensor const* managed_tensor,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::from_dlpack(managed_tensor, cudf::get_default_stream(), mr);
return detail::from_dlpack(managed_tensor, stream, mr);
}

DLManagedTensor* to_dlpack(table_view const& input, rmm::device_async_resource_ref mr)
DLManagedTensor* to_dlpack(table_view const& input,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::to_dlpack(input, cudf::get_default_stream(), mr);
return detail::to_dlpack(input, stream, mr);
}

} // namespace cudf
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes
ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_INTEROP streams/interop_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_JOIN_TEST streams/join_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing)
Expand Down
46 changes: 46 additions & 0 deletions cpp/tests/streams/interop_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>

#include <cudf/interop.hpp>
#include <cudf/table/table_view.hpp>

#include <dlpack/dlpack.h>

/**
 * @brief Deleter functor that releases a DLManagedTensor through its own
 * `deleter` callback.
 *
 * Meant to be used as the deleter type of a
 * `std::unique_ptr<DLManagedTensor, dlpack_deleter>`.
 */
struct dlpack_deleter {
  /**
   * @brief Invokes the tensor's `deleter` callback on the tensor itself.
   *
   * The callback is skipped when it is null: the DLPack header allows a
   * producer to leave `deleter` unset when it has nothing to release, and
   * calling through a null function pointer would be undefined behavior.
   *
   * @param tensor Tensor to release; must not be null
   */
  void operator()(DLManagedTensor* tensor) const
  {
    if (tensor->deleter != nullptr) { tensor->deleter(tensor); }
  }
};

// Test fixture for the DLPack interop stream tests; adds nothing on top of the
// cudf base fixture (struct inheritance is public by default).
struct DLPackTest : cudf::test::BaseFixture {};

// Calls cudf::to_dlpack with the test stream on a table that has no columns.
TEST_F(DLPackTest, ToDLPack)
{
  using unique_managed_tensor = std::unique_ptr<DLManagedTensor, dlpack_deleter>;

  cudf::table_view empty(std::vector<cudf::column_view>{});

  // to_dlpack returns a raw DLManagedTensor* (documented as a tensor or
  // nullptr). Hold it in the owning wrapper so a non-null result is released
  // through its deleter instead of being leaked; a null result is a no-op.
  unique_managed_tensor tensor(cudf::to_dlpack(empty, cudf::test::get_default_stream()));
}

// Converts a two-column table of empty INT32 columns to DLPack and back,
// passing the test stream to both cudf::to_dlpack and cudf::from_dlpack.
TEST_F(DLPackTest, FromDLPack)
{
  // Owning handle that releases the exported tensor via dlpack_deleter.
  using tensor_owner = std::unique_ptr<DLManagedTensor, dlpack_deleter>;

  auto const stream = cudf::test::get_default_stream();

  cudf::test::fixed_width_column_wrapper<int32_t> first({});
  cudf::test::fixed_width_column_wrapper<int32_t> second({});
  cudf::table_view const source({first, second});

  tensor_owner exported(cudf::to_dlpack(source, stream));
  auto roundtrip = cudf::from_dlpack(exported.get(), stream);
}
1 change: 1 addition & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- pynvml>=11.4.1,<12.0.0a0
- rapids-dask-dependency==25.2.*,>=0.0.0a0
run_custreamz:
common:
Expand Down
Loading

0 comments on commit feb82dd

Please sign in to comment.