Skip to content

Commit

Permalink
Fix strings::detail::copy_range when target contains nulls (#16626)
Browse files Browse the repository at this point in the history
Fixes the logic in `cudf::strings::detail::copy_range` handling of nulls in the target range. The optimization check for nulls is removed simplifying the logic and making it more reliable as well. The benchmark showed no significant change in performance.
Also adds a specific gtest for this case.
Error was introduced in #15010
Closes #16618

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: #16626
  • Loading branch information
davidwendt authored Aug 27, 2024
1 parent 88de8dd commit e2a15cb
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 20 deletions.
23 changes: 3 additions & 20 deletions cpp/src/strings/copying/copy_range.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,14 @@ struct compute_element_size {
size_type source_begin;
size_type target_begin;
size_type target_end;
bool source_has_nulls;
bool target_has_nulls;

__device__ cudf::size_type operator()(cudf::size_type idx)
{
if (idx >= target_begin && idx < target_end) {
auto const str_idx = source_begin + (idx - target_begin);
return source_has_nulls && d_source.is_null_nocheck(str_idx)
? 0
: d_source.element<string_view>(str_idx).size_bytes();
return d_source.is_null(str_idx) ? 0 : d_source.element<string_view>(str_idx).size_bytes();
} else {
return target_has_nulls && d_target.is_null_nocheck(idx)
? 0
: d_target.element<string_view>(idx).size_bytes();
return d_target.is_null(idx) ? 0 : d_target.element<string_view>(idx).size_bytes();
}
}
};
Expand Down Expand Up @@ -97,20 +91,9 @@ std::unique_ptr<column> copy_range(strings_column_view const& source,
mr);
}();

auto [check_source, check_target] = [target, null_count = null_count] {
// check validities for both source & target
if (target.has_nulls()) { return std::make_pair(true, true); }
// check validities for source only
if (null_count > 0) { return std::make_pair(true, false); }
// no need to check validities
return std::make_pair(false, false);
}();

// create offsets
auto sizes_begin = cudf::detail::make_counting_transform_iterator(
0,
compute_element_size{
d_source, d_target, source_begin, target_begin, target_end, check_source, check_target});
0, compute_element_size{d_source, d_target, source_begin, target_begin, target_end});
auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column(
sizes_begin, sizes_begin + target.size(), stream, mr);
auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view());
Expand Down
10 changes: 10 additions & 0 deletions cpp/tests/copying/copy_range_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,16 @@ TEST_F(CopyRangeTestFixture, CopyWithNullsString)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*p_ret, expected);
}

TEST_F(CopyRangeTestFixture, CopyWithTargetNullsString)
{
auto target =
cudf::test::strings_column_wrapper({"a", "b", "", "d", "", "é"}, {1, 1, 0, 1, 1, 1});
auto source = cudf::test::strings_column_wrapper({"A", "B", "C", "D", "E", "F"});
auto result = cudf::copy_range(source, target, 1, 5, 1);
auto expected = cudf::test::strings_column_wrapper({"a", "B", "C", "D", "E", "é"});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
}

TEST_F(CopyRangeTestFixture, CopyNoNullsString)
{
cudf::size_type size{100};
Expand Down

0 comments on commit e2a15cb

Please sign in to comment.