Skip to content

Commit

Permalink
Remove invalid column_view usage in string-scalar-to-column function (#…
Browse files Browse the repository at this point in the history
…16530)

Fixes the `make_column_from_scalar` function for `string_scalar` internal usage of a temporary `column_view` with non-zero size but no data or children to call `cudf::strings::detail::fill`. This relied too much on fragile internal logic which has cause several headaches including the recent work adding prefetch logic to libcudf.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: #16530
  • Loading branch information
davidwendt authored Aug 19, 2024
1 parent dd2c12d commit 592342c
Showing 1 changed file with 20 additions and 8 deletions.
28 changes: 20 additions & 8 deletions cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/lists/detail/lists_column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/fill.hpp>
#include <cudf/strings/detail/strings_column_factories.cuh>

#include <rmm/resource_ref.hpp>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/uninitialized_fill.h>

namespace cudf {

Expand Down Expand Up @@ -57,15 +58,26 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stri
{
if (size == 0) return make_empty_column(value.type());

// Since we are setting every row to the scalar, the fill() never needs to access
// any of the children in the strings column which would otherwise cause an exception.
column_view sc{value.type(), size, nullptr, nullptr, 0};
auto& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);
if (!value.is_valid(stream)) {
return make_strings_column(
size,
make_column_from_scalar(numeric_scalar<int32_t>(0), size + 1, stream, mr),
rmm::device_buffer{},
size,
cudf::detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr));
}

// fill the column with the scalar
auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
auto& ss = static_cast<scalar_type_t<cudf::string_view> const&>(value);
auto const d_str = ss.value(stream); // no actual data is copied

return output;
// fill the column with the scalar
rmm::device_uvector<cudf::strings::detail::string_index_pair> indices(size, stream);
auto const row_value =
d_str.empty() ? cudf::strings::detail::string_index_pair{"", 0}
: cudf::strings::detail::string_index_pair{d_str.data(), d_str.size_bytes()};
thrust::uninitialized_fill(
rmm::exec_policy_nosync(stream), indices.begin(), indices.end(), row_value);
return cudf::strings::detail::make_strings_column(indices.begin(), indices.end(), stream, mr);
}

template <>
Expand Down

0 comments on commit 592342c

Please sign in to comment.