Skip to content

Commit

Permalink
Fix Valid if n
Browse files Browse the repository at this point in the history
  • Loading branch information
sdrp713 committed Sep 18, 2024
1 parent a584a34 commit 928ca4f
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 65 deletions.
29 changes: 9 additions & 20 deletions cpp/include/cudf/detail/valid_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -189,45 +189,34 @@ CUDF_KERNEL void valid_if_n_kernel(InputIterator1 begin1,
}

template <typename InputIterator, typename Predicate>
std::pair<std::vector<rmm::device_buffer>, std::vector<size_type>> valid_if_n_kernel(std::vector<InputIterator> strings,
std::pair<std::vector<bitmask_type*>, rmm::device_uvector<size_type>> valid_if_n_kernel(std::vector<InputIterator> strings,
std::vector<int64_t> sizes,
Predicate p,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
rmm::device_uvector<size_type> valid_counts(strings.size(), stream);

rmm::device_uvector<rmm::device_buffer> null_masks(strings.size(), stream);
std::vector<bitmask_type*> null_masks(strings.size(), stream);

thrust::transform(
sizes.begin(),
sizes.end(),
null_masks.begin(),
[stream, mr] __device__ (auto & size) {
return cudf::create_null_mask(size, mask_state::UNINITIALIZED, stream, mr);
return static_cast<bitmask_type*>(cudf::create_null_mask(size, mask_state::UNINITIALIZED, stream, mr));
}
);

auto device_null_masks = make_device_uvector_async(null_masks, stream);

auto counting_iter = thrust::make_counting_iterator(0);
constexpr size_type block_size{256};
grid_1d grid{strings.size(), block_size};
valid_if_n_kernel<block_size><<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>
(strings.begin(), InputIterator2 begin2, p, static_cast<bitmask_type*>(null_masks.data()), strings.size(), 8, valid_counts.data());

std::vector<size_type> host_valid_counts = make_std_vector_async(valid_counts, stream);

std::vector<size_type> host_null_masks = make_std_vector_async(null_masks, stream);

std::vector<size_type> null_counts(strings.size());

std::transform(
thrust::make_zip_iterator(thrust::make_tuple(host_valid_counts.begin(), sizes.begin())),
thrust::make_zip_iterator(thrust::make_tuple(host_valid_counts.end(), sizes.end())),
null_counts.begin(),
[] (auto & elem) {
return thrust::get<1>(elem) - thrust::get<0>(elem);
}
)
(counting_iter, counting_iter, p, device_null_masks.data(), strings.size(), 8, valid_counts.data());

return std::pair(host_null_masks, null_counts);
return std::pair(null_masks, valid_counts);
}

} // namespace detail
Expand Down
31 changes: 20 additions & 11 deletions cpp/include/cudf/strings/detail/strings_children.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,17 @@ namespace strings {
namespace detail {

std::pair<std::vector<std::unique_ptr<column>>, std::vector<int64_t>> make_offsets_child_column_batch(
std::vector<thrust::transform_iterator<size_type> offsets_transformer_itr,
std::vector<size_type> strings_sizes,
std::vector<cudf::device_span<thrust::pair<char const*, size_type> const>> strings_batch,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
std::vector<std::unique_ptr<column>> offsets_columns;
rmm::device_uvector<rmm::device_scalar<int64_t>> total_bytes(offsets_transformer_itr.size(), stream, mr);
auto constexpr size_type_max = static_cast<int64_t>(std::numeric_limits<size_type>::max());

std::transform (
thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.begin(), strings_sizes.begin())),
thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.end(), strings_sizes.end())),
strings_sizes.begin(),
strings_sizes.end(),
std::back_inserter(offsets_columns),
[stream, mr] (auto &elem) {
auto const lcount = static_cast<int64_t>(thrust::get<1>(elem));
Expand All @@ -60,13 +59,18 @@ std::pair<std::vector<std::unique_ptr<column>>, std::vector<int64_t>> make_offse
);

std::transform (
thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.begin(), strings_sizes.begin(), offsets_columns.begin())),
thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.end(), strings_sizes.end(), offsets_columns.end())),
thrust::make_zip_iterator(thrust::make_tuple(strings_batch.begin(), strings_sizes.begin(), offsets_columns.begin())),
thrust::make_zip_iterator(thrust::make_tuple(strings_batch.end(), strings_sizes.end(), offsets_columns.end())),
std::back_inserter(total_bytes),
[] (auto &elem) {
auto offsets_transformer =
cuda::proclaim_return_type<size_type>([] __device__(string_index_pair item) -> size_type {
return (item.first != nullptr ? static_cast<size_type>(item.second) : size_type{0});
});
auto offsets_transformer_itr = thrust::make_transform_iterator(thrust::get<0>(elem), offsets_transformer);
auto d_offsets = thrust::get<2>(elem)->mutable_view().template data<int32_t>();
auto map_fn = cuda::proclaim_return_type<size_type>(
[begin = thrust::get<0>(elem), strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type {
[begin = offsets_transformer_itr, strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type {
return idx < strings_count ? static_cast<size_type>(begin[idx]) : size_type{0};
}
);
Expand All @@ -79,16 +83,21 @@ std::pair<std::vector<std::unique_ptr<column>>, std::vector<int64_t>> make_offse

auto const threshold = cudf::strings::get_offset64_threshold();
std::for_each (
thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.begin(), strings_sizes.begin(), offsets_columns.begin(), offsets_transformer_itr.begin())),
thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.end(), strings_sizes.end(), offsets_columns.end(), offsets_transformer_itr.end())),
thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.begin(), strings_sizes.begin(), offsets_columns.begin(), strings_batch.begin())),
thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.end(), strings_sizes.end(), offsets_columns.end(), strings_batch.end())),
[threshold, stream, mr] (auto &elem) {
auto offsets_transformer =
cuda::proclaim_return_type<size_type>([] __device__(string_index_pair item) -> size_type {
return (item.first != nullptr ? static_cast<size_type>(item.second) : size_type{0});
});
auto offsets_transformer_itr = thrust::make_transform_iterator(thrust::get<0>(elem), offsets_transformer);
CUDF_EXPECTS(cudf::strings::is_large_strings_enabled() || (thrust::get<0>(elem) < threshold),
"Size of output exceeds the column size limit",
std::overflow_error);
if (thrust::get<0>(elem) >= cudf::strings::get_offset64_threshold()) {
// recompute as int64 offsets when above the threshold
auto map_fn = cuda::proclaim_return_type<size_type>(
[begin = thrust::get<3>(elem), strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type {
[begin = offsets_transformer_itr, strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type {
return idx < strings_count ? static_cast<size_type>(begin[idx]) : size_type{0};
}
);
Expand Down
40 changes: 6 additions & 34 deletions cpp/src/strings/strings_column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,51 +42,23 @@ std::vector<std::unique_ptr<column>> make_strings_column_batch(
std::vector<std::unique_ptr<column>>> offset_columns;
std::vector<size_type> total_bytes;
std::vector<size_type> strings_sizes;
std::vector<thrust::transform_iterator<size_type> offsets_transformer_itr;
std::vector<int64_t> chars_sizes;
std::vector<rmm::device_buffer> null_masks;
std::vector<size_type> null_counts;

std::transform(
strings_batch.begin(),
strings_batch.end(),
std::back_inserter(strings_sizes),
[] (auto &strings) {
return thrust::distance(strings.begin(), strings.end());
}
);

std::transform(
strings_batch.begin(),
strings_batch.end(),
std::back_inserter(offsets_transformer_itr),
[stream, mr] (auto &strings) {
size_type strings_count = thrust::distance(strings.begin(), strings.end());
auto offsets_transformer =
cuda::proclaim_return_type<size_type>([] __device__(string_index_pair item) -> size_type {
return (item.first != nullptr ? static_cast<size_type>(item.second) : size_type{0});
});
return thrust::make_transform_iterator(strings.begin(), offsets_transformer);
}
);

[offset_columns, total_bytes] = cudf::strings::detail::make_offsets_child_column_batch(
offsets_transformer_itr, strings_sizes, stream, mr);
[offset_columns, total_bytes] = cudf::strings::detail::make_offsets_child_column_batch(strings_batch, stream, mr);



// create null mask
rmm::device_uvector<size_type> valid_counts(strings.size(), stream);
std::vector<bitmask_type*> null_masks(strings.size(), stream);
auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; };
[] = cudf::detail::valid_if_n_kernel(strings_batch, sizes, validator, stream, mr);
auto const null_count = new_nulls.second;
auto null_mask =
(null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr};

[null_masks, valid_counts] = cudf::detail::valid_if_n_kernel(strings_batch, sizes, validator, stream, mr);

// build chars column
std::transform(
thrust::make_zip_iterator(thrust::make_tuple(offset_columns.begin(), total_bytes.begin(), strings_sizes.begin(), strings_batch.begin(), nu))

thrust::make_zip_iterator(thrust::make_tuple(offset_columns.begin(), total_bytes.begin(), strings_sizes.begin(), strings_batch.begin(), null_masks.begin())),
thrust::make_zip_iterator(thrust::make_tuple(offset_columns.end(), total_bytes.end(), strings_sizes.end(), strings_batch.end(), null_masks.end()))
std::back_inserter(output),
[] (auto &elem) {
auto strings_count = thrust::get<2>(elem)
Expand Down

0 comments on commit 928ca4f

Please sign in to comment.