diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index cd7982ad66d..03cdb58beca 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -189,7 +189,7 @@ CUDF_KERNEL void valid_if_n_kernel(InputIterator1 begin1, } template -std::pair, std::vector> valid_if_n_kernel(std::vector strings, +std::pair, rmm::device_uvector> valid_if_n_kernel(std::vector strings, std::vector sizes, Predicate p, rmm::cuda_stream_view stream, @@ -197,37 +197,26 @@ std::pair, std::vector> valid_if_n_ke { rmm::device_uvector valid_counts(strings.size(), stream); - rmm::device_uvector null_masks(strings.size(), stream); + std::vector null_masks(strings.size(), stream); thrust::transform( sizes.begin(), sizes.end(), null_masks.begin(), [stream, mr] __device__ (auto & size) { - return cudf::create_null_mask(size, mask_state::UNINITIALIZED, stream, mr); + return static_cast(cudf::create_null_mask(size, mask_state::UNINITIALIZED, stream, mr)); } ); + + auto device_null_masks = make_device_uvector_async(null_masks, stream); + + auto counting_iter = thrust::make_counting_iterator(0); constexpr size_type block_size{256}; grid_1d grid{strings.size(), block_size}; valid_if_n_kernel<<>> - (strings.begin(), InputIterator2 begin2, p, static_cast(null_masks.data()), strings.size(), 8, valid_counts.data()); - - std::vector host_valid_counts = make_std_vector_async(valid_counts, stream); - - std::vector host_null_masks = make_std_vector_async(null_masks, stream); - - std::vector null_counts(strings.size()); - - std::transform( - thrust::make_zip_iterator(thrust::make_tuple(host_valid_counts.begin(), sizes.begin())), - thrust::make_zip_iterator(thrust::make_tuple(host_valid_counts.end(), sizes.end())), - null_counts.begin(), - [] (auto & elem) { - return thrust::get<1>(elem) - thrust::get<0>(elem); - } - ) + (counting_iter, counting_iter, p, device_null_masks.data(), strings.size(), 8, valid_counts.data()); - return std::pair(host_null_masks, null_counts); + return std::pair(null_masks, valid_counts); } } // namespace detail diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 30f5dce6c28..3cc02c97d45 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -39,18 +39,17 @@ namespace strings { namespace detail { std::pair>, std::vector> make_offsets_child_column_batch( - std::vector offsets_transformer_itr, - std::vector strings_sizes, + std::vector const>> strings_batch, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { std::vector> offsets_columns; rmm::device_uvector> total_bytes(offsets_transformer_itr.size(), stream, mr); auto constexpr size_type_max = static_cast(std::numeric_limits::max()); - + std::transform ( - thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.begin(), strings_sizes.begin())), - thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.end(), strings_sizes.end())), + strings_sizes.begin(), + strings_sizes.end(), std::back_inserter(offsets_columns), [stream, mr] (auto &elem) { auto const lcount = static_cast(thrust::get<1>(elem)); @@ -60,13 +59,18 @@ std::pair>, std::vector> make_offse ); std::transform ( - thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.begin(), strings_sizes.begin(), offsets_columns.begin())), - thrust::make_zip_iterator(thrust::make_tuple(offsets_transformer_itr.end(), strings_sizes.end(), offsets_columns.end())), + thrust::make_zip_iterator(thrust::make_tuple(strings_batch.begin(), strings_sizes.begin(), offsets_columns.begin())), + thrust::make_zip_iterator(thrust::make_tuple(strings_batch.end(), strings_sizes.end(), offsets_columns.end())), std::back_inserter(total_bytes), [] (auto &elem) { + auto offsets_transformer = + cuda::proclaim_return_type([] __device__(string_index_pair item) -> size_type { + return (item.first != nullptr ? static_cast(item.second) : size_type{0}); + }); + auto offsets_transformer_itr = thrust::make_transform_iterator(thrust::get<0>(elem), offsets_transformer); auto d_offsets = thrust::get<2>(elem)->mutable_view().template data(); auto map_fn = cuda::proclaim_return_type( - [begin = thrust::get<0>(elem), strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type { + [begin = offsets_transformer_itr, strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type { return idx < strings_count ? static_cast(begin[idx]) : size_type{0}; } ); @@ -79,16 +83,21 @@ std::pair>, std::vector> make_offse auto const threshold = cudf::strings::get_offset64_threshold(); std::for_each ( - thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.begin(), strings_sizes.begin(), offsets_columns.begin(), offsets_transformer_itr.begin())), - thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.end(), strings_sizes.end(), offsets_columns.end(), offsets_transformer_itr.end())), + thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.begin(), strings_sizes.begin(), offsets_columns.begin(), strings_batch.begin())), + thrust::make_zip_iterator(thrust::make_tuple(host_total_bytes.end(), strings_sizes.end(), offsets_columns.end(), strings_batch.end())), [threshold, stream, mr] (auto &elem) { + auto offsets_transformer = + cuda::proclaim_return_type([] __device__(string_index_pair item) -> size_type { + return (item.first != nullptr ? static_cast(item.second) : size_type{0}); + }); + auto offsets_transformer_itr = thrust::make_transform_iterator(thrust::get<0>(elem), offsets_transformer); CUDF_EXPECTS(cudf::strings::is_large_strings_enabled() || (thrust::get<0>(elem) < threshold), "Size of output exceeds the column size limit", std::overflow_error); if (thrust::get<0>(elem) >= cudf::strings::get_offset64_threshold()) { // recompute as int64 offsets when above the threshold auto map_fn = cuda::proclaim_return_type( - [begin = thrust::get<3>(elem), strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type { + [begin = offsets_transformer_itr, strings_count = thrust::get<1>(elem)] __device__(size_type idx) -> size_type { return idx < strings_count ? static_cast(begin[idx]) : size_type{0}; } ); diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu index 1f87dbecf00..dc81d673c27 100644 --- a/cpp/src/strings/strings_column_factories.cu +++ b/cpp/src/strings/strings_column_factories.cu @@ -42,51 +42,23 @@ std::vector> make_strings_column_batch( std::vector>> offset_columns; std::vector total_bytes; std::vector strings_sizes; - std::vector offsets_transformer_itr; std::vector chars_sizes; std::vector null_masks; std::vector null_counts; - std::transform( - strings_batch.begin(), - strings_batch.end(), - std::back_inserter(strings_sizes), - [] (auto &strings) { - return thrust::distance(strings.begin(), strings.end()); - } - ); - - std::transform( - strings_batch.begin(), - strings_batch.end(), - std::back_inserter(offsets_transformer_itr), - [stream, mr] (auto &strings) { - size_type strings_count = thrust::distance(strings.begin(), strings.end()); - auto offsets_transformer = - cuda::proclaim_return_type([] __device__(string_index_pair item) -> size_type { - return (item.first != nullptr ? static_cast(item.second) : size_type{0}); - }); - return thrust::make_transform_iterator(strings.begin(), offsets_transformer); - } - ); - - [offset_columns, total_bytes] = cudf::strings::detail::make_offsets_child_column_batch( - offsets_transformer_itr, strings_sizes, stream, mr); + [offset_columns, total_bytes] = cudf::strings::detail::make_offsets_child_column_batch(strings_batch, stream, mr); - // create null mask + rmm::device_uvector valid_counts(strings.size(), stream); + std::vector null_masks(strings.size(), stream); auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; - [] = cudf::detail::valid_if_n_kernel(strings_batch, sizes, validator, stream, mr); - auto const null_count = new_nulls.second; - auto null_mask = - (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; - + [null_masks, valid_counts] = cudf::detail::valid_if_n_kernel(strings_batch, sizes, validator, stream, mr); // build chars column std::transform( - thrust::make_zip_iterator(thrust::make_tuple(offset_columns.begin(), total_bytes.begin(), strings_sizes.begin(), strings_batch.begin(), nu)) - + thrust::make_zip_iterator(thrust::make_tuple(offset_columns.begin(), total_bytes.begin(), strings_sizes.begin(), strings_batch.begin(), null_masks.begin())), + thrust::make_zip_iterator(thrust::make_tuple(offset_columns.end(), total_bytes.end(), strings_sizes.end(), strings_batch.end(), null_masks.end())) std::back_inserter(output), [] (auto &elem) { auto strings_count = thrust::get<2>(elem)