Skip to content

Commit

Permalink
Add stream parameter to List Sort and Filter APIs (rapidsai#14272)
Browse files Browse the repository at this point in the history
This PR introduces the stream parameter to the List Sorting and Filtering APIs.

Sorting and Filtering (`extract.hpp`, `filling.hpp`, `gather.hpp`, `reverse.hpp`, `sorting.hpp`, `stream_compaction.hpp`)

```
extract_list_element - index
extract_list_element - indices
segmented_gather
sequences - without steps
sequences - with steps
reverse
sort_lists
stable_sort_lists
apply_boolean_mask
distinct
```

Reference [13744](rapidsai#13744)

Authors:
  - Suraj Aralihalli (https://github.com/SurajAralihalli)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: rapidsai#14272
  • Loading branch information
SurajAralihalli authored Oct 16, 2023
1 parent ef92310 commit c47546e
Show file tree
Hide file tree
Showing 15 changed files with 129 additions and 17 deletions.
6 changes: 5 additions & 1 deletion cpp/include/cudf/lists/extract.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -59,12 +59,14 @@ namespace lists {
*
* @param lists_column Column to extract elements from.
* @param index The row within each sublist to retrieve.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Column of extracted elements.
*/
std::unique_ptr<column> extract_list_element(
lists_column_view const& lists_column,
size_type index,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -97,13 +99,15 @@ std::unique_ptr<column> extract_list_element(
* @param lists_column Column to extract elements from.
* @param indices The column whose rows indicate the element index to be retrieved from each list
* row.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Column of extracted elements.
* @throws cudf::logic_error If the sizes of `lists_column` and `indices` do not match.
*/
std::unique_ptr<column> extract_list_element(
lists_column_view const& lists_column,
column_view const& indices,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
6 changes: 6 additions & 0 deletions cpp/include/cudf/lists/filling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <memory>
Expand Down Expand Up @@ -57,12 +59,14 @@ namespace cudf::lists {
*
* @param starts First values in the result sequences.
* @param sizes Numbers of values in the result sequences.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return The result column containing generated sequences.
*/
std::unique_ptr<column> sequences(
column_view const& starts,
column_view const& sizes,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -96,13 +100,15 @@ std::unique_ptr<column> sequences(
* @param starts First values in the result sequences.
* @param steps Increment values for the result sequences.
* @param sizes Numbers of values in the result sequences.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return The result column containing generated sequences.
*/
std::unique_ptr<column> sequences(
column_view const& starts,
column_view const& steps,
column_view const& sizes,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/lists/gather.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ namespace lists {
* @param bounds_policy Can be `DONT_CHECK` or `NULLIFY`. Selects whether or not to nullify the
* output list row's element, when the gather index falls outside the range `[-n, n)`,
* where `n` is the number of elements in list row corresponding to the gather-map row.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
* @return column with elements in list of rows gathered based on `gather_map_list`
*
Expand All @@ -73,6 +74,7 @@ std::unique_ptr<column> segmented_gather(
lists_column_view const& source_column,
lists_column_view const& gather_map_list,
out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
4 changes: 3 additions & 1 deletion cpp/include/cudf/lists/reverse.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -42,11 +42,13 @@ namespace cudf::lists {
* @endcode
*
* @param input Lists column for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New lists column with reversed lists
*/
std::unique_ptr<column> reverse(
lists_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
5 changes: 4 additions & 1 deletion cpp/include/cudf/lists/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -46,6 +46,7 @@ namespace lists {
* @param source_column View of the list column of numeric types to sort
* @param column_order The desired sort order
* @param null_precedence The desired order of null compared to other elements in the list
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return list column with elements in each list sorted.
*
Expand All @@ -54,6 +55,7 @@ std::unique_ptr<column> sort_lists(
lists_column_view const& source_column,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -66,6 +68,7 @@ std::unique_ptr<column> stable_sort_lists(
lists_column_view const& source_column,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
6 changes: 5 additions & 1 deletion cpp/include/cudf/lists/stream_compaction.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,12 +54,14 @@ namespace cudf::lists {
*
* @param input The input list column view to be filtered
* @param boolean_mask A nullable list of bools column used to filter `input` elements
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of the same type as `input`, containing filtered list rows
*/
std::unique_ptr<column> apply_boolean_mask(
lists_column_view const& input,
lists_column_view const& boolean_mask,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -78,13 +80,15 @@ std::unique_ptr<column> apply_boolean_mask(
* @param input The input lists column
* @param nulls_equal Flag to specify whether null elements should be considered as equal
* @param nans_equal Flag to specify whether floating-point NaNs should be considered as equal
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned object
* @return The resulting lists column containing lists without duplicates
*/
std::unique_ptr<column> distinct(
lists_column_view const& input,
null_equality nulls_equal = null_equality::EQUAL,
nan_equality nans_equal = nan_equality::ALL_EQUAL,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/lists/copying/segmented_gather.cu
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@ std::unique_ptr<column> segmented_gather(lists_column_view const& value_column,
std::unique_ptr<column> segmented_gather(lists_column_view const& source_column,
lists_column_view const& gather_map_list,
out_of_bounds_policy bounds_policy,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::segmented_gather(
source_column, gather_map_list, bounds_policy, cudf::get_default_stream(), mr);
return detail::segmented_gather(source_column, gather_map_list, bounds_policy, stream, mr);
}

} // namespace lists
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/lists/extract.cu
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,11 @@ std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
*/
std::unique_ptr<column> extract_list_element(lists_column_view const& lists_column,
size_type index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::extract_list_element(lists_column, index, cudf::get_default_stream(), mr);
return detail::extract_list_element(lists_column, index, stream, mr);
}

/**
Expand All @@ -209,12 +210,13 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
*/
std::unique_ptr<column> extract_list_element(lists_column_view const& lists_column,
column_view const& indices,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(indices.size() == lists_column.size(),
"Index column must have as many elements as lists column.");
return detail::extract_list_element(lists_column, indices, cudf::get_default_stream(), mr);
return detail::extract_list_element(lists_column, indices, stream, mr);
}

} // namespace lists
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/lists/reverse.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,12 @@ std::unique_ptr<column> reverse(lists_column_view const& input,

} // namespace detail

std::unique_ptr<column> reverse(lists_column_view const& input, rmm::mr::device_memory_resource* mr)
std::unique_ptr<column> reverse(lists_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::reverse(input, cudf::get_default_stream(), mr);
return detail::reverse(input, stream, mr);
}

} // namespace cudf::lists
7 changes: 4 additions & 3 deletions cpp/src/lists/segmented_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,21 @@ std::unique_ptr<column> stable_sort_lists(lists_column_view const& input,
std::unique_ptr<column> sort_lists(lists_column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::sort_lists(input, column_order, null_precedence, cudf::get_default_stream(), mr);
return detail::sort_lists(input, column_order, null_precedence, stream, mr);
}

std::unique_ptr<column> stable_sort_lists(lists_column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::stable_sort_lists(
input, column_order, null_precedence, cudf::get_default_stream(), mr);
return detail::stable_sort_lists(input, column_order, null_precedence, stream, mr);
}

} // namespace lists
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/lists/sequences.cu
Original file line number Diff line number Diff line change
Expand Up @@ -208,19 +208,21 @@ std::unique_ptr<column> sequences(column_view const& starts,

std::unique_ptr<column> sequences(column_view const& starts,
column_view const& sizes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::sequences(starts, sizes, cudf::get_default_stream(), mr);
return detail::sequences(starts, sizes, stream, mr);
}

std::unique_ptr<column> sequences(column_view const& starts,
column_view const& steps,
column_view const& sizes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::sequences(starts, steps, sizes, cudf::get_default_stream(), mr);
return detail::sequences(starts, steps, sizes, stream, mr);
}

} // namespace cudf::lists
3 changes: 2 additions & 1 deletion cpp/src/lists/stream_compaction/apply_boolean_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,11 @@ std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,

std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,
lists_column_view const& boolean_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr);
return detail::apply_boolean_mask(input, boolean_mask, stream, mr);
}

} // namespace cudf::lists
3 changes: 2 additions & 1 deletion cpp/src/lists/stream_compaction/distinct.cu
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,11 @@ std::unique_ptr<column> distinct(lists_column_view const& input,
std::unique_ptr<column> distinct(lists_column_view const& input,
null_equality nulls_equal,
nan_equality nans_equal,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::distinct(input, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
return detail::distinct(input, nulls_equal, nans_equal, stream, mr);
}

} // namespace cudf::lists
1 change: 1 addition & 0 deletions cpp/tests/groupby/histogram_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ auto groupby_histogram(cudf::column_view const& keys,
auto sorted_histograms = cudf::lists::sort_lists(cudf::lists_column_view{*sorted_vals},
cudf::order::ASCENDING,
cudf::null_order::BEFORE,
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());

return std::pair{std::move(sorted_keys), std::move(sorted_histograms)};
Expand Down
Loading

0 comments on commit c47546e

Please sign in to comment.