Skip to content

Commit

Permalink
Improve contains_column by invoking contains_table (#14238)
Browse files Browse the repository at this point in the history
Part of ##12261

This PR simplifies the `contains_column` implementation by invoking `contains_table` and gets rid of the use of the cudf `unordered_multiset`. It also removes the `unordered_multiset` header file from libcudf.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: #14238
  • Loading branch information
PointKernel authored Oct 4, 2023
1 parent d87e181 commit b120f7e
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 225 deletions.
159 changes: 0 additions & 159 deletions cpp/src/hash/unordered_multiset.cuh

This file was deleted.

67 changes: 1 addition & 66 deletions cpp/src/search/contains_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,85 +14,22 @@
* limitations under the License.
*/

#include <hash/unordered_multiset.cuh>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/search.hpp>
#include <cudf/dictionary/detail/search.hpp>
#include <cudf/dictionary/detail/update_keys.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <thrust/uninitialized_fill.h>

namespace cudf {
namespace detail {

namespace {

struct contains_column_dispatch {
template <typename Element, typename Haystack>
struct contains_fn {
bool __device__ operator()(size_type const idx) const
{
if (needles_have_nulls && needles.is_null_nocheck(idx)) {
// Exit early. The value doesn't matter, and will be masked as a null element.
return true;
}

return haystack.contains(needles.template element<Element>(idx));
}

Haystack const haystack;
column_device_view const needles;
bool const needles_have_nulls;
};

template <typename Element, CUDF_ENABLE_IF(!is_nested<Element>())>
std::unique_ptr<column> operator()(column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
auto result = make_numeric_column(data_type{type_to_id<bool>()},
needles.size(),
copy_bitmask(needles, stream, mr),
needles.null_count(),
stream,
mr);
if (needles.is_empty()) { return result; }

auto const out_begin = result->mutable_view().template begin<bool>();
if (haystack.is_empty()) {
thrust::uninitialized_fill(
rmm::exec_policy(stream), out_begin, out_begin + needles.size(), false);
return result;
}

auto const haystack_set = cudf::detail::unordered_multiset<Element>::create(haystack, stream);
auto const haystack_set_dv = haystack_set.to_device();
auto const needles_cdv_ptr = column_device_view::create(needles, stream);

thrust::transform(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(needles.size()),
out_begin,
contains_fn<Element, decltype(haystack_set_dv)>{
haystack_set_dv, *needles_cdv_ptr, needles.has_nulls()});

result->set_null_count(needles.null_count());

return result;
}

template <typename Element, CUDF_ENABLE_IF(is_nested<Element>())>
template <typename Element>
std::unique_ptr<column> operator()(column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream,
Expand Down Expand Up @@ -144,8 +81,6 @@ std::unique_ptr<column> contains(column_view const& haystack,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(haystack.type() == needles.type(), "DTYPE mismatch");

return cudf::type_dispatcher(
haystack.type(), contains_column_dispatch{}, haystack, needles, stream, mr);
}
Expand Down

0 comments on commit b120f7e

Please sign in to comment.