From 557aabf8d0be528881aadb9795e6d92790a085a8 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Tue, 3 Sep 2024 11:43:05 -0500 Subject: [PATCH 1/6] Ensure we pass the has_nulls tparam to mixed_join kernels (#16708) Fixes https://github.com/rapidsai/cudf/issues/16706 I'll build/test our stack with this change, but it looks like a typo. If there's a quick unit test we can add I'd be happy to hear recommendations or for someone else to follow on with such a test. Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/16708 --- cpp/src/join/mixed_join_kernel.cuh | 2 +- cpp/src/join/mixed_join_size_kernel.cuh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index 9d011d43de6..368b1fba870 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -130,7 +130,7 @@ void launch_mixed_join(table_device_view left_table, int64_t shmem_size_per_block, rmm::cuda_stream_view stream) { - mixed_join + mixed_join <<>>( left_table, right_table, diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index a1066e32331..84e9be45030 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -124,7 +124,7 @@ std::size_t launch_compute_mixed_join_output_size( // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - compute_mixed_join_output_size + compute_mixed_join_output_size <<>>( left_table, right_table, From 25779d95d413e0ddf9379dee22e36eea7bf5f08e Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 3 Sep 2024 12:24:36 -0500 Subject: [PATCH 2/6] Add boost-devel to Java CI Docker image (#16707) Fixes #16678. Adds the boost-devel package to the Java CI Docker environment now that the Boost headers are not being picked up implicitly after libcudf dropped the Arrow dependency in #16640. libcudfjni still requires Arrow for now, and thus requires Boost headers. 
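For the mixed_join change in PATCH 1/6 above: `has_nulls` is a compile-time template parameter of the join kernels, so the launch helpers have to forward their own `has_nulls` template argument into the kernel launch rather than a fixed value. A minimal sketch of that pattern, with hypothetical names rather than the cudf kernels:

```cuda
// Illustrative sketch only -- hypothetical kernel and launcher, not the cudf sources.
#include <cuda_runtime.h>

template <int block_size, bool has_nulls>
__global__ void example_join_kernel(int const* in, int* out, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Arbitrary per-specialization behavior, only to make the two instantiations differ.
  if (i < n) { out[i] = has_nulls ? 0 : in[i]; }
}

template <int block_size, bool has_nulls>
void launch_example_join(int const* in, int* out, int n, cudaStream_t stream)
{
  int const num_blocks = (n + block_size - 1) / block_size;
  // The point of the fix: pass `has_nulls` through to the kernel's template
  // arguments so the caller's flag selects the matching specialization,
  // instead of hard-coding one value at the launch site.
  example_join_kernel<block_size, has_nulls><<<num_blocks, block_size, 0, stream>>>(in, out, n);
}
```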
Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Alessandro Bellina (https://github.com/abellina) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/16707 --- java/ci/Dockerfile.rocky | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky index 6b87f3ed34e..152af22f7e4 100644 --- a/java/ci/Dockerfile.rocky +++ b/java/ci/Dockerfile.rocky @@ -28,7 +28,7 @@ ARG TARGETPLATFORM=linux/amd64 FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE ARG TOOLSET_VERSION=11 ### Install basic requirements -RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids From 0097b454254ac30739c59dee8f29a91e6643360b Mon Sep 17 00:00:00 2001 From: Hirota Akio <33370421+a-hirota@users.noreply.github.com> Date: Wed, 4 Sep 2024 02:28:16 +0900 Subject: [PATCH 3/6] Fix typo in column_factories.hpp comment from 'depth 1' to 'depth 2' (#16700) This PR fixes a typo in the `cpp/include/cudf/column/column_factories.hpp` file. The comment incorrectly mentioned "data (depth 1)" instead of "data (depth 2)". This correction improves code clarity and documentation accuracy. Authors: - Hirota Akio (https://github.com/a-hirota) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16700 --- cpp/include/cudf/column/column_factories.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index c1f295b7ea8..b2dcb25acb5 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -469,7 +469,7 @@ std::unique_ptr make_strings_column(size_type num_strings, * offsets (depth 1) {0, 2, 5, 7} * data (depth 1) * offsets (depth 2) - * data (depth 1) {1, 2, 3, 4, 5, 6, 7} + * data (depth 2) {1, 2, 3, 4, 5, 6, 7} * @endcode * * @param[in] num_rows The number of lists the column represents. 
From e18b537315c07b73d1eb26354208249605e3e8be Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 3 Sep 2024 08:30:15 -1000 Subject: [PATCH 4/6] Use Series._from_column more consistently to avoid validation (#16716) This modifies cases where `_from_column` provided the same logic or where 1 column was produced so `._from_column` was valid to use Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16716 --- python/cudf/cudf/_lib/text.pyx | 2 +- python/cudf/cudf/core/dataframe.py | 15 ++++----------- python/cudf/cudf/core/series.py | 14 ++++++-------- python/cudf/cudf/io/text.py | 2 +- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx index ece69b424bb..b2c7232f549 100644 --- a/python/cudf/cudf/_lib/text.pyx +++ b/python/cudf/cudf/_lib/text.pyx @@ -86,4 +86,4 @@ def read_text(object filepaths_or_buffers, delim, c_options)) - return {None: Column.from_unique_ptr(move(c_col))} + return Column.from_unique_ptr(move(c_col)) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0d632f4775f..7a171fe9e05 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -473,15 +473,8 @@ def __getitem__(self, arg): ca = self._frame._data index = self._frame.index if col_is_scalar: - s = Series._from_data( - data=ColumnAccessor( - {key: ca._data[key] for key in column_names}, - multiindex=ca.multiindex, - level_names=ca.level_names, - verify=False, - ), - index=index, - ) + name = column_names[0] + s = Series._from_column(ca._data[name], name=name, index=index) return s._getitem_preprocessed(row_spec) if column_names != list(self._frame._column_names): frame = self._frame._from_data( @@ -7770,8 +7763,8 @@ def interleave_columns(self): "interleave_columns does not support 'category' dtype." 
) - return self._constructor_sliced._from_data( - {None: libcudf.reshape.interleave_columns([*self._columns])} + return self._constructor_sliced._from_column( + libcudf.reshape.interleave_columns([*self._columns]) ) @_performance_tracking diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index aadbd80f4b4..48445f018d3 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -611,9 +611,7 @@ def from_masked_array(cls, data, mask, null_count=None): 4 14 dtype: int64 """ - col = as_column(data).set_mask(mask) - ca = ColumnAccessor({None: col}, verify=False) - return cls._from_data(ca) + return cls._from_column(as_column(data).set_mask(mask)) @_performance_tracking def __init__( @@ -1150,7 +1148,7 @@ def reset_index( if name is no_default: name = 0 if self.name is None else self.name data[name] = data.pop(self.name) - return cudf.core.dataframe.DataFrame._from_data(data, index) + return self._constructor_expanddim._from_data(data, index) # For ``name`` behavior, see: # https://github.com/pandas-dev/pandas/issues/44575 # ``name`` has to be ignored when `drop=True` @@ -1661,9 +1659,7 @@ def _concat(cls, objs, axis=0, index: bool = True): if len(objs): col = col._with_type_metadata(objs[0].dtype) - return cls._from_data( - ColumnAccessor({name: col}, verify=False), index=result_index - ) + return cls._from_column(col, name=name, index=result_index) @property # type: ignore @_performance_tracking @@ -1977,7 +1973,9 @@ def between(self, left, right, inclusive="both") -> Series: "Inclusive has to be either string of 'both', " "'left', 'right', or 'neither'." ) - return self._from_data({self.name: lmask & rmask}, self.index) + return self._from_column( + lmask & rmask, name=self.name, index=self.index + ) @_performance_tracking def all(self, axis=0, bool_only=None, skipna=True, **kwargs): diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 0043efce1e4..5ce738cae0e 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -33,7 +33,7 @@ def read_text( filepath_or_buffer, "read_text" ) - return cudf.Series._from_data( + return cudf.Series._from_column( libtext.read_text( filepath_or_buffer, delimiter=delimiter, From a83ac6f27254b2ebf99397d81b776c74f93469bf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 3 Sep 2024 10:07:49 -1000 Subject: [PATCH 5/6] Add return type annotations to MultiIndex (#16696) Mostly just return type annotations. No logic changes. 
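Many of the new annotations return `Self` rather than a concrete class, so a method inherited or called through a subclass keeps its precise type under a type checker. A minimal standalone sketch of that behavior (illustrative classes, not the cudf hierarchy):

```python
from typing import Self  # Python 3.11+; older interpreters get Self from typing_extensions


class BaseIndex:
    def copy(self) -> Self:
        # `Self` tells the type checker this returns the class it was called on.
        return type(self)()


class MultiIndex(BaseIndex):
    pass


idx = MultiIndex().copy()  # type checkers infer MultiIndex here, not BaseIndex
```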
Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/16696 --- docs/cudf/source/conf.py | 2 + python/cudf/cudf/core/multiindex.py | 109 ++++++++++++++++------------ 2 files changed, 63 insertions(+), 48 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index c58bc42327c..95813907bf4 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -566,6 +566,8 @@ def on_missing_reference(app, env, node, contnode): ("py:obj", "cudf.Index.to_flat_index"), ("py:obj", "cudf.MultiIndex.to_flat_index"), ("py:meth", "pyarrow.Table.to_pandas"), + ("py:class", "pd.DataFrame"), + ("py:class", "pandas.core.indexes.frozen.FrozenList"), ("py:class", "pa.Array"), ("py:class", "ScalarLike"), ("py:class", "ParentType"), diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index a66e2936e3b..e00890ac5c3 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -247,7 +247,7 @@ def to_series(self, index=None, name=None): ) @_performance_tracking - def astype(self, dtype, copy: bool = True): + def astype(self, dtype, copy: bool = True) -> Self: if not is_object_dtype(dtype): raise TypeError( "Setting a MultiIndex dtype to anything other than object is " @@ -256,7 +256,7 @@ def astype(self, dtype, copy: bool = True): return self @_performance_tracking - def rename(self, names, inplace=False): + def rename(self, names, inplace: bool = False) -> Self | None: """ Alter MultiIndex level names @@ -303,7 +303,9 @@ def rename(self, names, inplace=False): return self.set_names(names, level=None, inplace=inplace) @_performance_tracking - def set_names(self, names, level=None, inplace=False): + def set_names( + self, names, level=None, inplace: bool = False + ) -> Self | None: names_is_list_like = is_list_like(names) level_is_list_like = is_list_like(level) @@ -345,7 +347,7 @@ def _from_data( cls, data: MutableMapping, name: Any = None, - ) -> MultiIndex: + ) -> Self: """ Use when you have a ColumnAccessor-like mapping but no codes and levels. """ @@ -394,7 +396,7 @@ def copy( names=None, deep=False, name=None, - ): + ) -> Self: """Returns copy of MultiIndex object. Returns a copy of `MultiIndex`. The `levels` and `codes` value can be @@ -457,7 +459,7 @@ def copy( ) @_performance_tracking - def __repr__(self): + def __repr__(self) -> str: max_seq_items = pd.get_option("display.max_seq_items") or len(self) if len(self) > max_seq_items: @@ -503,7 +505,7 @@ def __repr__(self): @property # type: ignore @_external_only_api("Use ._codes instead") @_performance_tracking - def codes(self): + def codes(self) -> pd.core.indexes.frozen.FrozenList: """ Returns the codes of the underlying MultiIndex. @@ -531,7 +533,7 @@ def get_slice_bound(self, label, side): @property # type: ignore @_performance_tracking - def nlevels(self): + def nlevels(self) -> int: """Integer number of levels in this MultiIndex.""" return self._num_columns @@ -590,7 +592,7 @@ def _get_level_label(self, level): return self.names[level] @_performance_tracking - def isin(self, values, level=None): + def isin(self, values, level=None) -> cp.ndarray: """Return a boolean array where the index values are in values. Compute boolean array of whether each index value is found in @@ -864,7 +866,7 @@ def _validate_indexer( | slice | tuple[Any, ...] 
| list[tuple[Any, ...]], - ): + ) -> None: if isinstance(indexer, numbers.Number): return if isinstance(indexer, tuple): @@ -900,12 +902,12 @@ def __eq__(self, other): @property # type: ignore @_performance_tracking - def size(self): + def size(self) -> int: # The size of a MultiIndex is only dependent on the number of rows. return self._num_rows @_performance_tracking - def take(self, indices): + def take(self, indices) -> Self: if isinstance(indices, cudf.Series) and indices.has_nulls: raise ValueError("Column must have no nulls.") obj = super().take(indices) @@ -957,7 +959,12 @@ def __getitem__(self, index): return result @_performance_tracking - def to_frame(self, index=True, name=no_default, allow_duplicates=False): + def to_frame( + self, + index: bool = True, + name=no_default, + allow_duplicates: bool = False, + ) -> cudf.DataFrame: """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -1034,7 +1041,7 @@ def to_frame(self, index=True, name=no_default, allow_duplicates=False): ) @_performance_tracking - def get_level_values(self, level): + def get_level_values(self, level) -> cudf.Index: """ Return the values at the requested level @@ -1067,30 +1074,30 @@ def get_level_values(self, level): ) return level_values - def _is_numeric(self): + def _is_numeric(self) -> bool: return False - def _is_boolean(self): + def _is_boolean(self) -> bool: return False - def _is_integer(self): + def _is_integer(self) -> bool: return False - def _is_floating(self): + def _is_floating(self) -> bool: return False - def _is_object(self): + def _is_object(self) -> bool: return False - def _is_categorical(self): + def _is_categorical(self) -> bool: return False - def _is_interval(self): + def _is_interval(self) -> bool: return False @classmethod @_performance_tracking - def _concat(cls, objs): + def _concat(cls, objs) -> Self: source_data = [o.to_frame(index=False) for o in objs] # TODO: Verify if this is really necessary or if we can rely on @@ -1100,17 +1107,19 @@ def _concat(cls, objs): for obj in source_data[1:]: obj.columns = colnames - source_data = cudf.DataFrame._concat(source_data) + source_df = cudf.DataFrame._concat(source_data) try: # Only set names if all objs have the same names (names,) = {o.names for o in objs} - {None} except ValueError: - names = [None] * source_data._num_columns - return cudf.MultiIndex.from_frame(source_data, names=names) + names = [None] * source_df._num_columns + return cudf.MultiIndex.from_frame(source_df, names=names) @classmethod @_performance_tracking - def from_tuples(cls, tuples, sortorder: int | None = None, names=None): + def from_tuples( + cls, tuples, sortorder: int | None = None, names=None + ) -> Self: """ Convert list of tuples to MultiIndex. @@ -1153,7 +1162,7 @@ def from_tuples(cls, tuples, sortorder: int | None = None, names=None): return cls.from_pandas(pdi) @_performance_tracking - def to_numpy(self): + def to_numpy(self) -> np.ndarray: return self.values_host def to_flat_index(self): @@ -1167,7 +1176,7 @@ def to_flat_index(self): @property # type: ignore @_performance_tracking - def values_host(self): + def values_host(self) -> np.ndarray: """ Return a numpy representation of the MultiIndex. @@ -1195,7 +1204,7 @@ def values_host(self): @property # type: ignore @_performance_tracking - def values(self): + def values(self) -> cp.ndarray: """ Return a CuPy representation of the MultiIndex. 
@@ -1236,7 +1245,7 @@ def from_frame( df: pd.DataFrame | cudf.DataFrame, sortorder: int | None = None, names=None, - ): + ) -> Self: """ Make a MultiIndex from a DataFrame. @@ -1303,7 +1312,9 @@ def from_frame( @classmethod @_performance_tracking - def from_product(cls, iterables, sortorder: int | None = None, names=None): + def from_product( + cls, iterables, sortorder: int | None = None, names=None + ) -> Self: """ Make a MultiIndex from the cartesian product of multiple iterables. @@ -1355,7 +1366,7 @@ def from_arrays( arrays, sortorder=None, names=None, - ) -> MultiIndex: + ) -> Self: """ Convert arrays to MultiIndex. @@ -1410,7 +1421,7 @@ def from_arrays( ) @_performance_tracking - def _poplevels(self, level): + def _poplevels(self, level) -> None | MultiIndex | cudf.Index: """ Remove and return the specified levels from self. @@ -1461,7 +1472,7 @@ def _poplevels(self, level): return popped @_performance_tracking - def swaplevel(self, i=-2, j=-1): + def swaplevel(self, i=-2, j=-1) -> Self: """ Swap level i with level j. Calling this method does not change the ordering of the values. @@ -1512,7 +1523,7 @@ def swaplevel(self, i=-2, j=-1): return midx @_performance_tracking - def droplevel(self, level=-1): + def droplevel(self, level=-1) -> MultiIndex | cudf.Index: """ Removes the specified levels from the MultiIndex. @@ -1598,7 +1609,9 @@ def to_pandas( @classmethod @_performance_tracking - def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): + def from_pandas( + cls, multiindex: pd.MultiIndex, nan_as_null=no_default + ) -> Self: """ Convert from a Pandas MultiIndex @@ -1633,11 +1646,11 @@ def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default): @cached_property # type: ignore @_performance_tracking - def is_unique(self): + def is_unique(self) -> bool: return len(self) == len(self.unique()) @property - def dtype(self): + def dtype(self) -> np.dtype: return np.dtype("O") @_performance_tracking @@ -1706,7 +1719,7 @@ def is_monotonic_decreasing(self) -> bool: ) @_performance_tracking - def fillna(self, value): + def fillna(self, value) -> Self: """ Fill null values with the specified value. @@ -1758,7 +1771,7 @@ def nunique(self, dropna: bool = True) -> int: mi = self.dropna(how="all") if dropna else self return len(mi.unique()) - def _clean_nulls_from_index(self): + def _clean_nulls_from_index(self) -> Self: """ Convert all na values(if any) in MultiIndex object to `` as a preprocessing step to `__repr__` methods. 
@@ -1769,20 +1782,20 @@ def _clean_nulls_from_index(self): ) @_performance_tracking - def memory_usage(self, deep=False): + def memory_usage(self, deep: bool = False) -> int: usage = sum(col.memory_usage for col in self._columns) usage += sum(level.memory_usage(deep=deep) for level in self._levels) usage += sum(code.memory_usage for code in self._codes) return usage @_performance_tracking - def difference(self, other, sort=None): + def difference(self, other, sort=None) -> Self: if hasattr(other, "to_pandas"): other = other.to_pandas() return cudf.from_pandas(self.to_pandas().difference(other, sort)) @_performance_tracking - def append(self, other): + def append(self, other) -> Self: """ Append a collection of MultiIndex objects together @@ -2000,7 +2013,7 @@ def get_loc(self, key): mask[true_inds] = True return mask - def _get_reconciled_name_object(self, other) -> MultiIndex: + def _get_reconciled_name_object(self, other) -> Self: """ If the result of a set operation will be self, return self, unless the names change, in which @@ -2026,7 +2039,7 @@ def _maybe_match_names(self, other): ] @_performance_tracking - def union(self, other, sort=None): + def union(self, other, sort=None) -> Self: if not isinstance(other, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" try: @@ -2050,7 +2063,7 @@ def union(self, other, sort=None): return self._union(other, sort=sort) @_performance_tracking - def _union(self, other, sort=None): + def _union(self, other, sort=None) -> Self: # TODO: When to_frame is refactored to return a # deep copy in future, we should push most of the common # logic between MultiIndex._union & BaseIndex._union into @@ -2076,7 +2089,7 @@ def _union(self, other, sort=None): return midx @_performance_tracking - def _intersection(self, other, sort=None): + def _intersection(self, other, sort=None) -> Self: if self.names != other.names: deep = True col_names = list(range(0, self.nlevels)) @@ -2167,7 +2180,7 @@ def _columns_for_reset_index( else: yield from self._split_columns_by_levels(levels, in_levels=True) - def repeat(self, repeats, axis=None): + def repeat(self, repeats, axis=None) -> Self: return self._from_data( self._data._from_columns_like_self( super()._repeat([*self._columns], repeats, axis) From fa1486e1d1d09116d2b5f57dfef7d9307ebc76c6 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:31:30 -0400 Subject: [PATCH 6/6] Remove ERROR_TEST gtest from libcudf (#16722) Removes the `ERROR_TEST` gtest from libcudf. This test was only verifying some macros on mostly CUDA behavior and not libcudf specific functions. The tests have become troublesome to support in CI especially in conjunction with other tools like `compute-sanitizer`. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Jayjeet Chakraborty (https://github.com/JayjeetAtGithub) URL: https://github.com/rapidsai/cudf/pull/16722 --- cpp/tests/CMakeLists.txt | 4 - cpp/tests/error/error_handling_test.cu | 136 ------------------------- 2 files changed, 140 deletions(-) delete mode 100644 cpp/tests/error/error_handling_test.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f86acbcc51b..1bedb344a01 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -110,10 +110,6 @@ ConfigureTest(SCALAR_TEST scalar/scalar_test.cpp scalar/scalar_device_view_test. 
# * timestamps tests ------------------------------------------------------------------------------ ConfigureTest(TIMESTAMPS_TEST wrappers/timestamps_test.cu) -# ################################################################################################## -# * cudf tests ------------------------------------------------------------------------------------ -ConfigureTest(ERROR_TEST error/error_handling_test.cu) - # ################################################################################################## # * groupby tests --------------------------------------------------------------------------------- ConfigureTest( diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu deleted file mode 100644 index 9c7459fa69d..00000000000 --- a/cpp/tests/error/error_handling_test.cu +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -TEST(ExpectsTest, FalseCondition) -{ - EXPECT_THROW(CUDF_EXPECTS(false, "condition is false"), cudf::logic_error); -} - -TEST(ExpectsTest, TrueCondition) { EXPECT_NO_THROW(CUDF_EXPECTS(true, "condition is true")); } - -TEST(CudaTryTest, Error) { EXPECT_THROW(CUDF_CUDA_TRY(cudaErrorLaunchFailure), cudf::cuda_error); } - -TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); } - -TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); } - -namespace { -// Some silly kernel that will cause an error -CUDF_KERNEL void test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } -} // namespace - -// In a release build and without explicit synchronization, CUDF_CHECK_CUDA may -// or may not fail on erroneous asynchronous CUDA calls. Invoke -// cudaStreamSynchronize to guarantee failure on error. In a non-release build, -// CUDF_CHECK_CUDA deterministically fails on erroneous asynchronous CUDA -// calls. 
-TEST(StreamCheck, FailedKernel) -{ - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifdef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -TEST(StreamCheck, CatchFailedKernel) -{ - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifndef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -CUDF_KERNEL void kernel() { asm("trap;"); } - -TEST(DeathTest, CudaFatalError) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - auto call_kernel = []() { - kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(); - try { - CUDF_CUDA_TRY(cudaDeviceSynchronize()); - } catch (const cudf::fatal_cuda_error& fe) { - std::abort(); - } - }; - ASSERT_DEATH(call_kernel(), ""); -} - -#ifndef NDEBUG - -CUDF_KERNEL void assert_false_kernel() { cudf_assert(false && "this kernel should die"); } - -CUDF_KERNEL void assert_true_kernel() { cudf_assert(true && "this kernel should live"); } - -TEST(DebugAssertDeathTest, cudf_assert_false) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - - auto call_kernel = []() { - auto const stream = cudf::get_default_stream().value(); - assert_false_kernel<<<1, 1, 0, stream>>>(); - - // Kernel should fail with `cudaErrorAssert` - // This error invalidates the current device context, so we need to kill - // the current process. Running with EXPECT_DEATH spawns a new process for - // each attempted kernel launch - if (cudaErrorAssert == cudaDeviceSynchronize()) { std::abort(); } - - // If we reach this point, the cudf_assert didn't work so we exit normally, which will cause - // EXPECT_DEATH to fail. - }; - - EXPECT_DEATH(call_kernel(), "this kernel should die"); -} - -TEST(DebugAssert, cudf_assert_true) -{ - auto const stream = cudf::get_default_stream().value(); - assert_true_kernel<<<1, 1, 0, stream>>>(); - ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize()); -} - -#endif - -// These tests don't use CUDF_TEST_PROGRAM_MAIN because : -// 1.) They don't need the RMM Pool -// 2.) The RMM Pool interferes with the death test -int main(int argc, char** argv) -{ - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return 0; } - - ::testing::InitGoogleTest(&argc, argv); - auto const cmd_opts = parse_cudf_test_opts(argc, argv); - auto adaptor = make_stream_mode_adaptor(cmd_opts); - return RUN_ALL_TESTS(); -}
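The macros the removed test exercised are part of libcudf's regular error-handling utilities and are unchanged by this patch. A rough sketch of typical call sites (illustrative function, not an excerpt from libcudf):

```cpp
#include <cudf/utilities/error.hpp>

#include <cuda_runtime_api.h>

#include <cstddef>

void copy_device_ints(int const* d_in, int* d_out, std::size_t n, cudaStream_t stream)
{
  // CUDF_EXPECTS throws cudf::logic_error when the precondition is violated.
  CUDF_EXPECTS(d_in != nullptr && d_out != nullptr, "null device pointer");

  // CUDF_CUDA_TRY turns a non-success cudaError_t into a cudf::cuda_error exception.
  CUDF_CUDA_TRY(
    cudaMemcpyAsync(d_out, d_in, n * sizeof(int), cudaMemcpyDeviceToDevice, stream));

  // CUDF_CHECK_CUDA checks the stream for errors from the asynchronous work above;
  // in non-release builds it performs a stronger, synchronizing check so failures
  // surface deterministically (as described in the removed test's comments).
  CUDF_CHECK_CUDA(stream);
}
```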